diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index d6507071d4693..b39f9d3634a63 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -376,6 +376,8 @@ implementation. +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | loop stripe transformation | :good:`done` | https://github.com/llvm/llvm-project/pull/119891 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ +| loop fuse transformation | :good:`prototyped` | :none:`unclaimed` | | ++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | work distribute construct | :none:`unclaimed` | :none:`unclaimed` | | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | task_iteration | :none:`unclaimed` | :none:`unclaimed` | | diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index b93fa33acc2a0..8013861621700 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1044,6 +1044,7 @@ OpenMP Support open parenthesis. (#GH139665) - An error is now emitted when OpenMP ``collapse`` and ``ordered`` clauses have an argument larger than what can fit within a 64-bit integer. +- Added support for 'omp fuse' directive. Improvements ^^^^^^^^^^^^ diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index d30d15e53802a..00046de62a742 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2162,6 +2162,10 @@ enum CXCursorKind { */ CXCursor_OMPStripeDirective = 310, + /** OpenMP fuse directive + */ + CXCursor_OMPFuseDirective = 318, + /** OpenACC Compute Construct. */ CXCursor_OpenACCComputeConstruct = 320, diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 6fd16bc0f03be..3df5133a17fb4 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1143,6 +1143,97 @@ class OMPFullClause final : public OMPNoChildClause { static OMPFullClause *CreateEmpty(const ASTContext &C); }; +/// This class represents the 'looprange' clause in the +/// '#pragma omp fuse' directive +/// +/// \code {c} +/// #pragma omp fuse looprange(1,2) +/// { +/// for(int i = 0; i < 64; ++i) +/// for(int j = 0; j < 256; j+=2) +/// for(int k = 127; k >= 0; --k) +/// \endcode +class OMPLoopRangeClause final + : public OMPClause, + private llvm::TrailingObjects { + friend class OMPClauseReader; + friend class llvm::TrailingObjects; + + /// Location of '(' + SourceLocation LParenLoc; + + /// Location of first and count expressions + SourceLocation FirstLoc, CountLoc; + + /// Number of looprange arguments (always 2: first, count) + unsigned NumArgs = 2; + + /// Set the argument expressions. + void setArgs(ArrayRef Args) { + assert(Args.size() == NumArgs && "Expected exactly 2 looprange arguments"); + std::copy(Args.begin(), Args.end(), getTrailingObjects()); + } + + /// Build an empty clause for deserialization. + explicit OMPLoopRangeClause() + : OMPClause(llvm::omp::OMPC_looprange, {}, {}), NumArgs(2) {} + +public: + /// Build a 'looprange' clause AST node. + static OMPLoopRangeClause * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation FirstLoc, SourceLocation CountLoc, + SourceLocation EndLoc, ArrayRef Args); + + /// Build an empty 'looprange' clause node. + static OMPLoopRangeClause *CreateEmpty(const ASTContext &C); + + // Location getters/setters + SourceLocation getLParenLoc() const { return LParenLoc; } + SourceLocation getFirstLoc() const { return FirstLoc; } + SourceLocation getCountLoc() const { return CountLoc; } + + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + void setFirstLoc(SourceLocation Loc) { FirstLoc = Loc; } + void setCountLoc(SourceLocation Loc) { CountLoc = Loc; } + + /// Get looprange arguments: first and count + Expr *getFirst() const { return getArgs()[0]; } + Expr *getCount() const { return getArgs()[1]; } + + /// Set looprange arguments: first and count + void setFirst(Expr *E) { getArgs()[0] = E; } + void setCount(Expr *E) { getArgs()[1] = E; } + + MutableArrayRef getArgs() { + return MutableArrayRef(getTrailingObjects(), NumArgs); + } + ArrayRef getArgs() const { + return ArrayRef(getTrailingObjects(), NumArgs); + } + + child_range children() { + return child_range(reinterpret_cast(getArgs().begin()), + reinterpret_cast(getArgs().end())); + } + const_child_range children() const { + auto AR = getArgs(); + return const_child_range(reinterpret_cast(AR.begin()), + reinterpret_cast(AR.end())); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_looprange; + } +}; + /// Representation of the 'partial' clause of the '#pragma omp unroll' /// directive. /// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 23a8c4f1f7380..94066edc64933 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3080,6 +3080,9 @@ DEF_TRAVERSE_STMT(OMPUnrollDirective, DEF_TRAVERSE_STMT(OMPReverseDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPFuseDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + DEF_TRAVERSE_STMT(OMPInterchangeDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) @@ -3397,6 +3400,14 @@ bool RecursiveASTVisitor::VisitOMPFullClause(OMPFullClause *C) { return true; } +template +bool RecursiveASTVisitor::VisitOMPLoopRangeClause( + OMPLoopRangeClause *C) { + TRY_TO(TraverseStmt(C->getFirst())); + TRY_TO(TraverseStmt(C->getCount())); + return true; +} + template bool RecursiveASTVisitor::VisitOMPPartialClause(OMPPartialClause *C) { TRY_TO(TraverseStmt(C->getFactor())); diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index 736bcabbad1f7..cb871c9894d01 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -962,6 +962,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { /// Number of loops generated by this loop transformation. unsigned NumGeneratedLoops = 0; + /// Number of top level canonical loop nests generated by this loop + /// transformation + unsigned NumGeneratedLoopNests = 0; protected: explicit OMPLoopTransformationDirective(StmtClass SC, @@ -973,6 +976,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { /// Set the number of loops generated by this loop transformation. void setNumGeneratedLoops(unsigned Num) { NumGeneratedLoops = Num; } + /// Set the number of top level canonical loop nests generated by this loop + /// transformation + void setNumGeneratedLoopNests(unsigned Num) { NumGeneratedLoopNests = Num; } public: /// Return the number of associated (consumed) loops. @@ -981,6 +987,10 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { /// Return the number of loops generated by this loop transformation. unsigned getNumGeneratedLoops() const { return NumGeneratedLoops; } + /// Return the number of top level canonical loop nests generated by this loop + /// transformation + unsigned getNumGeneratedLoopNests() const { return NumGeneratedLoopNests; } + /// Get the de-sugared statements after the loop transformation. /// /// Might be nullptr if either the directive generates no loops and is handled @@ -995,7 +1005,7 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { Stmt::StmtClass C = T->getStmtClass(); return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass || C == OMPReverseDirectiveClass || C == OMPInterchangeDirectiveClass || - C == OMPStripeDirectiveClass; + C == OMPStripeDirectiveClass || C == OMPFuseDirectiveClass; } }; @@ -5561,7 +5571,10 @@ class OMPTileDirective final : public OMPLoopTransformationDirective { : OMPLoopTransformationDirective(OMPTileDirectiveClass, llvm::omp::OMPD_tile, StartLoc, EndLoc, NumLoops) { + // Tiling doubles the original number of loops setNumGeneratedLoops(2 * NumLoops); + // Produces a single top-level canonical loop nest + setNumGeneratedLoopNests(1); } void setPreInits(Stmt *PreInits) { @@ -5639,6 +5652,8 @@ class OMPStripeDirective final : public OMPLoopTransformationDirective { llvm::omp::OMPD_stripe, StartLoc, EndLoc, NumLoops) { setNumGeneratedLoops(2 * NumLoops); + // Similar to Tile, it only generates a single top level loop nest + setNumGeneratedLoopNests(1); } void setPreInits(Stmt *PreInits) { @@ -5790,7 +5805,10 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective { explicit OMPReverseDirective(SourceLocation StartLoc, SourceLocation EndLoc) : OMPLoopTransformationDirective(OMPReverseDirectiveClass, llvm::omp::OMPD_reverse, StartLoc, - EndLoc, 1) {} + EndLoc, 1) { + // Reverse produces a single top-level canonical loop nest + setNumGeneratedLoopNests(1); + } void setPreInits(Stmt *PreInits) { Data->getChildren()[PreInitsOffset] = PreInits; @@ -5857,7 +5875,10 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective { : OMPLoopTransformationDirective(OMPInterchangeDirectiveClass, llvm::omp::OMPD_interchange, StartLoc, EndLoc, NumLoops) { + // Interchange produces a single top-level canonical loop + // nest, with the exact same amount of total loops setNumGeneratedLoops(3 * NumLoops); + setNumGeneratedLoopNests(1); } void setPreInits(Stmt *PreInits) { @@ -5908,6 +5929,86 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective { } }; +/// Represents the '#pragma omp fuse' loop transformation directive +/// +/// \code{c} +/// #pragma omp fuse +/// { +/// for(int i = 0; i < m1; ++i) {...} +/// for(int j = 0; j < m2; ++j) {...} +/// ... +/// } +/// \endcode + +class OMPFuseDirective final : public OMPLoopTransformationDirective { + friend class ASTStmtReader; + friend class OMPExecutableDirective; + + // Offsets of child members. + enum { + PreInitsOffset = 0, + TransformedStmtOffset, + }; + + explicit OMPFuseDirective(SourceLocation StartLoc, SourceLocation EndLoc, + unsigned NumLoops) + : OMPLoopTransformationDirective(OMPFuseDirectiveClass, + llvm::omp::OMPD_fuse, StartLoc, EndLoc, + NumLoops) {} + + void setPreInits(Stmt *PreInits) { + Data->getChildren()[PreInitsOffset] = PreInits; + } + + void setTransformedStmt(Stmt *S) { + Data->getChildren()[TransformedStmtOffset] = S; + } + +public: + /// Create a new AST node representation for #pragma omp fuse' + /// + /// \param C Context of the AST + /// \param StartLoc Location of the introducer (e.g the 'omp' token) + /// \param EndLoc Location of the directive's end (e.g the tok::eod) + /// \param Clauses The directive's clauses + /// \param NumLoops Number of total affected loops + /// \param NumLoopNests Number of affected top level canonical loops + /// (number of items in the 'looprange' clause if present) + /// \param AssociatedStmt The outermost associated loop + /// \param TransformedStmt The loop nest after fusion, or nullptr in + /// dependent + /// \param PreInits Helper preinits statements for the loop nest + static OMPFuseDirective *Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation EndLoc, + ArrayRef Clauses, + unsigned NumLoops, unsigned NumLoopNests, + Stmt *AssociatedStmt, Stmt *TransformedStmt, + Stmt *PreInits); + + /// Build an empty '#pragma omp fuse' AST node for deserialization + /// + /// \param C Context of the AST + /// \param NumClauses Number of clauses to allocate + /// \param NumLoops Number of associated loops to allocate + /// \param NumLoopNests Number of top level loops to allocate + static OMPFuseDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses, + unsigned NumLoops, + unsigned NumLoopNests); + + /// Gets the associated loops after the transformation. This is the de-sugared + /// replacement or nulltpr in dependent contexts. + Stmt *getTransformedStmt() const { + return Data->getChildren()[TransformedStmtOffset]; + } + + /// Return preinits statement. + Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPFuseDirectiveClass; + } +}; + /// This represents '#pragma omp scan' directive. /// /// \code diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 2835e3a9d9960..974e44bdc7b7a 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11560,6 +11560,18 @@ def note_omp_implicit_dsa : Note< "implicitly determined as %0">; def err_omp_loop_var_dsa : Error< "loop iteration variable in the associated loop of 'omp %1' directive may not be %0, predetermined as %2">; +def err_omp_not_canonical_loop : Error < + "loop after '#pragma omp %0' is not in canonical form">; +def err_omp_not_a_loop_sequence : Error < + "statement after '#pragma omp %0' must be a loop sequence containing canonical loops or loop-generating constructs">; +def err_omp_empty_loop_sequence : Error < + "loop sequence after '#pragma omp %0' must contain at least 1 canonical loop or loop-generating construct">; +def err_omp_invalid_looprange : Error < + "loop range in '#pragma omp %0' exceeds the number of available loops: " + "range end '%1' is greater than the total number of loops '%2'">; +def warn_omp_redundant_fusion : Warning < + "loop range in '#pragma omp %0' contains only a single loop, resulting in redundant fusion">, + InGroup; def err_omp_not_for : Error< "%select{statement after '#pragma omp %1' must be a for loop|" "expected %2 for loops after '#pragma omp %1'%select{|, but found only %4}3}0">; diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index 9526fa5808aa5..739160342062c 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -234,6 +234,7 @@ def OMPStripeDirective : StmtNode; def OMPUnrollDirective : StmtNode; def OMPReverseDirective : StmtNode; def OMPInterchangeDirective : StmtNode; +def OMPFuseDirective : StmtNode; def OMPForDirective : StmtNode; def OMPForSimdDirective : StmtNode; def OMPSectionsDirective : StmtNode; diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index e6492b81dfff8..965dcb7da26d8 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -6739,6 +6739,9 @@ class Parser : public CodeCompletionHandler { OpenMPClauseKind Kind, bool ParseOnly); + /// Parses the 'looprange' clause of a '#pragma omp fuse' directive. + OMPClause *ParseOpenMPLoopRangeClause(); + /// Parses the 'sizes' clause of a '#pragma omp tile' directive. OMPClause *ParseOpenMPSizesClause(); diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 6498390fe96f7..35bb884c0c1f2 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -457,6 +457,13 @@ class SemaOpenMP : public SemaBase { Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc); + + /// Called on well-formed '#pragma omp fuse' after parsing of its + /// clauses and the associated statement. + StmtResult ActOnOpenMPFuseDirective(ArrayRef Clauses, + Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp for' after parsing /// of the associated statement. StmtResult @@ -914,6 +921,12 @@ class SemaOpenMP : public SemaBase { SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc); + + /// Called on well-form 'looprange' clause after parsing its arguments. + OMPClause * + ActOnOpenMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc); /// Called on well-formed 'ordered' clause. OMPClause * ActOnOpenMPOrderedClause(SourceLocation StartLoc, SourceLocation EndLoc, @@ -1478,7 +1491,109 @@ class SemaOpenMP : public SemaBase { bool checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl &LoopHelpers, - Stmt *&Body, SmallVectorImpl> &OriginalInits); + Stmt *&Body, SmallVectorImpl> &OriginalInits); + + /// @brief Categories of loops encountered during semantic OpenMP loop + /// analysis + /// + /// This enumeration identifies the structural category of a loop or sequence + /// of loops analyzed in the context of OpenMP transformations and directives. + /// This categorization helps differentiate between original source loops + /// and the structures resulting from applying OpenMP loop transformations. + enum class OMPLoopCategory { + + /// @var OMPLoopCategory::RegularLoop + /// Represents a standard canonical loop nest found in the + /// original source code or an intact loop after transformations + /// (i.e Post/Pre loops of a loopranged fusion) + RegularLoop, + + /// @var OMPLoopCategory::TransformSingleLoop + /// Represents the resulting loop structure when an OpenMP loop + // transformation, generates a single, top-level loop + TransformSingleLoop, + + /// @var OMPLoopCategory::TransformLoopSequence + /// Represents the resulting loop structure when an OpenMP loop + /// transformation + /// generates a sequence of two or more canonical loop nests + TransformLoopSequence + }; + + /// The main recursive process of `checkTransformableLoopSequence` that + /// performs grammatical parsing of a canonical loop sequence. It extracts + /// key information, such as the number of top-level loops, loop statements, + /// helper expressions, and other relevant loop-related data, all in a single + /// execution to avoid redundant traversals. This analysis flattens inner + /// Loop Sequences + /// + /// \param LoopSeqStmt The AST of the original statement. + /// \param LoopSeqSize [out] Number of top level canonical loops. + /// \param NumLoops [out] Number of total canonical loops (nested too). + /// \param LoopHelpers [out] The multiple loop analyses results. + /// \param ForStmts [out] The multiple Stmt of each For loop. + /// \param OriginalInits [out] The raw original initialization statements + /// of each belonging to a loop of the loop sequence + /// \param TransformPreInits [out] The multiple collection of statements and + /// declarations that must have been executed/declared + /// before entering the loop (each belonging to a + /// particular loop transformation, nullptr otherwise) + /// \param LoopSequencePreInits [out] Additional general collection of loop + /// transformation related statements and declarations + /// not bounded to a particular loop that must be + /// executed before entering the loop transformation + /// \param LoopCategories [out] A sequence of OMPLoopCategory values, + /// one for each loop or loop transformation node + /// successfully analyzed. + /// \param Context + /// \param Kind The loop transformation directive kind. + /// \return Whether the original statement is both syntactically and + /// semantically correct according to OpenMP 6.0 canonical loop + /// sequence definition. + bool analyzeLoopSequence( + Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops, + SmallVectorImpl &LoopHelpers, + SmallVectorImpl &ForStmts, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl &LoopCategories, ASTContext &Context, + OpenMPDirectiveKind Kind); + + /// Validates and checks whether a loop sequence can be transformed according + /// to the given directive, providing necessary setup and initialization + /// (Driver function) before recursion using `analyzeLoopSequence`. + /// + /// \param Kind The loop transformation directive kind. + /// \param AStmt The AST of the original statement + /// \param LoopSeqSize [out] Number of top level canonical loops. + /// \param NumLoops [out] Number of total canonical loops (nested too) + /// \param LoopHelpers [out] The multiple loop analyses results. + /// \param ForStmts [out] The multiple Stmt of each For loop. + /// \param OriginalInits [out] The raw original initialization statements + /// of each belonging to a loop of the loop sequence + /// \param TransformsPreInits [out] The multiple collection of statements and + /// declarations that must have been executed/declared + /// before entering the loop (each belonging to a + /// particular loop transformation, nullptr otherwise) + /// \param LoopSequencePreInits [out] Additional general collection of loop + /// transformation related statements and declarations + /// not bounded to a particular loop that must be + /// executed before entering the loop transformation + /// \param LoopCategories [out] A sequence of OMPLoopCategory values, + /// one for each loop or loop transformation node + /// successfully analyzed. + /// \param Context + /// \return Whether there was an absence of errors or not + bool checkTransformableLoopSequence( + OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize, + unsigned &NumLoops, + SmallVectorImpl &LoopHelpers, + SmallVectorImpl &ForStmts, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl &LoopCategories, ASTContext &Context); /// Helper to keep information about the current `omp begin/end declare /// variant` nesting. diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 5cb9998126a85..8fe9d8248d66f 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1948,6 +1948,7 @@ enum StmtCode { STMT_OMP_UNROLL_DIRECTIVE, STMT_OMP_REVERSE_DIRECTIVE, STMT_OMP_INTERCHANGE_DIRECTIVE, + STMT_OMP_FUSE_DIRECTIVE, STMT_OMP_FOR_DIRECTIVE, STMT_OMP_FOR_SIMD_DIRECTIVE, STMT_OMP_SECTIONS_DIRECTIVE, diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 0e5052b944162..e0570262b2a05 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -1024,6 +1024,29 @@ OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) { return new (C) OMPPartialClause(); } +OMPLoopRangeClause * +OMPLoopRangeClause::Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc, + ArrayRef Args) { + + assert(Args.size() == 2 && + "looprange clause must have exactly two arguments"); + OMPLoopRangeClause *Clause = CreateEmpty(C); + Clause->setLocStart(StartLoc); + Clause->setLParenLoc(LParenLoc); + Clause->setFirstLoc(FirstLoc); + Clause->setCountLoc(CountLoc); + Clause->setLocEnd(EndLoc); + Clause->setArgs(Args); + return Clause; +} + +OMPLoopRangeClause *OMPLoopRangeClause::CreateEmpty(const ASTContext &C) { + void *Mem = C.Allocate(totalSizeToAlloc(2)); + return new (Mem) OMPLoopRangeClause(); +} + OMPAllocateClause *OMPAllocateClause::Create( const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, Expr *Allocator, Expr *Alignment, SourceLocation ColonLoc, @@ -1888,6 +1911,21 @@ void OMPClausePrinter::VisitOMPPartialClause(OMPPartialClause *Node) { } } +void OMPClausePrinter::VisitOMPLoopRangeClause(OMPLoopRangeClause *Node) { + OS << "looprange"; + + Expr *First = Node->getFirst(); + Expr *Count = Node->getCount(); + + if (First && Count) { + OS << "("; + First->printPretty(OS, nullptr, Policy, 0); + OS << ","; + Count->printPretty(OS, nullptr, Policy, 0); + OS << ")"; + } +} + void OMPClausePrinter::VisitOMPAllocatorClause(OMPAllocatorClause *Node) { OS << "allocator("; Node->getAllocator()->printPretty(OS, nullptr, Policy, 0); diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 093e1f659916f..e6b52792885ba 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -456,6 +456,10 @@ OMPUnrollDirective::Create(const ASTContext &C, SourceLocation StartLoc, auto *Dir = createDirective( C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc); Dir->setNumGeneratedLoops(NumGeneratedLoops); + // The number of generated loops and loop nests during unroll matches + // given that unroll only generates top level canonical loop nests + // so each generated loop is a top level canonical loop nest + Dir->setNumGeneratedLoopNests(NumGeneratedLoops); Dir->setTransformedStmt(TransformedStmt); Dir->setPreInits(PreInits); return Dir; @@ -505,6 +509,43 @@ OMPInterchangeDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses, SourceLocation(), SourceLocation(), NumLoops); } +OMPFuseDirective *OMPFuseDirective::Create( + const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + ArrayRef Clauses, unsigned NumLoops, unsigned NumLoopNests, + Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits) { + + OMPFuseDirective *Dir = createDirective( + C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc, + NumLoops); + Dir->setTransformedStmt(TransformedStmt); + Dir->setPreInits(PreInits); + // The number of top level canonical nests could + // not match the total number of generated loops + // Example: + // Before fusion: + // for (int i = 0; i < N; ++i) + // for (int j = 0; j < M; ++j) + // A[i][j] = i + j; + // + // for (int k = 0; k < P; ++k) + // B[k] = k * 2; + // Here, NumLoopNests = 2, but NumLoops = 3. + Dir->setNumGeneratedLoopNests(NumLoopNests); + Dir->setNumGeneratedLoops(NumLoops); + return Dir; +} + +OMPFuseDirective *OMPFuseDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, + unsigned NumLoops, + unsigned NumLoopNests) { + OMPFuseDirective *Dir = createEmptyDirective( + C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1, + SourceLocation(), SourceLocation(), NumLoops); + Dir->setNumGeneratedLoopNests(NumLoopNests); + return Dir; +} + OMPForSimdDirective * OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index dc8af1586624b..12a1d5a943704 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -791,6 +791,11 @@ void StmtPrinter::VisitOMPInterchangeDirective(OMPInterchangeDirective *Node) { PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPFuseDirective(OMPFuseDirective *Node) { + Indent() << "#pragma omp fuse"; + PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) { Indent() << "#pragma omp for"; PrintOMPExecutableDirective(Node); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index f7d1655f67ed1..9f0ce076c35fa 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -511,6 +511,13 @@ void OMPClauseProfiler::VisitOMPPartialClause(const OMPPartialClause *C) { Profiler->VisitExpr(Factor); } +void OMPClauseProfiler::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) { + if (const Expr *First = C->getFirst()) + Profiler->VisitExpr(First); + if (const Expr *Count = C->getCount()) + Profiler->VisitExpr(Count); +} + void OMPClauseProfiler::VisitOMPAllocatorClause(const OMPAllocatorClause *C) { if (C->getAllocator()) Profiler->VisitStmt(C->getAllocator()); @@ -1026,6 +1033,10 @@ void StmtProfiler::VisitOMPInterchangeDirective( VisitOMPLoopTransformationDirective(S); } +void StmtProfiler::VisitOMPFuseDirective(const OMPFuseDirective *S) { + VisitOMPLoopTransformationDirective(S); +} + void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) { VisitOMPLoopDirective(S); } diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index a451fc7c01841..53a9f80e6d3b7 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -248,6 +248,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, case OMPC_affinity: case OMPC_when: case OMPC_append_args: + case OMPC_looprange: break; default: break; @@ -583,6 +584,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, case OMPC_affinity: case OMPC_when: case OMPC_append_args: + case OMPC_looprange: break; default: break; @@ -702,7 +704,8 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) { bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) { return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse || - DKind == OMPD_interchange || DKind == OMPD_stripe; + DKind == OMPD_interchange || DKind == OMPD_stripe || + DKind == OMPD_fuse; } bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) { diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 2c0edd69d96e9..f369ae01a7a4e 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3241,9 +3241,8 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { var, ConvertTypeForMem(VD->getType()), getContext().getDeclAlign(VD)); // No other cases for now. - } else { + } else llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?"); - } // Handle threadlocal function locals. if (VD->getTLSKind() != VarDecl::TLS_None) diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 3b701116e5e4b..f06072ba5eea9 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -233,6 +233,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { case Stmt::OMPInterchangeDirectiveClass: EmitOMPInterchangeDirective(cast(*S)); break; + case Stmt::OMPFuseDirectiveClass: + EmitOMPFuseDirective(cast(*S)); + break; case Stmt::OMPForDirectiveClass: EmitOMPForDirective(cast(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 803c7ed37635e..0c664b0f89044 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -197,6 +197,8 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { } else if (const auto *Interchange = dyn_cast(&S)) { PreInits = Interchange->getPreInits(); + } else if (const auto *Fuse = dyn_cast(&S)) { + PreInits = Fuse->getPreInits(); } else { llvm_unreachable("Unknown loop-based directive kind."); } @@ -2918,6 +2920,12 @@ void CodeGenFunction::EmitOMPInterchangeDirective( EmitStmt(S.getTransformedStmt()); } +void CodeGenFunction::EmitOMPFuseDirective(const OMPFuseDirective &S) { + // Emit the de-sugared statement + OMPTransformDirectiveScopeRAII FuseScope(*this, &S); + EmitStmt(S.getTransformedStmt()); +} + void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 92e9ab8156ad2..f43682acb0b95 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3911,6 +3911,7 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitOMPUnrollDirective(const OMPUnrollDirective &S); void EmitOMPReverseDirective(const OMPReverseDirective &S); void EmitOMPInterchangeDirective(const OMPInterchangeDirective &S); + void EmitOMPFuseDirective(const OMPFuseDirective &S); void EmitOMPForDirective(const OMPForDirective &S); void EmitOMPForSimdDirective(const OMPForSimdDirective &S); void EmitOMPScopeDirective(const OMPScopeDirective &S); diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index cfffcdb01a514..ade5192d1968d 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3041,6 +3041,39 @@ OMPClause *Parser::ParseOpenMPSizesClause() { OpenLoc, CloseLoc); } +OMPClause *Parser::ParseOpenMPLoopRangeClause() { + SourceLocation ClauseNameLoc = ConsumeToken(); + SourceLocation FirstLoc, CountLoc; + + BalancedDelimiterTracker T(*this, tok::l_paren, tok::annot_pragma_openmp_end); + if (T.consumeOpen()) { + Diag(Tok, diag::err_expected) << tok::l_paren; + return nullptr; + } + + FirstLoc = Tok.getLocation(); + ExprResult FirstVal = ParseConstantExpression(); + if (!FirstVal.isUsable()) { + T.skipToEnd(); + return nullptr; + } + + ExpectAndConsume(tok::comma); + + CountLoc = Tok.getLocation(); + ExprResult CountVal = ParseConstantExpression(); + if (!CountVal.isUsable()) { + T.skipToEnd(); + return nullptr; + } + + T.consumeClose(); + + return Actions.OpenMP().ActOnOpenMPLoopRangeClause( + FirstVal.get(), CountVal.get(), ClauseNameLoc, T.getOpenLocation(), + FirstLoc, CountLoc, T.getCloseLocation()); +} + OMPClause *Parser::ParseOpenMPPermutationClause() { SourceLocation ClauseNameLoc, OpenLoc, CloseLoc; SmallVector ArgExprs; @@ -3469,6 +3502,9 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, } Clause = ParseOpenMPClause(CKind, WrongDirective); break; + case OMPC_looprange: + Clause = ParseOpenMPLoopRangeClause(); + break; default: break; } diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index c83eab53891ca..85a374e6eb9b2 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1491,6 +1491,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::OMPUnrollDirectiveClass: case Stmt::OMPReverseDirectiveClass: case Stmt::OMPInterchangeDirectiveClass: + case Stmt::OMPFuseDirectiveClass: case Stmt::OMPSingleDirectiveClass: case Stmt::OMPTargetDataDirectiveClass: case Stmt::OMPTargetDirectiveClass: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index f16f841d62edd..76484b577f9c1 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -22,6 +22,7 @@ #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DynamicRecursiveASTVisitor.h" #include "clang/AST/OpenMPClause.h" +#include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/StmtVisitor.h" @@ -47,6 +48,7 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/IR/Assumptions.h" #include +#include using namespace clang; using namespace llvm::omp; @@ -4404,6 +4406,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, case OMPD_unroll: case OMPD_reverse: case OMPD_interchange: + case OMPD_fuse: case OMPD_assume: break; default: @@ -6221,6 +6224,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPInterchangeDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); break; + case OMPD_fuse: + Res = + ActOnOpenMPFuseDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); + break; case OMPD_for: Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); @@ -14152,10 +14159,49 @@ StmtResult SemaOpenMP::ActOnOpenMPTargetTeamsDistributeSimdDirective( getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B); } +/// Overloaded base case function +template static bool tryHandleAs(T *t, F &&) { + return false; +} + +/// +/// Tries to recursively cast `t` to one of the given types and invokes `f` if +/// successful. +/// +/// @tparam Class The first type to check. +/// @tparam Rest The remaining types to check. +/// @tparam T The base type of `t`. +/// @tparam F The callable type for the function to invoke upon a successful +/// cast. +/// @param t The object to be checked. +/// @param f The function to invoke if `t` matches `Class`. +/// @return `true` if `t` matched any type and `f` was called, otherwise +/// `false`. +template +static bool tryHandleAs(T *t, F &&f) { + if (Class *c = dyn_cast(t)) { + f(c); + return true; + } + return tryHandleAs(t, std::forward(f)); +} + +/// Updates OriginalInits by checking Transform against loop transformation +/// directives and appending their pre-inits if a match is found. +static void updatePreInits(OMPLoopBasedDirective *Transform, + SmallVectorImpl> &PreInits) { + if (!tryHandleAs( + Transform, [&PreInits](auto *Dir) { + appendFlattenedStmtList(PreInits.back(), Dir->getPreInits()); + })) + llvm_unreachable("Unhandled loop transformation"); +} + bool SemaOpenMP::checkTransformableLoopNest( OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops, SmallVectorImpl &LoopHelpers, - Stmt *&Body, SmallVectorImpl> &OriginalInits) { + Stmt *&Body, SmallVectorImpl> &OriginalInits) { OriginalInits.emplace_back(); bool Result = OMPLoopBasedDirective::doForAllLoops( AStmt->IgnoreContainers(), /*TryImperfectlyNestedLoops=*/false, NumLoops, @@ -14182,27 +14228,320 @@ bool SemaOpenMP::checkTransformableLoopNest( return false; }, [&OriginalInits](OMPLoopBasedDirective *Transform) { - Stmt *DependentPreInits; - if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else if (auto *Dir = dyn_cast(Transform)) - DependentPreInits = Dir->getPreInits(); - else - llvm_unreachable("Unhandled loop transformation"); - - appendFlattenedStmtList(OriginalInits.back(), DependentPreInits); + updatePreInits(Transform, OriginalInits); }); assert(OriginalInits.back().empty() && "No preinit after innermost loop"); OriginalInits.pop_back(); return Result; } +/// Counts the total number of nested loops, including the outermost loop (the +/// original loop). PRECONDITION of this visitor is that it must be invoked from +/// the original loop to be analyzed. The traversal is stop for Decl's and +/// Expr's given that they may contain inner loops that must not be counted. +/// +/// Example AST structure for the code: +/// +/// int main() { +/// #pragma omp fuse +/// { +/// for (int i = 0; i < 100; i++) { <-- Outer loop +/// []() { +/// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP +/// }; +/// for(int j = 0; j < 5; ++j) {} <-- Inner loop +/// } +/// for (int r = 0; i < 100; i++) { <-- Outer loop +/// struct LocalClass { +/// void bar() { +/// for(int j = 0; j < 100; j++) {} <-- NOT A LOOP +/// } +/// }; +/// for(int k = 0; k < 10; ++k) {} <-- Inner loop +/// {x = 5; for(k = 0; k < 10; ++k) x += k; x}; <-- NOT A LOOP +/// } +/// } +/// } +/// Result: Loop 'i' contains 2 loops, Loop 'r' also contains 2 loops +class NestedLoopCounterVisitor : public DynamicRecursiveASTVisitor { +private: + unsigned NestedLoopCount = 0; + +public: + explicit NestedLoopCounterVisitor() = default; + + unsigned getNestedLoopCount() const { return NestedLoopCount; } + + bool VisitForStmt(ForStmt *FS) override { + ++NestedLoopCount; + return true; + } + + bool VisitCXXForRangeStmt(CXXForRangeStmt *FRS) override { + ++NestedLoopCount; + return true; + } + + bool TraverseStmt(Stmt *S) override { + if (!S) + return true; + + // Skip traversal of all expressions, including special cases like + // LambdaExpr, StmtExpr, BlockExpr, and RequiresExpr. These expressions + // may contain inner statements (and even loops), but they are not part + // of the syntactic body of the surrounding loop structure. + // Therefore must not be counted + if (isa(S)) + return true; + + // Only recurse into CompoundStmt (block {}) and loop bodies + if (isa(S)) { + return DynamicRecursiveASTVisitor::TraverseStmt(S); + } + + // Stop traversal of the rest of statements, that break perfect + // loop nesting, such as control flow (IfStmt, SwitchStmt...) + return true; + } + + bool TraverseDecl(Decl *D) override { + // Stop in the case of finding a declaration, it is not important + // in order to find nested loops (Possible CXXRecordDecl, RecordDecl, + // FunctionDecl...) + return true; + } +}; + +bool SemaOpenMP::analyzeLoopSequence( + Stmt *LoopSeqStmt, unsigned &LoopSeqSize, unsigned &NumLoops, + SmallVectorImpl &LoopHelpers, + SmallVectorImpl &ForStmts, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl &LoopCategories, ASTContext &Context, + OpenMPDirectiveKind Kind) { + + VarsWithInheritedDSAType TmpDSA; + /// Helper Lambda to handle storing initialization and body statements for + /// both ForStmt and CXXForRangeStmt + auto StoreLoopStatements = [&](Stmt *LoopStmt) { + if (auto *For = dyn_cast(LoopStmt)) { + OriginalInits.back().push_back(For->getInit()); + ForStmts.push_back(For); + } else { + auto *CXXFor = cast(LoopStmt); + OriginalInits.back().push_back(CXXFor->getBeginStmt()); + ForStmts.push_back(CXXFor); + } + }; + + /// Helper lambda functions to encapsulate the processing of different + /// derivations of the canonical loop sequence grammar + /// Modularized code for handling loop generation and transformations + auto AnalyzeLoopGeneration = [&](Stmt *Child) { + auto *LoopTransform = dyn_cast(Child); + Stmt *TransformedStmt = LoopTransform->getTransformedStmt(); + unsigned NumGeneratedLoopNests = LoopTransform->getNumGeneratedLoopNests(); + unsigned NumGeneratedLoops = LoopTransform->getNumGeneratedLoops(); + // Handle the case where transformed statement is not available due to + // dependent contexts + if (!TransformedStmt) { + if (NumGeneratedLoopNests > 0) { + LoopSeqSize += NumGeneratedLoopNests; + NumLoops += NumGeneratedLoops; + return true; + } else { + // Unroll full (0 loops produced) + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + } + } + // Handle loop transformations with multiple loop nests + // Unroll full + if (NumGeneratedLoopNests <= 0) { + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + } + // Loop transformatons such as split or loopranged fuse + else if (NumGeneratedLoopNests > 1) { + // Get the preinits related to this loop sequence generating + // loop transformation (i.e loopranged fuse, split...) + LoopSequencePreInits.emplace_back(); + // These preinits differ slightly from regular inits/pre-inits related + // to single loop generating loop transformations (interchange, unroll) + // given that they are not bounded to a particular loop nest + // so they need to be treated independently + updatePreInits(LoopTransform, LoopSequencePreInits); + return analyzeLoopSequence(TransformedStmt, LoopSeqSize, NumLoops, + LoopHelpers, ForStmts, OriginalInits, + TransformsPreInits, LoopSequencePreInits, + LoopCategories, Context, Kind); + } else { + // Vast majority: (Tile, Unroll, Stripe, Reverse, Interchange, Fuse all) + // Process the transformed loop statement + OriginalInits.emplace_back(); + TransformsPreInits.emplace_back(); + LoopHelpers.emplace_back(); + LoopCategories.push_back(OMPLoopCategory::TransformSingleLoop); + + unsigned IsCanonical = + checkOpenMPLoop(Kind, nullptr, nullptr, TransformedStmt, SemaRef, + *DSAStack, TmpDSA, LoopHelpers[LoopSeqSize]); + + if (!IsCanonical) { + Diag(TransformedStmt->getBeginLoc(), diag::err_omp_not_canonical_loop) + << getOpenMPDirectiveName(Kind); + return false; + } + StoreLoopStatements(TransformedStmt); + updatePreInits(LoopTransform, TransformsPreInits); + + NumLoops += NumGeneratedLoops; + ++LoopSeqSize; + return true; + } + }; + + /// Modularized code for handling regular canonical loops + auto AnalyzeRegularLoop = [&](Stmt *Child) { + OriginalInits.emplace_back(); + LoopHelpers.emplace_back(); + LoopCategories.push_back(OMPLoopCategory::RegularLoop); + + unsigned IsCanonical = + checkOpenMPLoop(Kind, nullptr, nullptr, Child, SemaRef, *DSAStack, + TmpDSA, LoopHelpers[LoopSeqSize]); + + if (!IsCanonical) { + Diag(Child->getBeginLoc(), diag::err_omp_not_canonical_loop) + << getOpenMPDirectiveName(Kind); + return false; + } + + StoreLoopStatements(Child); + auto NLCV = NestedLoopCounterVisitor(); + NLCV.TraverseStmt(Child); + NumLoops += NLCV.getNestedLoopCount(); + return true; + }; + + /// Helper functions to validate loop sequence grammar derivations + auto IsLoopSequenceDerivation = [](auto *Child) { + return isa(Child); + }; + /// Helper functions to validate loop generating grammar derivations + auto IsLoopGeneratingStmt = [](auto *Child) { + return isa(Child); + }; + + // High level grammar validation + for (auto *Child : LoopSeqStmt->children()) { + + if (!Child) + continue; + + // Skip over non-loop-sequence statements + if (!IsLoopSequenceDerivation(Child)) { + Child = Child->IgnoreContainers(); + + // Ignore empty compound statement + if (!Child) + continue; + + // In the case of a nested loop sequence ignoring containers would not + // be enough, a recurisve transversal of the loop sequence is required + if (isa(Child)) { + if (!analyzeLoopSequence(Child, LoopSeqSize, NumLoops, LoopHelpers, + ForStmts, OriginalInits, TransformsPreInits, + LoopSequencePreInits, LoopCategories, Context, + Kind)) + return false; + // Already been treated, skip this children + continue; + } + } + // Regular loop sequence handling + if (IsLoopSequenceDerivation(Child)) { + if (IsLoopGeneratingStmt(Child)) { + if (!AnalyzeLoopGeneration(Child)) + return false; + + // AnalyzeLoopGeneration updates Loop Sequence size accordingly + + } else { + if (!AnalyzeRegularLoop(Child)) + return false; + + // Update the Loop Sequence size by one + ++LoopSeqSize; + } + } else { + // Report error for invalid statement inside canonical loop sequence + Diag(Child->getBeginLoc(), diag::err_omp_not_for) + << 0 << getOpenMPDirectiveName(Kind); + return false; + } + } + return true; +} + +bool SemaOpenMP::checkTransformableLoopSequence( + OpenMPDirectiveKind Kind, Stmt *AStmt, unsigned &LoopSeqSize, + unsigned &NumLoops, + SmallVectorImpl &LoopHelpers, + SmallVectorImpl &ForStmts, + SmallVectorImpl> &OriginalInits, + SmallVectorImpl> &TransformsPreInits, + SmallVectorImpl> &LoopSequencePreInits, + SmallVectorImpl &LoopCategories, ASTContext &Context) { + + // Checks whether the given statement is a compound statement + if (!isa(AStmt)) { + Diag(AStmt->getBeginLoc(), diag::err_omp_not_a_loop_sequence) + << getOpenMPDirectiveName(Kind); + return false; + } + // Number of top level canonical loop nests observed (And acts as index) + LoopSeqSize = 0; + // Number of total observed loops + NumLoops = 0; + + // Following OpenMP 6.0 API Specification, a Canonical Loop Sequence follows + // the grammar: + // + // canonical-loop-sequence: + // { + // loop-sequence+ + // } + // where loop-sequence can be any of the following: + // 1. canonical-loop-sequence + // 2. loop-nest + // 3. loop-sequence-generating-construct (i.e OMPLoopTransformationDirective) + // + // To recognise and traverse this structure the following helper functions + // have been defined. analyzeLoopSequence serves as the recurisve entry point + // and tries to match the input AST to the canonical loop sequence grammar + // structure. This function will perform both a semantic and syntactical + // analysis of the given statement according to OpenMP 6.0 definition of + // the aforementioned canonical loop sequence + + // Recursive entry point to process the main loop sequence + if (!analyzeLoopSequence(AStmt, LoopSeqSize, NumLoops, LoopHelpers, ForStmts, + OriginalInits, TransformsPreInits, + LoopSequencePreInits, LoopCategories, Context, Kind)) + return false; + + if (LoopSeqSize <= 0) { + Diag(AStmt->getBeginLoc(), diag::err_omp_empty_loop_sequence) + << getOpenMPDirectiveName(Kind); + return false; + } + return true; +} + /// Add preinit statements that need to be propageted from the selected loop. static void addLoopPreInits(ASTContext &Context, OMPLoopBasedDirective::HelperExprs &LoopHelper, @@ -14226,9 +14565,7 @@ static void addLoopPreInits(ASTContext &Context, RangeEnd->getBeginLoc(), RangeEnd->getEndLoc())); } - llvm::append_range(PreInits, OriginalInit); - // List of OMPCapturedExprDecl, for __begin, __end, and NumIterations if (auto *PI = cast_or_null(LoopHelper.PreInits)) { PreInits.push_back(new (Context) DeclStmt( @@ -14288,7 +14625,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef Clauses, // Verify and diagnose loop nest. SmallVector LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector, 4> OriginalInits; + SmallVector, 4> OriginalInits; if (!checkTransformableLoopNest(OMPD_tile, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -14565,7 +14902,7 @@ StmtResult SemaOpenMP::ActOnOpenMPStripeDirective(ArrayRef Clauses, // Verify and diagnose loop nest. SmallVector LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector, 4> OriginalInits; + SmallVector, 4> OriginalInits; if (!checkTransformableLoopNest(OMPD_stripe, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -14826,7 +15163,7 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, Stmt *Body = nullptr; SmallVector LoopHelpers( NumLoops); - SmallVector, NumLoops + 1> OriginalInits; + SmallVector, NumLoops + 1> OriginalInits; if (!checkTransformableLoopNest(OMPD_unroll, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15094,7 +15431,7 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt, Stmt *Body = nullptr; SmallVector LoopHelpers( NumLoops); - SmallVector, NumLoops + 1> OriginalInits; + SmallVector, NumLoops + 1> OriginalInits; if (!checkTransformableLoopNest(OMPD_reverse, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15286,7 +15623,7 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( // Verify and diagnose loop nest. SmallVector LoopHelpers(NumLoops); Stmt *Body = nullptr; - SmallVector, 2> OriginalInits; + SmallVector, 2> OriginalInits; if (!checkTransformableLoopNest(OMPD_interchange, AStmt, NumLoops, LoopHelpers, Body, OriginalInits)) return StmtError(); @@ -15462,6 +15799,496 @@ StmtResult SemaOpenMP::ActOnOpenMPInterchangeDirective( buildPreInits(Context, PreInits)); } +StmtResult SemaOpenMP::ActOnOpenMPFuseDirective(ArrayRef Clauses, + Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { + + ASTContext &Context = getASTContext(); + DeclContext *CurrContext = SemaRef.CurContext; + Scope *CurScope = SemaRef.getCurScope(); + CaptureVars CopyTransformer(SemaRef); + + // Ensure the structured block is not empty + if (!AStmt) + return StmtError(); + + unsigned NumLoops = 1; + unsigned LoopSeqSize = 1; + + // Defer transformation in dependent contexts + // The NumLoopNests argument is set to a placeholder 1 (even though + // using looprange fuse could yield up to 3 top level loop nests) + // because a dependent context could prevent determining its true value + if (CurrContext->isDependentContext()) { + return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, + NumLoops, LoopSeqSize, AStmt, nullptr, + nullptr); + } + + // Validate that the potential loop sequence is transformable for fusion + // Also collect the HelperExprs, Loop Stmts, Inits, and Number of loops + SmallVector LoopHelpers; + SmallVector LoopStmts; + SmallVector> OriginalInits; + SmallVector> TransformsPreInits; + SmallVector> LoopSequencePreInits; + SmallVector LoopCategories; + if (!checkTransformableLoopSequence(OMPD_fuse, AStmt, LoopSeqSize, NumLoops, + LoopHelpers, LoopStmts, OriginalInits, + TransformsPreInits, LoopSequencePreInits, + LoopCategories, Context)) + return StmtError(); + + // Handle clauses, which can be any of the following: [looprange, apply] + const OMPLoopRangeClause *LRC = + OMPExecutableDirective::getSingleClause(Clauses); + + // The clause arguments are invalidated if any error arises + // such as non-constant or non-positive arguments + if (LRC && (!LRC->getFirst() || !LRC->getCount())) + return StmtError(); + + // Delayed semantic check of LoopRange constraint + // Evaluates the loop range arguments and returns the first and count values + auto EvaluateLoopRangeArguments = [&Context](Expr *First, Expr *Count, + uint64_t &FirstVal, + uint64_t &CountVal) { + llvm::APSInt FirstInt = First->EvaluateKnownConstInt(Context); + llvm::APSInt CountInt = Count->EvaluateKnownConstInt(Context); + FirstVal = FirstInt.getZExtValue(); + CountVal = CountInt.getZExtValue(); + }; + + // OpenMP [6.0, Restrictions] + // first + count - 1 must not evaluate to a value greater than the + // loop sequence length of the associated canonical loop sequence. + auto ValidLoopRange = [](uint64_t FirstVal, uint64_t CountVal, + unsigned NumLoops) -> bool { + return FirstVal + CountVal - 1 <= NumLoops; + }; + uint64_t FirstVal = 1, CountVal = 0, LastVal = LoopSeqSize; + + // Validates the loop range after evaluating the semantic information + // and ensures that the range is valid for the given loop sequence size. + // Expressions are evaluated at compile time to obtain constant values. + if (LRC) { + EvaluateLoopRangeArguments(LRC->getFirst(), LRC->getCount(), FirstVal, + CountVal); + if (CountVal == 1) + SemaRef.Diag(LRC->getCountLoc(), diag::warn_omp_redundant_fusion) + << getOpenMPDirectiveName(OMPD_fuse); + + if (!ValidLoopRange(FirstVal, CountVal, LoopSeqSize)) { + SemaRef.Diag(LRC->getFirstLoc(), diag::err_omp_invalid_looprange) + << getOpenMPDirectiveName(OMPD_fuse) << (FirstVal + CountVal - 1) + << LoopSeqSize; + return StmtError(); + } + + LastVal = FirstVal + CountVal - 1; + } + + // Complete fusion generates a single canonical loop nest + // However looprange clause generates several loop nests + unsigned NumLoopNests = LRC ? LoopSeqSize - CountVal + 1 : 1; + + // Emit a warning for redundant loop fusion when the sequence contains only + // one loop. + if (LoopSeqSize == 1) + SemaRef.Diag(AStmt->getBeginLoc(), diag::warn_omp_redundant_fusion) + << getOpenMPDirectiveName(OMPD_fuse); + + assert(LoopHelpers.size() == LoopSeqSize && + "Expecting loop iteration space dimensionality to match number of " + "affected loops"); + assert(OriginalInits.size() == LoopSeqSize && + "Expecting loop iteration space dimensionality to match number of " + "affected loops"); + + // Select the type with the largest bit width among all induction variables + QualType IVType = LoopHelpers[FirstVal - 1].IterationVarRef->getType(); + for (unsigned int I = FirstVal; I < LastVal; ++I) { + QualType CurrentIVType = LoopHelpers[I].IterationVarRef->getType(); + if (Context.getTypeSize(CurrentIVType) > Context.getTypeSize(IVType)) { + IVType = CurrentIVType; + } + } + uint64_t IVBitWidth = Context.getIntWidth(IVType); + + // Create pre-init declarations for all loops lower bounds, upper bounds, + // strides and num-iterations for every top level loop in the fusion + SmallVector LBVarDecls; + SmallVector STVarDecls; + SmallVector NIVarDecls; + SmallVector UBVarDecls; + SmallVector IVVarDecls; + + // Helper lambda to create variables for bounds, strides, and other + // expressions. Generates both the variable declaration and the corresponding + // initialization statement. + auto CreateHelperVarAndStmt = + [&, &SemaRef = SemaRef](Expr *ExprToCopy, const std::string &BaseName, + unsigned I, bool NeedsNewVD = false) { + Expr *TransformedExpr = + AssertSuccess(CopyTransformer.TransformExpr(ExprToCopy)); + if (!TransformedExpr) + return std::pair(nullptr, StmtError()); + + auto Name = (Twine(".omp.") + BaseName + std::to_string(I)).str(); + + VarDecl *VD; + if (NeedsNewVD) { + VD = buildVarDecl(SemaRef, SourceLocation(), IVType, Name); + SemaRef.AddInitializerToDecl(VD, TransformedExpr, false); + + } else { + // Create a unique variable name + DeclRefExpr *DRE = cast(TransformedExpr); + VD = cast(DRE->getDecl()); + VD->setDeclName(&SemaRef.PP.getIdentifierTable().get(Name)); + } + // Create the corresponding declaration statement + StmtResult DeclStmt = new (Context) class DeclStmt( + DeclGroupRef(VD), SourceLocation(), SourceLocation()); + return std::make_pair(VD, DeclStmt); + }; + + // PreInits hold a sequence of variable declarations that must be executed + // before the fused loop begins. These include bounds, strides, and other + // helper variables required for the transformation. Other loop transforms + // also contain their own preinits + SmallVector PreInits; + // Iterator to keep track of loop transformations + unsigned int TransformIndex = 0; + + // Update the general preinits using the preinits generated by loop sequence + // generating loop transformations. These preinits differ slightly from + // single-loop transformation preinits, as they can be detached from a + // specific loop inside the multiple generated loop nests. This happens + // because certain helper variables, like '.omp.fuse.max', are introduced to + // handle fused iteration spaces and may not be directly tied to a single + // original loop. the preinit structure must ensure that hidden variables + // like '.omp.fuse.max' are still properly handled. + // Transformations that apply this concept: Loopranged Fuse, Split + if (!LoopSequencePreInits.empty()) { + for (const auto <PreInits : LoopSequencePreInits) { + if (!LTPreInits.empty()) + llvm::append_range(PreInits, LTPreInits); + } + } + + // Process each single loop to generate and collect declarations + // and statements for all helper expressions related to + // particular single loop nests + + // Also In the case of the fused loops, we keep track of their original + // inits by appending them to their preinits statement, and in the case of + // transformations, also append their preinits (which contain the original + // loop initialization statement or other statements) + + // Firstly we need to update TransformIndex to match the begining of the + // looprange section + for (unsigned int I = 0; I < FirstVal - 1; ++I) { + if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) + ++TransformIndex; + } + for (unsigned int I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { + + if (LoopCategories[I] == OMPLoopCategory::RegularLoop) { + addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], + PreInits); + } else if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) { + // For transformed loops, insert both pre-inits and original inits. + // Order matters: pre-inits may define variables used in the original + // inits such as upper bounds... + auto TransformPreInit = TransformsPreInits[TransformIndex++]; + if (!TransformPreInit.empty()) + llvm::append_range(PreInits, TransformPreInit); + + addLoopPreInits(Context, LoopHelpers[I], LoopStmts[I], OriginalInits[I], + PreInits); + } + auto [UBVD, UBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].UB, "ub", J); + auto [LBVD, LBDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].LB, "lb", J); + auto [STVD, STDStmt] = CreateHelperVarAndStmt(LoopHelpers[I].ST, "st", J); + auto [NIVD, NIDStmt] = + CreateHelperVarAndStmt(LoopHelpers[I].NumIterations, "ni", J, true); + auto [IVVD, IVDStmt] = + CreateHelperVarAndStmt(LoopHelpers[I].IterationVarRef, "iv", J); + + if (!LBVD || !STVD || !NIVD || !IVVD) + assert(LBVD && STVD && NIVD && IVVD && + "OpenMP Fuse Helper variables creation failed"); + + UBVarDecls.push_back(UBVD); + LBVarDecls.push_back(LBVD); + STVarDecls.push_back(STVD); + NIVarDecls.push_back(NIVD); + IVVarDecls.push_back(IVVD); + + PreInits.push_back(LBDStmt.get()); + PreInits.push_back(STDStmt.get()); + PreInits.push_back(NIDStmt.get()); + PreInits.push_back(IVDStmt.get()); + } + + auto MakeVarDeclRef = [&SemaRef = this->SemaRef](VarDecl *VD) { + return buildDeclRefExpr(SemaRef, VD, VD->getType(), VD->getLocation(), + false); + }; + + // Following up the creation of the final fused loop will be performed + // which has the following shape (considering the selected loops): + // + // for (fuse.index = 0; fuse.index < max(ni0, ni1..., nik); ++fuse.index) { + // if (fuse.index < ni0){ + // iv0 = lb0 + st0 * fuse.index; + // original.index0 = iv0 + // body(0); + // } + // if (fuse.index < ni1){ + // iv1 = lb1 + st1 * fuse.index; + // original.index1 = iv1 + // body(1); + // } + // + // ... + // + // if (fuse.index < nik){ + // ivk = lbk + stk * fuse.index; + // original.indexk = ivk + // body(k); Expr *InitVal = IntegerLiteral::Create(Context, + // llvm::APInt(IVWidth, 0), + + // } + + // 1. Create the initialized fuse index + const std::string IndexName = Twine(".omp.fuse.index").str(); + Expr *InitVal = IntegerLiteral::Create(Context, llvm::APInt(IVBitWidth, 0), + IVType, SourceLocation()); + VarDecl *IndexDecl = + buildVarDecl(SemaRef, {}, IVType, IndexName, nullptr, nullptr); + SemaRef.AddInitializerToDecl(IndexDecl, InitVal, false); + StmtResult InitStmt = new (Context) + DeclStmt(DeclGroupRef(IndexDecl), SourceLocation(), SourceLocation()); + + if (!InitStmt.isUsable()) + return StmtError(); + + auto MakeIVRef = [&SemaRef = this->SemaRef, IndexDecl, IVType, + Loc = InitVal->getExprLoc()]() { + return buildDeclRefExpr(SemaRef, IndexDecl, IVType, Loc, false); + }; + + // 2. Iteratively compute the max number of logical iterations Max(NI_1, NI_2, + // ..., NI_k) + // + // This loop accumulates the maximum value across multiple expressions, + // ensuring each step constructs a unique AST node for correctness. By using + // intermediate temporary variables and conditional operators, we maintain + // distinct nodes and avoid duplicating subtrees, For instance, max(a,b,c): + // omp.temp0 = max(a, b) + // omp.temp1 = max(omp.temp0, c) + // omp.fuse.max = max(omp.temp1, omp.temp0) + + ExprResult MaxExpr; + // I is the true + for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { + DeclRefExpr *NIRef = MakeVarDeclRef(NIVarDecls[J]); + QualType NITy = NIRef->getType(); + + if (MaxExpr.isUnset()) { + // Initialize MaxExpr with the first NI expression + MaxExpr = NIRef; + } else { + // Create a new acummulator variable t_i = MaxExpr + std::string TempName = (Twine(".omp.temp.") + Twine(J)).str(); + VarDecl *TempDecl = + buildVarDecl(SemaRef, {}, NITy, TempName, nullptr, nullptr); + TempDecl->setInit(MaxExpr.get()); + DeclRefExpr *TempRef = + buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false); + DeclRefExpr *TempRef2 = + buildDeclRefExpr(SemaRef, TempDecl, NITy, SourceLocation(), false); + // Add a DeclStmt to PreInits to ensure the variable is declared. + StmtResult TempStmt = new (Context) + DeclStmt(DeclGroupRef(TempDecl), SourceLocation(), SourceLocation()); + + if (!TempStmt.isUsable()) + return StmtError(); + PreInits.push_back(TempStmt.get()); + + // Build MaxExpr <-(MaxExpr > NIRef ? MaxExpr : NIRef) + ExprResult Comparison = + SemaRef.BuildBinOp(nullptr, SourceLocation(), BO_GT, TempRef, NIRef); + // Handle any errors in Comparison creation + if (!Comparison.isUsable()) + return StmtError(); + + DeclRefExpr *NIRef2 = MakeVarDeclRef(NIVarDecls[J]); + // Update MaxExpr using a conditional expression to hold the max value + MaxExpr = new (Context) ConditionalOperator( + Comparison.get(), SourceLocation(), TempRef2, SourceLocation(), + NIRef2->getExprStmt(), NITy, VK_LValue, OK_Ordinary); + + if (!MaxExpr.isUsable()) + return StmtError(); + } + } + if (!MaxExpr.isUsable()) + return StmtError(); + + // 3. Declare the max variable + const std::string MaxName = Twine(".omp.fuse.max").str(); + VarDecl *MaxDecl = + buildVarDecl(SemaRef, {}, IVType, MaxName, nullptr, nullptr); + MaxDecl->setInit(MaxExpr.get()); + DeclRefExpr *MaxRef = buildDeclRefExpr(SemaRef, MaxDecl, IVType, {}, false); + StmtResult MaxStmt = new (Context) + DeclStmt(DeclGroupRef(MaxDecl), SourceLocation(), SourceLocation()); + + if (MaxStmt.isInvalid()) + return StmtError(); + PreInits.push_back(MaxStmt.get()); + + // 4. Create condition Expr: index < n_max + ExprResult CondExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, + MakeIVRef(), MaxRef); + if (!CondExpr.isUsable()) + return StmtError(); + // 5. Increment Expr: ++index + ExprResult IncrExpr = + SemaRef.BuildUnaryOp(CurScope, SourceLocation(), UO_PreInc, MakeIVRef()); + if (!IncrExpr.isUsable()) + return StmtError(); + + // 6. Build the Fused Loop Body + // The final fused loop iterates over the maximum logical range. Inside the + // loop, each original loop's index is calculated dynamically, and its body + // is executed conditionally. + // + // Each sub-loop's body is guarded by a conditional statement to ensure + // it executes only within its logical iteration range: + // + // if (fuse.index < ni_k){ + // iv_k = lb_k + st_k * fuse.index; + // original.index = iv_k + // body(k); + // } + + CompoundStmt *FusedBody = nullptr; + SmallVector FusedBodyStmts; + for (unsigned I = FirstVal - 1, J = 0; I < LastVal; ++I, ++J) { + // Assingment of the original sub-loop index to compute the logical index + // IV_k = LB_k + omp.fuse.index * ST_k + ExprResult IdxExpr = + SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul, + MakeVarDeclRef(STVarDecls[J]), MakeIVRef()); + if (!IdxExpr.isUsable()) + return StmtError(); + IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Add, + MakeVarDeclRef(LBVarDecls[J]), IdxExpr.get()); + + if (!IdxExpr.isUsable()) + return StmtError(); + IdxExpr = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Assign, + MakeVarDeclRef(IVVarDecls[J]), IdxExpr.get()); + if (!IdxExpr.isUsable()) + return StmtError(); + + // Update the original i_k = IV_k + SmallVector BodyStmts; + BodyStmts.push_back(IdxExpr.get()); + llvm::append_range(BodyStmts, LoopHelpers[I].Updates); + + // If the loop is a CXXForRangeStmt then the iterator variable is needed + if (auto *SourceCXXFor = dyn_cast(LoopStmts[I])) + BodyStmts.push_back(SourceCXXFor->getLoopVarStmt()); + + Stmt *Body = (isa(LoopStmts[I])) + ? cast(LoopStmts[I])->getBody() + : cast(LoopStmts[I])->getBody(); + BodyStmts.push_back(Body); + + CompoundStmt *CombinedBody = + CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + ExprResult Condition = + SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LT, MakeIVRef(), + MakeVarDeclRef(NIVarDecls[J])); + + if (!Condition.isUsable()) + return StmtError(); + + IfStmt *IfStatement = IfStmt::Create( + Context, SourceLocation(), IfStatementKind::Ordinary, nullptr, nullptr, + Condition.get(), SourceLocation(), SourceLocation(), CombinedBody, + SourceLocation(), nullptr); + + FusedBodyStmts.push_back(IfStatement); + } + FusedBody = CompoundStmt::Create(Context, FusedBodyStmts, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + + // 7. Construct the final fused loop + ForStmt *FusedForStmt = new (Context) + ForStmt(Context, InitStmt.get(), CondExpr.get(), nullptr, IncrExpr.get(), + FusedBody, InitStmt.get()->getBeginLoc(), SourceLocation(), + IncrExpr.get()->getEndLoc()); + + // In the case of looprange, the result of fuse won't simply + // be a single loop (ForStmt), but rather a loop sequence + // (CompoundStmt) of 3 parts: the pre-fusion loops, the fused loop + // and the post-fusion loops, preserving its original order. + // + // Note: If looprange clause produces a single fused loop nest then + // this compound statement wrapper is unnecessary (Therefore this + // treatment is skipped) + + Stmt *FusionStmt = FusedForStmt; + if (LRC && CountVal != LoopSeqSize) { + SmallVector FinalLoops; + // Reset the transform index + TransformIndex = 0; + + // Collect all non-fused loops before and after the fused region. + // Pre-fusion and post-fusion loops are inserted in order exploiting their + // symmetry, along with their corresponding transformation pre-inits if + // needed. The fused loop is added between the two regions. + for (unsigned I = 0; I < LoopSeqSize; ++I) { + if (I >= FirstVal - 1 && I < FirstVal + CountVal - 1) { + // Update the Transformation counter to skip already treated + // loop transformations + if (LoopCategories[I] != OMPLoopCategory::TransformSingleLoop) + ++TransformIndex; + continue; + } + + // No need to handle: + // Regular loops: they are kept intact as-is. + // Loop-sequence-generating transformations: already handled earlier. + // Only TransformSingleLoop requires inserting pre-inits here + + if (LoopCategories[I] == OMPLoopCategory::TransformSingleLoop) { + auto TransformPreInit = TransformsPreInits[TransformIndex++]; + if (!TransformPreInit.empty()) { + llvm::append_range(PreInits, TransformPreInit); + } + } + + FinalLoops.push_back(LoopStmts[I]); + } + + FinalLoops.insert(FinalLoops.begin() + (FirstVal - 1), FusedForStmt); + FusionStmt = CompoundStmt::Create(Context, FinalLoops, FPOptionsOverride(), + SourceLocation(), SourceLocation()); + } + return OMPFuseDirective::Create(Context, StartLoc, EndLoc, Clauses, NumLoops, + NumLoopNests, AStmt, FusionStmt, + buildPreInits(Context, PreInits)); +} + OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, SourceLocation StartLoc, @@ -16581,6 +17408,33 @@ OMPClause *SemaOpenMP::ActOnOpenMPPartialClause(Expr *FactorExpr, FactorExpr); } +OMPClause *SemaOpenMP::ActOnOpenMPLoopRangeClause( + Expr *First, Expr *Count, SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation FirstLoc, SourceLocation CountLoc, SourceLocation EndLoc) { + + // OpenMP [6.0, Restrictions] + // First and Count must be integer expressions with positive value + ExprResult FirstVal = + VerifyPositiveIntegerConstantInClause(First, OMPC_looprange); + if (FirstVal.isInvalid()) + First = nullptr; + + ExprResult CountVal = + VerifyPositiveIntegerConstantInClause(Count, OMPC_looprange); + if (CountVal.isInvalid()) + Count = nullptr; + + SmallVector ArgsVec = {First, Count}; + + // OpenMP [6.0, Restrictions] + // first + count - 1 must not evaluate to a value greater than the + // loop sequence length of the associated canonical loop sequence. + // This check must be performed afterwards due to the delayed + // parsing and computation of the associated loop sequence + return OMPLoopRangeClause::Create(getASTContext(), StartLoc, LParenLoc, + FirstLoc, CountLoc, EndLoc, ArgsVec); +} + OMPClause *SemaOpenMP::ActOnOpenMPAlignClause(Expr *A, SourceLocation StartLoc, SourceLocation LParenLoc, SourceLocation EndLoc) { diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 335e21d927b76..d70e2a3874c07 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1775,6 +1775,14 @@ class TreeTransform { LParenLoc, EndLoc); } + OMPClause * + RebuildOMPLoopRangeClause(Expr *First, Expr *Count, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation FirstLoc, + SourceLocation CountLoc, SourceLocation EndLoc) { + return getSema().OpenMP().ActOnOpenMPLoopRangeClause( + First, Count, StartLoc, LParenLoc, FirstLoc, CountLoc, EndLoc); + } + /// Build a new OpenMP 'allocator' clause. /// /// By default, performs semantic analysis to build the new OpenMP clause. @@ -9666,6 +9674,17 @@ StmtResult TreeTransform::TransformOMPInterchangeDirective( return Res; } +template +StmtResult +TreeTransform::TransformOMPFuseDirective(OMPFuseDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); + return Res; +} + template StmtResult TreeTransform::TransformOMPForDirective(OMPForDirective *D) { @@ -10558,6 +10577,31 @@ TreeTransform::TransformOMPPartialClause(OMPPartialClause *C) { C->getEndLoc()); } +template +OMPClause * +TreeTransform::TransformOMPLoopRangeClause(OMPLoopRangeClause *C) { + ExprResult F = getDerived().TransformExpr(C->getFirst()); + if (F.isInvalid()) + return nullptr; + + ExprResult Cn = getDerived().TransformExpr(C->getCount()); + if (Cn.isInvalid()) + return nullptr; + + Expr *First = F.get(); + Expr *Count = Cn.get(); + + bool Changed = (First != C->getFirst()) || (Count != C->getCount()); + + // If no changes and AlwaysRebuild() is false, return the original clause + if (!Changed && !getDerived().AlwaysRebuild()) + return C; + + return RebuildOMPLoopRangeClause(First, Count, C->getBeginLoc(), + C->getLParenLoc(), C->getFirstLoc(), + C->getCountLoc(), C->getEndLoc()); +} + template OMPClause * TreeTransform::TransformOMPCollapseClause(OMPCollapseClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index c113fd7cbb911..3963aff27c9de 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11087,6 +11087,9 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_partial: C = OMPPartialClause::CreateEmpty(Context); break; + case llvm::omp::OMPC_looprange: + C = OMPLoopRangeClause::CreateEmpty(Context); + break; case llvm::omp::OMPC_allocator: C = new (Context) OMPAllocatorClause(); break; @@ -11488,6 +11491,14 @@ void OMPClauseReader::VisitOMPPartialClause(OMPPartialClause *C) { C->setLParenLoc(Record.readSourceLocation()); } +void OMPClauseReader::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) { + C->setFirst(Record.readSubExpr()); + C->setCount(Record.readSubExpr()); + C->setLParenLoc(Record.readSourceLocation()); + C->setFirstLoc(Record.readSourceLocation()); + C->setCountLoc(Record.readSourceLocation()); +} + void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) { C->setAllocator(Record.readExpr()); C->setLParenLoc(Record.readSourceLocation()); diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 0ba0378754eb4..a301e1c0b0e32 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2449,6 +2449,7 @@ void ASTStmtReader::VisitOMPLoopTransformationDirective( OMPLoopTransformationDirective *D) { VisitOMPLoopBasedDirective(D); D->setNumGeneratedLoops(Record.readUInt32()); + D->setNumGeneratedLoopNests(Record.readUInt32()); } void ASTStmtReader::VisitOMPTileDirective(OMPTileDirective *D) { @@ -2471,6 +2472,10 @@ void ASTStmtReader::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) { VisitOMPLoopTransformationDirective(D); } +void ASTStmtReader::VisitOMPFuseDirective(OMPFuseDirective *D) { + VisitOMPLoopTransformationDirective(D); +} + void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); D->setHasCancel(Record.readBool()); @@ -3613,6 +3618,14 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { S = OMPReverseDirective::CreateEmpty(Context); break; } + case STMT_OMP_FUSE_DIRECTIVE: { + unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; + unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; + unsigned NumLoopNests = Record[ASTStmtReader::NumStmtFields + 2]; + S = OMPFuseDirective::CreateEmpty(Context, NumClauses, NumLoops, + NumLoopNests); + break; + } case STMT_OMP_INTERCHANGE_DIRECTIVE: { unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 1b3d3c22aa9f5..8548f7e50d34b 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7782,6 +7782,14 @@ void OMPClauseWriter::VisitOMPPartialClause(OMPPartialClause *C) { Record.AddSourceLocation(C->getLParenLoc()); } +void OMPClauseWriter::VisitOMPLoopRangeClause(OMPLoopRangeClause *C) { + Record.AddStmt(C->getFirst()); + Record.AddStmt(C->getCount()); + Record.AddSourceLocation(C->getLParenLoc()); + Record.AddSourceLocation(C->getFirstLoc()); + Record.AddSourceLocation(C->getCountLoc()); +} + void OMPClauseWriter::VisitOMPAllocatorClause(OMPAllocatorClause *C) { Record.AddStmt(C->getAllocator()); Record.AddSourceLocation(C->getLParenLoc()); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index b9eabd5ddb64c..8b909d5c93686 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2454,6 +2454,7 @@ void ASTStmtWriter::VisitOMPLoopTransformationDirective( OMPLoopTransformationDirective *D) { VisitOMPLoopBasedDirective(D); Record.writeUInt32(D->getNumGeneratedLoops()); + Record.writeUInt32(D->getNumGeneratedLoopNests()); } void ASTStmtWriter::VisitOMPTileDirective(OMPTileDirective *D) { @@ -2481,6 +2482,11 @@ void ASTStmtWriter::VisitOMPInterchangeDirective(OMPInterchangeDirective *D) { Code = serialization::STMT_OMP_INTERCHANGE_DIRECTIVE; } +void ASTStmtWriter::VisitOMPFuseDirective(OMPFuseDirective *D) { + VisitOMPLoopTransformationDirective(D); + Code = serialization::STMT_OMP_FUSE_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); Record.writeBool(D->hasCancel()); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 1afd4b52eb354..036945b2d1700 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1817,6 +1817,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPStripeDirectiveClass: case Stmt::OMPTileDirectiveClass: case Stmt::OMPInterchangeDirectiveClass: + case Stmt::OMPFuseDirectiveClass: case Stmt::OMPInteropDirectiveClass: case Stmt::OMPDispatchDirectiveClass: case Stmt::OMPMaskedDirectiveClass: diff --git a/clang/test/OpenMP/fuse_ast_print.cpp b/clang/test/OpenMP/fuse_ast_print.cpp new file mode 100644 index 0000000000000..9d85bd1172948 --- /dev/null +++ b/clang/test/OpenMP/fuse_ast_print.cpp @@ -0,0 +1,400 @@ +// Check no warnings/errors +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Check AST and unparsing +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-dump %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -std=c++20 -fopenmp-version=60 -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code +extern "C" void body(...); + +// PRINT-LABEL: void foo1( +// DUMP-LABEL: FunctionDecl {{.*}} foo1 +void foo1() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } + +} + +// PRINT-LABEL: void foo2( +// DUMP-LABEL: FunctionDecl {{.*}} foo2 +void foo2() { + // PRINT: #pragma omp unroll partial(4) + // DUMP: OMPUnrollDirective + // DUMP-NEXT: OMPPartialClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: value: Int 4 + // DUMP-NEXT: IntegerLiteral {{.*}} 4 + #pragma omp unroll partial(4) + // PRINT: #pragma omp fuse + // DUMP-NEXT: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + +} + +//PRINT-LABEL: void foo3( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo3 +template +void foo3() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: #pragma omp unroll partial(Factor1) + // DUMP: OMPUnrollDirective + #pragma omp unroll partial(Factor1) + // PRINT: for (int i = 0; i < 12; i += 1) + // DUMP: ForStmt + for (int i = 0; i < 12; i += 1) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: #pragma omp unroll partial(Factor2) + // DUMP: OMPUnrollDirective + #pragma omp unroll partial(Factor2) + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } +} + +// Also test instantiating the template. +void tfoo3() { + foo3<4,2>(); +} + +//PRINT-LABEL: void foo4( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo4 +template +void foo4(int start, int end) { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (T i = start; i < end; i += Step) + // DUMP: ForStmt + for (T i = start; i < end; i += Step) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + + // PRINT: for (T j = end; j > start; j -= Step) + // DUMP: ForStmt + for (T j = end; j > start; j -= Step) { + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + + } +} + +// Also test instantiating the template. +void tfoo4() { + foo4(0, 64); +} + + + +// PRINT-LABEL: void foo5( +// DUMP-LABEL: FunctionDecl {{.*}} foo5 +void foo5() { + double arr[128], arr2[128]; + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT-NEXT: for (auto &&a : arr) + // DUMP-NEXT: CXXForRangeStmt + for (auto &&a: arr) + // PRINT: body(a) + // DUMP: CallExpr + body(a); + // PRINT: for (double v = 42; auto &&b : arr) + // DUMP: CXXForRangeStmt + for (double v = 42; auto &&b: arr) + // PRINT: body(b, v); + // DUMP: CallExpr + body(b, v); + // PRINT: for (auto &&c : arr2) + // DUMP: CXXForRangeStmt + for (auto &&c: arr2) + // PRINT: body(c) + // DUMP: CallExpr + body(c); + + } + +} + +// PRINT-LABEL: void foo6( +// DUMP-LABEL: FunctionDecl {{.*}} foo6 +void foo6() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i <= 10; ++i) + // DUMP: ForStmt + for (int i = 0; i <= 10; ++i) + body(i); + // PRINT: for (int j = 0; j < 100; ++j) + // DUMP: ForStmt + for(int j = 0; j < 100; ++j) + body(j); + } + // PRINT: #pragma omp unroll partial(4) + // DUMP: OMPUnrollDirective + #pragma omp unroll partial(4) + // PRINT: for (int k = 0; k < 250; ++k) + // DUMP: ForStmt + for (int k = 0; k < 250; ++k) + body(k); + } +} + +// PRINT-LABEL: void foo7( +// DUMP-LABEL: FunctionDecl {{.*}} foo7 +void foo7() { + // PRINT: #pragma omp fuse + // DUMP: OMPFuseDirective + #pragma omp fuse + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + } + } + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + } + } + } + } + +} + +// PRINT-LABEL: void foo8( +// DUMP-LABEL: FunctionDecl {{.*}} foo8 +void foo8() { + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(2,2) + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + + } + +} + +//PRINT-LABEL: void foo9( +//DUMP-LABEL: FunctionTemplateDecl {{.*}} foo9 +//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} F +//DUMP-LABEL: NonTypeTemplateParmDecl {{.*}} C +template +void foo9() { + // PRINT: #pragma omp fuse looprange(F,C) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(F,C) + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + + } +} + +// Also test instantiating the template. +void tfoo9() { + foo9<1, 2>(); +} + +// PRINT-LABEL: void foo10( +// DUMP-LABEL: FunctionDecl {{.*}} foo10 +void foo10() { + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(2,2) + // PRINT: { + // DUMP: CompoundStmt + { + // PRINT: for (int i = 0; i < 10; i += 2) + // DUMP: ForStmt + for (int i = 0; i < 10; i += 2) + // PRINT: body(i) + // DUMP: CallExpr + body(i); + // PRINT: for (int ii = 0; ii < 10; ii += 2) + // DUMP: ForStmt + for (int ii = 0; ii < 10; ii += 2) + // PRINT: body(ii) + // DUMP: CallExpr + body(ii); + // PRINT: #pragma omp fuse looprange(2,2) + // DUMP: OMPFuseDirective + // DUMP: OMPLooprangeClause + #pragma omp fuse looprange(2,2) + { + // PRINT: for (int j = 10; j > 0; --j) + // DUMP: ForStmt + for (int j = 10; j > 0; --j) + // PRINT: body(j) + // DUMP: CallExpr + body(j); + // PRINT: for (int jj = 10; jj > 0; --jj) + // DUMP: ForStmt + for (int jj = 10; jj > 0; --jj) + // PRINT: body(jj) + // DUMP: CallExpr + body(jj); + // PRINT: for (int k = 0; k <= 10; ++k) + // DUMP: ForStmt + for (int k = 0; k <= 10; ++k) + // PRINT: body(k) + // DUMP: CallExpr + body(k); + // PRINT: for (int kk = 0; kk <= 10; ++kk) + // DUMP: ForStmt + for (int kk = 0; kk <= 10; ++kk) + // PRINT: body(kk) + // DUMP: CallExpr + body(kk); + } + } + +} + + + + +#endif \ No newline at end of file diff --git a/clang/test/OpenMP/fuse_codegen.cpp b/clang/test/OpenMP/fuse_codegen.cpp new file mode 100644 index 0000000000000..742c280ed0172 --- /dev/null +++ b/clang/test/OpenMP/fuse_codegen.cpp @@ -0,0 +1,2328 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 5 +// expected-no-diagnostics + +// Check code generation +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2 + +#ifndef HEADER +#define HEADER + +//placeholder for loop body code. +extern "C" void body(...) {} + +extern "C" void foo1(int start1, int end1, int step1, int start2, int end2, int step2) { + int i,j; + #pragma omp fuse + { + for(i = start1; i < end1; i += step1) body(i); + for(j = start2; j < end2; j += step2) body(j); + } + +} + +template +void foo2(T start, T end, T step){ + T i,j,k; + #pragma omp fuse + { + for(i = start; i < end; i += step) body(i); + for(j = end; j > start; j -= step) body(j); + for(k = start+step; k < end+step; k += step) body(k); + } +} + +extern "C" void tfoo2() { + foo2(0, 64, 4); +} + +extern "C" void foo3() { + double arr[256]; + #pragma omp fuse + { + #pragma omp fuse + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + } + for(int c = 42; auto &&v: arr) body(c,v); + for(int cc = 37; auto &&vv: arr) body(cc, vv); + } +} + +extern "C" void foo4() { + double arr[256]; + + #pragma omp fuse looprange(2,2) + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + for(int k = 0; k < 64; ++k) body(k); + for(int c = 42; auto &&v: arr) body(c,v); + } +} + +// This exemplifies the usage of loop transformations that generate +// more than top level canonical loop nests (e.g split, loopranged fuse...) +extern "C" void foo5() { + double arr[256]; + #pragma omp fuse looprange(2,2) + { + #pragma omp fuse looprange(2,2) + { + for(int i = 0; i < 128; ++i) body(i); + for(int j = 0; j < 256; j+=2) body(j); + for(int k = 0; k < 512; ++k) body(k); + } + for(int c = 42; auto &&v: arr) body(c,v); + for(int cc = 37; auto &&vv: arr) body(cc, vv); + } +} + + +#endif +// CHECK1-LABEL: define dso_local void @body( +// CHECK1-SAME: ...) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo1( +// CHECK1-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] +// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP33]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] +// CHECK1: [[IF_THEN22]]: +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL25:%.*]] = mul i32 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]] +// CHECK1-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP42]]) +// CHECK1-NEXT: br label %[[IF_END27]] +// CHECK1: [[IF_END27]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @tfoo2( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_( +// CHECK1-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] +// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK1-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]] +// CHECK1-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 +// CHECK1-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK1-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]] +// CHECK1: [[COND_TRUE30]]: +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK1-NEXT: br label %[[COND_END32:.*]] +// CHECK1: [[COND_FALSE31]]: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: br label %[[COND_END32]] +// CHECK1: [[COND_END32]]: +// CHECK1-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ] +// CHECK1-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]] +// CHECK1-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK1-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]] +// CHECK1-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP49]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]] +// CHECK1-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] +// CHECK1: [[IF_THEN40]]: +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]] +// CHECK1-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK1-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]] +// CHECK1-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP58]]) +// CHECK1-NEXT: br label %[[IF_END45]] +// CHECK1: [[IF_END45]]: +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK1-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK1: [[IF_THEN47]]: +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]] +// CHECK1-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]] +// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 +// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]] +// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP67]]) +// CHECK1-NEXT: br label %[[IF_END52]] +// CHECK1: [[IF_END52]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo3( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END222:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN225:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8 +// CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]] +// CHECK1-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8 +// CHECK1-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1 +// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1 +// CHECK1-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1 +// CHECK1-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1 +// CHECK1-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8 +// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1 +// CHECK1-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]] +// CHECK1: [[COND_TRUE42]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK1-NEXT: br label %[[COND_END44:.*]] +// CHECK1: [[COND_FALSE43]]: +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: br label %[[COND_END44]] +// CHECK1: [[COND_END44]]: +// CHECK1-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ] +// CHECK1-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]] +// CHECK1: [[COND_TRUE48]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK1-NEXT: br label %[[COND_END50:.*]] +// CHECK1: [[COND_FALSE49]]: +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: br label %[[COND_END50]] +// CHECK1: [[COND_END50]]: +// CHECK1-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ] +// CHECK1-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK1-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[CONV56:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]] +// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]] +// CHECK1-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32 +// CHECK1-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1 +// CHECK1-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]] +// CHECK1-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN62]]: +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]] +// CHECK1-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1 +// CHECK1-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]] +// CHECK1-NEXT: store i32 [[ADD66]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP45]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]] +// CHECK1-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]] +// CHECK1: [[IF_THEN68]]: +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]] +// CHECK1-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2 +// CHECK1-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]] +// CHECK1-NEXT: store i32 [[ADD72]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP52]]) +// CHECK1-NEXT: br label %[[IF_END73]] +// CHECK1: [[IF_END73]]: +// CHECK1-NEXT: br label %[[IF_END74]] +// CHECK1: [[IF_END74]]: +// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]] +// CHECK1: [[IF_THEN76]]: +// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]] +// CHECK1-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]] +// CHECK1-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1 +// CHECK1-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]] +// CHECK1-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP60]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP61]], double noundef [[TMP63]]) +// CHECK1-NEXT: br label %[[IF_END81]] +// CHECK1: [[IF_END81]]: +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK1-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]] +// CHECK1-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]] +// CHECK1: [[IF_THEN83]]: +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]] +// CHECK1-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]] +// CHECK1-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 +// CHECK1-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1 +// CHECK1-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]] +// CHECK1-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8 +// CHECK1-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP72]], double noundef [[TMP74]]) +// CHECK1-NEXT: br label %[[IF_END88]] +// CHECK1: [[IF_END88]]: +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1 +// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo4( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128 +// CHECK1-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP6]]) +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND2:.*]] +// CHECK1: [[FOR_COND2]]: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]] +// CHECK1: [[FOR_BODY4]]: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP16]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]] +// CHECK1: [[IF_THEN9]]: +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]] +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP23]]) +// CHECK1-NEXT: br label %[[IF_END14]] +// CHECK1: [[IF_END14]]: +// CHECK1-NEXT: br label %[[FOR_INC15:.*]] +// CHECK1: [[FOR_INC15]]: +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1: [[FOR_END17]]: +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND19:.*]] +// CHECK1: [[FOR_COND19]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]] +// CHECK1: [[FOR_BODY21]]: +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP29]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP30]], double noundef [[TMP32]]) +// CHECK1-NEXT: br label %[[FOR_INC22:.*]] +// CHECK1: [[FOR_INC22]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1 +// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND19]] +// CHECK1: [[FOR_END23]]: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define dso_local void @foo5( +// CHECK1-SAME: ) #[[ATTR0]] { +// CHECK1-NEXT: [[ENTRY:.*:]] +// CHECK1-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END267:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK1-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK1: [[COND_TRUE]]: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK1-NEXT: br label %[[COND_END:.*]] +// CHECK1: [[COND_FALSE]]: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: br label %[[COND_END]] +// CHECK1: [[COND_END]]: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK1-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]] +// CHECK1: [[COND_TRUE24]]: +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK1-NEXT: br label %[[COND_END26:.*]] +// CHECK1: [[COND_FALSE25]]: +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: br label %[[COND_END26]] +// CHECK1: [[COND_END26]]: +// CHECK1-NEXT: [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ] +// CHECK1-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND:.*]] +// CHECK1: [[FOR_COND]]: +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128 +// CHECK1-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK1: [[FOR_BODY]]: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP21]]) +// CHECK1-NEXT: br label %[[FOR_INC:.*]] +// CHECK1: [[FOR_INC]]: +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK1-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1: [[FOR_END]]: +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND30:.*]] +// CHECK1: [[FOR_COND30]]: +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK1-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]] +// CHECK1: [[FOR_BODY32]]: +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK1-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]] +// CHECK1: [[IF_THEN]]: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK1-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK1-NEXT: [[CONV35:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]] +// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]] +// CHECK1-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 +// CHECK1-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK1-NEXT: [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]] +// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK1-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]] +// CHECK1: [[IF_THEN41]]: +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]] +// CHECK1-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK1-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2 +// CHECK1-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] +// CHECK1-NEXT: store i32 [[ADD45]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP37]]) +// CHECK1-NEXT: br label %[[IF_END]] +// CHECK1: [[IF_END]]: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK1-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK1: [[IF_THEN47]]: +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK1-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]] +// CHECK1-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1 +// CHECK1-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]] +// CHECK1-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP44]]) +// CHECK1-NEXT: br label %[[IF_END52]] +// CHECK1: [[IF_END52]]: +// CHECK1-NEXT: br label %[[IF_END53]] +// CHECK1: [[IF_END53]]: +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK1-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]] +// CHECK1-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]] +// CHECK1: [[IF_THEN55]]: +// CHECK1-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]] +// CHECK1-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]] +// CHECK1-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK1-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1 +// CHECK1-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]] +// CHECK1-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP52]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP53]], double noundef [[TMP55]]) +// CHECK1-NEXT: br label %[[IF_END60]] +// CHECK1: [[IF_END60]]: +// CHECK1-NEXT: br label %[[FOR_INC61:.*]] +// CHECK1: [[FOR_INC61]]: +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1 +// CHECK1-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1: [[FOR_END63]]: +// CHECK1-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256 +// CHECK1-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND70:.*]] +// CHECK1: [[FOR_COND70]]: +// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8 +// CHECK1-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]] +// CHECK1-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]] +// CHECK1: [[FOR_BODY72]]: +// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP62]], double noundef [[TMP64]]) +// CHECK1-NEXT: br label %[[FOR_INC73:.*]] +// CHECK1: [[FOR_INC73]]: +// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1 +// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8 +// CHECK1-NEXT: br label %[[FOR_COND70]] +// CHECK1: [[FOR_END74]]: +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @body( +// CHECK2-SAME: ...) #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo1( +// CHECK2-SAME: i32 noundef [[START1:%.*]], i32 noundef [[END1:%.*]], i32 noundef [[STEP1:%.*]], i32 noundef [[START2:%.*]], i32 noundef [[END2:%.*]], i32 noundef [[STEP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[START1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP1_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[START2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP2_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START1]], ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END1]], ptr [[END1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP1]], ptr [[STEP1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[START2]], ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END2]], ptr [[END2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP2]], ptr [[STEP2_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP1_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 +// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[START2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[END2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP2_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] +// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 +// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], %[[COND_TRUE]] ], [ [[TMP22]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP16]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: br i1 [[CMP17]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], [[TMP29]] +// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP27]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL19:%.*]] = mul i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP30]], [[MUL19]] +// CHECK2-NEXT: store i32 [[ADD20]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP33]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP21:%.*]] = icmp ult i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: br i1 [[CMP21]], label %[[IF_THEN22:.*]], label %[[IF_END27:.*]] +// CHECK2: [[IF_THEN22]]: +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL23:%.*]] = mul i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[ADD24:%.*]] = add i32 [[TMP36]], [[MUL23]] +// CHECK2-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[MUL25:%.*]] = mul i32 [[TMP40]], [[TMP41]] +// CHECK2-NEXT: [[ADD26:%.*]] = add i32 [[TMP39]], [[MUL25]] +// CHECK2-NEXT: store i32 [[ADD26]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP42]]) +// CHECK2-NEXT: br label %[[IF_END27]] +// CHECK2: [[IF_END27]]: +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP43]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo3( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE221:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END222:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN225:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_27:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_29:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_30:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_140:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX46:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX52:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY23:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP15]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY23]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR24]], ptr [[__END222]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY26:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP16]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY26]], ptr [[__BEGIN225]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[__RANGE221]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY28:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP17]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY28]], ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__END222]], align 8 +// CHECK2-NEXT: store ptr [[TMP18]], ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_29]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST31:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST32:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB33:%.*]] = sub i64 [[SUB_PTR_LHS_CAST31]], [[SUB_PTR_RHS_CAST32]] +// CHECK2-NEXT: [[SUB_PTR_DIV34:%.*]] = sdiv exact i64 [[SUB_PTR_SUB33]], 8 +// CHECK2-NEXT: [[SUB35:%.*]] = sub nsw i64 [[SUB_PTR_DIV34]], 1 +// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[SUB35]], 1 +// CHECK2-NEXT: [[DIV37:%.*]] = sdiv i64 [[ADD36]], 1 +// CHECK2-NEXT: [[SUB38:%.*]] = sub nsw i64 [[DIV37]], 1 +// CHECK2-NEXT: store i64 [[SUB38]], ptr [[DOTCAPTURE_EXPR_30]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB2]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST2]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_30]], align 8 +// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP21]], 1 +// CHECK2-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP41]], label %[[COND_TRUE42:.*]], label %[[COND_FALSE43:.*]] +// CHECK2: [[COND_TRUE42]]: +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_TEMP_140]], align 8 +// CHECK2-NEXT: br label %[[COND_END44:.*]] +// CHECK2: [[COND_FALSE43]]: +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: br label %[[COND_END44]] +// CHECK2: [[COND_END44]]: +// CHECK2-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP25]], %[[COND_TRUE42]] ], [ [[TMP26]], %[[COND_FALSE43]] ] +// CHECK2-NEXT: store i64 [[COND45]], ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[CMP47:%.*]] = icmp sgt i64 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: br i1 [[CMP47]], label %[[COND_TRUE48:.*]], label %[[COND_FALSE49:.*]] +// CHECK2: [[COND_TRUE48]]: +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_TEMP_2]], align 8 +// CHECK2-NEXT: br label %[[COND_END50:.*]] +// CHECK2: [[COND_FALSE49]]: +// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: br label %[[COND_END50]] +// CHECK2: [[COND_END50]]: +// CHECK2-NEXT: [[COND51:%.*]] = phi i64 [ [[TMP29]], %[[COND_TRUE48]] ], [ [[TMP30]], %[[COND_FALSE49]] ] +// CHECK2-NEXT: store i64 [[COND51]], ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX46]], align 8 +// CHECK2-NEXT: [[CMP53:%.*]] = icmp slt i64 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: br i1 [[CMP53]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN:.*]], label %[[IF_END74:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: [[CONV55:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[CONV56:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK2-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV56]], [[TMP37]] +// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[CONV55]], [[MUL]] +// CHECK2-NEXT: [[CONV58:%.*]] = trunc i64 [[ADD57]] to i32 +// CHECK2-NEXT: store i32 [[CONV58]], ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[MUL59:%.*]] = mul nsw i32 [[TMP38]], 1 +// CHECK2-NEXT: [[ADD60:%.*]] = add nsw i32 0, [[MUL59]] +// CHECK2-NEXT: store i32 [[ADD60]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: br i1 [[CMP61]], label %[[IF_THEN62:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN62]]: +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL63:%.*]] = mul nsw i32 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: [[ADD64:%.*]] = add nsw i32 [[TMP41]], [[MUL63]] +// CHECK2-NEXT: store i32 [[ADD64]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL65:%.*]] = mul nsw i32 [[TMP44]], 1 +// CHECK2-NEXT: [[ADD66:%.*]] = add nsw i32 0, [[MUL65]] +// CHECK2-NEXT: store i32 [[ADD66]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP45]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP67:%.*]] = icmp slt i32 [[TMP46]], [[TMP47]] +// CHECK2-NEXT: br i1 [[CMP67]], label %[[IF_THEN68:.*]], label %[[IF_END73:.*]] +// CHECK2: [[IF_THEN68]]: +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL69:%.*]] = mul nsw i32 [[TMP49]], [[TMP50]] +// CHECK2-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP48]], [[MUL69]] +// CHECK2-NEXT: store i32 [[ADD70]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL71:%.*]] = mul nsw i32 [[TMP51]], 2 +// CHECK2-NEXT: [[ADD72:%.*]] = add nsw i32 0, [[MUL71]] +// CHECK2-NEXT: store i32 [[ADD72]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP52]]) +// CHECK2-NEXT: br label %[[IF_END73]] +// CHECK2: [[IF_END73]]: +// CHECK2-NEXT: br label %[[IF_END74]] +// CHECK2: [[IF_END74]]: +// CHECK2-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP75:%.*]] = icmp slt i64 [[TMP53]], [[TMP54]] +// CHECK2-NEXT: br i1 [[CMP75]], label %[[IF_THEN76:.*]], label %[[IF_END81:.*]] +// CHECK2: [[IF_THEN76]]: +// CHECK2-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[MUL77:%.*]] = mul nsw i64 [[TMP56]], [[TMP57]] +// CHECK2-NEXT: [[ADD78:%.*]] = add nsw i64 [[TMP55]], [[MUL77]] +// CHECK2-NEXT: store i64 [[ADD78]], ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[MUL79:%.*]] = mul nsw i64 [[TMP59]], 1 +// CHECK2-NEXT: [[ADD_PTR80:%.*]] = getelementptr inbounds double, ptr [[TMP58]], i64 [[MUL79]] +// CHECK2-NEXT: store ptr [[ADD_PTR80]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP60]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP62:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP63:%.*]] = load double, ptr [[TMP62]], align 8 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP61]], double noundef [[TMP63]]) +// CHECK2-NEXT: br label %[[IF_END81]] +// CHECK2: [[IF_END81]]: +// CHECK2-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_NI2]], align 8 +// CHECK2-NEXT: [[CMP82:%.*]] = icmp slt i64 [[TMP64]], [[TMP65]] +// CHECK2-NEXT: br i1 [[CMP82]], label %[[IF_THEN83:.*]], label %[[IF_END88:.*]] +// CHECK2: [[IF_THEN83]]: +// CHECK2-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_LB2]], align 8 +// CHECK2-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_ST2]], align 8 +// CHECK2-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[MUL84:%.*]] = mul nsw i64 [[TMP67]], [[TMP68]] +// CHECK2-NEXT: [[ADD85:%.*]] = add nsw i64 [[TMP66]], [[MUL84]] +// CHECK2-NEXT: store i64 [[ADD85]], ptr [[DOTOMP_IV2]], align 8 +// CHECK2-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_27]], align 8 +// CHECK2-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV2]], align 8 +// CHECK2-NEXT: [[MUL86:%.*]] = mul nsw i64 [[TMP70]], 1 +// CHECK2-NEXT: [[ADD_PTR87:%.*]] = getelementptr inbounds double, ptr [[TMP69]], i64 [[MUL86]] +// CHECK2-NEXT: store ptr [[ADD_PTR87]], ptr [[__BEGIN225]], align 8 +// CHECK2-NEXT: [[TMP71:%.*]] = load ptr, ptr [[__BEGIN225]], align 8 +// CHECK2-NEXT: store ptr [[TMP71]], ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP72:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK2-NEXT: [[TMP73:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP74:%.*]] = load double, ptr [[TMP73]], align 8 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP72]], double noundef [[TMP74]]) +// CHECK2-NEXT: br label %[[IF_END88]] +// CHECK2: [[IF_END88]]: +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP75]], 1 +// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTOMP_FUSE_INDEX52]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo4( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: store i32 64, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 128 +// CHECK2-NEXT: br i1 [[CMP1]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP6]]) +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND2:.*]] +// CHECK2: [[FOR_COND2]]: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END17:.*]] +// CHECK2: [[FOR_BODY4]]: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP5]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 2 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK2-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP16]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: br i1 [[CMP8]], label %[[IF_THEN9:.*]], label %[[IF_END14:.*]] +// CHECK2: [[IF_THEN9]]: +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[MUL10]] +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK2-NEXT: store i32 [[ADD13]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP23]]) +// CHECK2-NEXT: br label %[[IF_END14]] +// CHECK2: [[IF_END14]]: +// CHECK2-NEXT: br label %[[FOR_INC15:.*]] +// CHECK2: [[FOR_INC15]]: +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK2-NEXT: store i32 [[INC16]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND2]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2: [[FOR_END17]]: +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP25]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY18:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP26]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY18]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND19:.*]] +// CHECK2: [[FOR_COND19]]: +// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: [[CMP20:%.*]] = icmp ne ptr [[TMP27]], [[TMP28]] +// CHECK2-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END23:.*]] +// CHECK2: [[FOR_BODY21]]: +// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP29]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP30]], double noundef [[TMP32]]) +// CHECK2-NEXT: br label %[[FOR_INC22:.*]] +// CHECK2: [[FOR_INC22]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP33]], i32 1 +// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND19]] +// CHECK2: [[FOR_END23]]: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @foo5( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[ARR:%.*]] = alloca [256 x double], align 16 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB03:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST04:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI05:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV06:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_LB116:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_ST117:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_NI118:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV120:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_TEMP_121:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX22:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX29:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[CC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE264:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN265:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END267:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[VV:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[K]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: store i32 512, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], %[[COND_TRUE]] ], [ [[TMP4]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +// CHECK2-NEXT: store i64 [[CONV]], ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP8]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP9]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY7]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP10]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP11]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[SUB12]], 1 +// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i64 [[ADD13]], 1 +// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[DIV14]], 1 +// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_11]], align 8 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP14]], 1 +// CHECK2-NEXT: store i64 [[ADD19]], ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP23:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: br i1 [[CMP23]], label %[[COND_TRUE24:.*]], label %[[COND_FALSE25:.*]] +// CHECK2: [[COND_TRUE24]]: +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_TEMP_121]], align 8 +// CHECK2-NEXT: br label %[[COND_END26:.*]] +// CHECK2: [[COND_FALSE25]]: +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: br label %[[COND_END26]] +// CHECK2: [[COND_END26]]: +// CHECK2-NEXT: [[COND27:%.*]] = phi i64 [ [[TMP18]], %[[COND_TRUE24]] ], [ [[TMP19]], %[[COND_FALSE25]] ] +// CHECK2-NEXT: store i64 [[COND27]], ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP20]], 128 +// CHECK2-NEXT: br i1 [[CMP28]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP21]]) +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND30:.*]] +// CHECK2: [[FOR_COND30]]: +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_FUSE_MAX22]], align 8 +// CHECK2-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP31]], label %[[FOR_BODY32:.*]], label %[[FOR_END63:.*]] +// CHECK2: [[FOR_BODY32]]: +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_NI05]], align 8 +// CHECK2-NEXT: [[CMP33:%.*]] = icmp slt i64 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: br i1 [[CMP33]], label %[[IF_THEN:.*]], label %[[IF_END53:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB03]], align 4 +// CHECK2-NEXT: [[CONV34:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_ST04]], align 4 +// CHECK2-NEXT: [[CONV35:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV35]], [[TMP29]] +// CHECK2-NEXT: [[ADD36:%.*]] = add nsw i64 [[CONV34]], [[MUL]] +// CHECK2-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 +// CHECK2-NEXT: store i32 [[CONV37]], ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV06]], align 4 +// CHECK2-NEXT: [[MUL38:%.*]] = mul nsw i32 [[TMP30]], 1 +// CHECK2-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]] +// CHECK2-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP40:%.*]] = icmp slt i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: br i1 [[CMP40]], label %[[IF_THEN41:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN41]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL42:%.*]] = mul nsw i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: [[ADD43:%.*]] = add nsw i32 [[TMP33]], [[MUL42]] +// CHECK2-NEXT: store i32 [[ADD43]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[MUL44:%.*]] = mul nsw i32 [[TMP36]], 2 +// CHECK2-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] +// CHECK2-NEXT: store i32 [[ADD45]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP37]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK2: [[IF_THEN47]]: +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL48:%.*]] = mul nsw i32 [[TMP41]], [[TMP42]] +// CHECK2-NEXT: [[ADD49:%.*]] = add nsw i32 [[TMP40]], [[MUL48]] +// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[MUL50:%.*]] = mul nsw i32 [[TMP43]], 1 +// CHECK2-NEXT: [[ADD51:%.*]] = add nsw i32 0, [[MUL50]] +// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP44]]) +// CHECK2-NEXT: br label %[[IF_END52]] +// CHECK2: [[IF_END52]]: +// CHECK2-NEXT: br label %[[IF_END53]] +// CHECK2: [[IF_END53]]: +// CHECK2-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_NI118]], align 8 +// CHECK2-NEXT: [[CMP54:%.*]] = icmp slt i64 [[TMP45]], [[TMP46]] +// CHECK2-NEXT: br i1 [[CMP54]], label %[[IF_THEN55:.*]], label %[[IF_END60:.*]] +// CHECK2: [[IF_THEN55]]: +// CHECK2-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_LB116]], align 8 +// CHECK2-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTOMP_ST117]], align 8 +// CHECK2-NEXT: [[TMP49:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[MUL56:%.*]] = mul nsw i64 [[TMP48]], [[TMP49]] +// CHECK2-NEXT: [[ADD57:%.*]] = add nsw i64 [[TMP47]], [[MUL56]] +// CHECK2-NEXT: store i64 [[ADD57]], ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP51:%.*]] = load i64, ptr [[DOTOMP_IV120]], align 8 +// CHECK2-NEXT: [[MUL58:%.*]] = mul nsw i64 [[TMP51]], 1 +// CHECK2-NEXT: [[ADD_PTR59:%.*]] = getelementptr inbounds double, ptr [[TMP50]], i64 [[MUL58]] +// CHECK2-NEXT: store ptr [[ADD_PTR59]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP52:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP52]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP55:%.*]] = load double, ptr [[TMP54]], align 8 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP53]], double noundef [[TMP55]]) +// CHECK2-NEXT: br label %[[IF_END60]] +// CHECK2: [[IF_END60]]: +// CHECK2-NEXT: br label %[[FOR_INC61:.*]] +// CHECK2: [[FOR_INC61]]: +// CHECK2-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: [[INC62:%.*]] = add nsw i64 [[TMP56]], 1 +// CHECK2-NEXT: store i64 [[INC62]], ptr [[DOTOMP_FUSE_INDEX29]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND30]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2: [[FOR_END63]]: +// CHECK2-NEXT: store i32 37, ptr [[CC]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE264]], align 8 +// CHECK2-NEXT: [[TMP57:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY66:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP57]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY66]], ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: [[TMP58:%.*]] = load ptr, ptr [[__RANGE264]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY68:%.*]] = getelementptr inbounds [256 x double], ptr [[TMP58]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR69:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY68]], i64 256 +// CHECK2-NEXT: store ptr [[ADD_PTR69]], ptr [[__END267]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND70:.*]] +// CHECK2: [[FOR_COND70]]: +// CHECK2-NEXT: [[TMP59:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[__END267]], align 8 +// CHECK2-NEXT: [[CMP71:%.*]] = icmp ne ptr [[TMP59]], [[TMP60]] +// CHECK2-NEXT: br i1 [[CMP71]], label %[[FOR_BODY72:.*]], label %[[FOR_END74:.*]] +// CHECK2: [[FOR_BODY72]]: +// CHECK2-NEXT: [[TMP61:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: store ptr [[TMP61]], ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[CC]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[VV]], align 8 +// CHECK2-NEXT: [[TMP64:%.*]] = load double, ptr [[TMP63]], align 8 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP62]], double noundef [[TMP64]]) +// CHECK2-NEXT: br label %[[FOR_INC73:.*]] +// CHECK2: [[FOR_INC73]]: +// CHECK2-NEXT: [[TMP65:%.*]] = load ptr, ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw double, ptr [[TMP65]], i32 1 +// CHECK2-NEXT: store ptr [[INCDEC_PTR]], ptr [[__BEGIN265]], align 8 +// CHECK2-NEXT: br label %[[FOR_COND70]] +// CHECK2: [[FOR_END74]]: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define dso_local void @tfoo2( +// CHECK2-SAME: ) #[[ATTR0]] { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: call void @_Z4foo2IiEvT_S0_S0_(i32 noundef 0, i32 noundef 64, i32 noundef 4) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define linkonce_odr void @_Z4foo2IiEvT_S0_S0_( +// CHECK2-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] comdat { +// CHECK2-NEXT: [[ENTRY:.*:]] +// CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP8:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_19:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP21:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_ST2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_NI2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_TEMP_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_MAX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_FUSE_INDEX:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1 +// CHECK2-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[SUB11]], [[TMP15]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD12]], [[TMP16]] +// CHECK2-NEXT: [[SUB14:%.*]] = sub i32 [[DIV13]], 1 +// CHECK2-NEXT: store i32 [[SUB14]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP17]], 1 +// CHECK2-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: store i32 [[ADD16]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: store i32 [[ADD18]], ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: store i32 [[ADD20]], ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[SUB23:%.*]] = sub i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP27]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP28]] +// CHECK2-NEXT: [[SUB27:%.*]] = sub i32 [[DIV26]], 1 +// CHECK2-NEXT: store i32 [[SUB27]], ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB2]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_ST2]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_22]], align 4 +// CHECK2-NEXT: [[ADD28:%.*]] = add i32 [[TMP29]], 1 +// CHECK2-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] +// CHECK2: [[COND_TRUE]]: +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_TEMP_1]], align 4 +// CHECK2-NEXT: br label %[[COND_END:.*]] +// CHECK2: [[COND_FALSE]]: +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: br label %[[COND_END]] +// CHECK2: [[COND_END]]: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], %[[COND_TRUE]] ], [ [[TMP34]], %[[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[CMP29:%.*]] = icmp ugt i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: br i1 [[CMP29]], label %[[COND_TRUE30:.*]], label %[[COND_FALSE31:.*]] +// CHECK2: [[COND_TRUE30]]: +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_TEMP_2]], align 4 +// CHECK2-NEXT: br label %[[COND_END32:.*]] +// CHECK2: [[COND_FALSE31]]: +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: br label %[[COND_END32]] +// CHECK2: [[COND_END32]]: +// CHECK2-NEXT: [[COND33:%.*]] = phi i32 [ [[TMP37]], %[[COND_TRUE30]] ], [ [[TMP38]], %[[COND_FALSE31]] ] +// CHECK2-NEXT: store i32 [[COND33]], ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND:.*]] +// CHECK2: [[FOR_COND]]: +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_FUSE_MAX]], align 4 +// CHECK2-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: br i1 [[CMP34]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] +// CHECK2: [[FOR_BODY]]: +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_NI0]], align 4 +// CHECK2-NEXT: [[CMP35:%.*]] = icmp ult i32 [[TMP41]], [[TMP42]] +// CHECK2-NEXT: br i1 [[CMP35]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// CHECK2: [[IF_THEN]]: +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB0]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_ST0]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP44]], [[TMP45]] +// CHECK2-NEXT: [[ADD36:%.*]] = add i32 [[TMP43]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD36]], ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV0]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL37:%.*]] = mul i32 [[TMP47]], [[TMP48]] +// CHECK2-NEXT: [[ADD38:%.*]] = add i32 [[TMP46]], [[MUL37]] +// CHECK2-NEXT: store i32 [[ADD38]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP49]]) +// CHECK2-NEXT: br label %[[IF_END]] +// CHECK2: [[IF_END]]: +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_NI1]], align 4 +// CHECK2-NEXT: [[CMP39:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]] +// CHECK2-NEXT: br i1 [[CMP39]], label %[[IF_THEN40:.*]], label %[[IF_END45:.*]] +// CHECK2: [[IF_THEN40]]: +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_LB1]], align 4 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_ST1]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL41:%.*]] = mul i32 [[TMP53]], [[TMP54]] +// CHECK2-NEXT: [[ADD42:%.*]] = add i32 [[TMP52]], [[MUL41]] +// CHECK2-NEXT: store i32 [[ADD42]], ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK2-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_IV1]], align 4 +// CHECK2-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTNEW_STEP8]], align 4 +// CHECK2-NEXT: [[MUL43:%.*]] = mul i32 [[TMP56]], [[TMP57]] +// CHECK2-NEXT: [[SUB44:%.*]] = sub i32 [[TMP55]], [[MUL43]] +// CHECK2-NEXT: store i32 [[SUB44]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP58]]) +// CHECK2-NEXT: br label %[[IF_END45]] +// CHECK2: [[IF_END45]]: +// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_NI2]], align 4 +// CHECK2-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP59]], [[TMP60]] +// CHECK2-NEXT: br i1 [[CMP46]], label %[[IF_THEN47:.*]], label %[[IF_END52:.*]] +// CHECK2: [[IF_THEN47]]: +// CHECK2-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_LB2]], align 4 +// CHECK2-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_ST2]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[MUL48:%.*]] = mul i32 [[TMP62]], [[TMP63]] +// CHECK2-NEXT: [[ADD49:%.*]] = add i32 [[TMP61]], [[MUL48]] +// CHECK2-NEXT: store i32 [[ADD49]], ptr [[DOTOMP_IV2]], align 4 +// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_17]], align 4 +// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTOMP_IV2]], align 4 +// CHECK2-NEXT: [[TMP66:%.*]] = load i32, ptr [[DOTNEW_STEP21]], align 4 +// CHECK2-NEXT: [[MUL50:%.*]] = mul i32 [[TMP65]], [[TMP66]] +// CHECK2-NEXT: [[ADD51:%.*]] = add i32 [[TMP64]], [[MUL50]] +// CHECK2-NEXT: store i32 [[ADD51]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP67:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP67]]) +// CHECK2-NEXT: br label %[[IF_END52]] +// CHECK2: [[IF_END52]]: +// CHECK2-NEXT: br label %[[FOR_INC:.*]] +// CHECK2: [[FOR_INC]]: +// CHECK2-NEXT: [[TMP68:%.*]] = load i32, ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP68]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTOMP_FUSE_INDEX]], align 4 +// CHECK2-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2: [[FOR_END]]: +// CHECK2-NEXT: ret void +// +//. +// CHECK1: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +// CHECK1: [[META4]] = !{!"llvm.loop.mustprogress"} +// CHECK1: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +// CHECK1: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +// CHECK1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} +// CHECK1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} +// CHECK1: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]} +// CHECK1: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]} +//. +// CHECK2: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} +// CHECK2: [[META4]] = !{!"llvm.loop.mustprogress"} +// CHECK2: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]} +// CHECK2: [[LOOP6]] = distinct !{[[LOOP6]], [[META4]]} +// CHECK2: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]]} +// CHECK2: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]]} +// CHECK2: [[LOOP9]] = distinct !{[[LOOP9]], [[META4]]} +// CHECK2: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]]} +//. diff --git a/clang/test/OpenMP/fuse_messages.cpp b/clang/test/OpenMP/fuse_messages.cpp new file mode 100644 index 0000000000000..4902d424373e5 --- /dev/null +++ b/clang/test/OpenMP/fuse_messages.cpp @@ -0,0 +1,211 @@ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -fsyntax-only -Wuninitialized -verify %s + +void func() { + + // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}} + #pragma omp fuse + ; + + // expected-error@+2 {{statement after '#pragma omp fuse' must be a for loop}} + #pragma omp fuse + {int bar = 0;} + + // expected-error@+4 {{statement after '#pragma omp fuse' must be a for loop}} + #pragma omp fuse + { + for(int i = 0; i < 10; ++i); + int x = 2; + } + + // expected-error@+2 {{statement after '#pragma omp fuse' must be a loop sequence containing canonical loops or loop-generating constructs}} + #pragma omp fuse + #pragma omp for + for (int i = 0; i < 7; ++i) + ; + + { + // expected-error@+2 {{expected statement}} + #pragma omp fuse + } + + // expected-warning@+1 {{extra tokens at the end of '#pragma omp fuse' are ignored}} + #pragma omp fuse foo + { + for (int i = 0; i < 7; ++i) + ; + for(int j = 0; j < 100; ++j); + + } + + + // expected-error@+1 {{unexpected OpenMP clause 'final' in directive '#pragma omp fuse'}} + #pragma omp fuse final(0) + { + for (int i = 0; i < 7; ++i) + ; + for(int j = 0; j < 100; ++j); + + } + + //expected-error@+4 {{loop after '#pragma omp fuse' is not in canonical form}} + //expected-error@+3 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'i'}} + #pragma omp fuse + { + for(int i = 0; i < 10; i*=2) { + ; + } + for(int j = 0; j < 100; ++j); + } + + //expected-error@+2 {{loop sequence after '#pragma omp fuse' must contain at least 1 canonical loop or loop-generating construct}} + #pragma omp fuse + {} + + //expected-error@+3 {{statement after '#pragma omp fuse' must be a for loop}} + #pragma omp fuse + { + #pragma omp unroll full + for(int i = 0; i < 10; ++i); + + for(int j = 0; j < 10; ++j); + } + + //expected-warning@+2 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + #pragma omp fuse + { + for(int i = 0; i < 10; ++i); + } + + //expected-warning@+1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + #pragma omp fuse looprange(1, 1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + } + + //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}} + #pragma omp fuse looprange(1, -1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + } + + //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}} + #pragma omp fuse looprange(1, 0) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + } + + const int x = 1; + constexpr int y = 4; + //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '4' is greater than the total number of loops '3'}} + #pragma omp fuse looprange(x,y) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + + //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '420' is greater than the total number of loops '3'}} + #pragma omp fuse looprange(1,420) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + + //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '6' is greater than the total number of loops '5'}} + #pragma omp fuse looprange(1,6) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + // This fusion results in 2 loops + #pragma omp fuse looprange(1,2) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + } + + //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '4' is greater than the total number of loops '3'}} + #pragma omp fuse looprange(2,3) + { + #pragma omp unroll partial(2) + for(int i = 0; i < 10; ++i); + + #pragma omp reverse + for(int j = 0; j < 10; ++j); + + #pragma omp fuse + { + { + #pragma omp reverse + for(int j = 0; j < 10; ++j); + } + for(int k = 0; k < 50; ++k); + } + } +} + +// In a template context, but expression itself not instantiation-dependent +template +static void templated_func() { + + //expected-warning@+1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + #pragma omp fuse looprange(2,1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + + //expected-error@+1 {{loop range in '#pragma omp fuse' exceeds the number of available loops: range end '5' is greater than the total number of loops '3'}} + #pragma omp fuse looprange(3,3) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } + +} + +template +static void templated_func_value_dependent() { + + //expected-warning@+1 {{loop range in '#pragma omp fuse' contains only a single loop, resulting in redundant fusion}} + #pragma omp fuse looprange(V,1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } +} + +template +static void templated_func_type_dependent() { + constexpr T s = 1; + + //expected-error@+1 {{argument to 'looprange' clause must be a strictly positive integer value}} + #pragma omp fuse looprange(s,s-1) + { + for(int i = 0; i < 10; ++i); + for(int j = 0; j < 100; ++j); + for(int k = 0; k < 50; ++k); + } +} + + +void template_inst() { + // expected-note@+1 {{in instantiation of function template specialization 'templated_func' requested here}} + templated_func(); + // expected-note@+1 {{in instantiation of function template specialization 'templated_func_value_dependent<1>' requested here}} + templated_func_value_dependent<1>(); + // expected-note@+1 {{in instantiation of function template specialization 'templated_func_type_dependent' requested here}} + templated_func_type_dependent(); + +} + + diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 1d9d1027521bc..c533ca8d339cc 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2206,6 +2206,7 @@ class EnqueueVisitor : public ConstStmtVisitor, void VisitOMPUnrollDirective(const OMPUnrollDirective *D); void VisitOMPReverseDirective(const OMPReverseDirective *D); void VisitOMPInterchangeDirective(const OMPInterchangeDirective *D); + void VisitOMPFuseDirective(const OMPFuseDirective *D); void VisitOMPForDirective(const OMPForDirective *D); void VisitOMPForSimdDirective(const OMPForSimdDirective *D); void VisitOMPSectionsDirective(const OMPSectionsDirective *D); @@ -2411,6 +2412,11 @@ void OMPClauseEnqueue::VisitOMPPartialClause(const OMPPartialClause *C) { Visitor->AddStmt(C->getFactor()); } +void OMPClauseEnqueue::VisitOMPLoopRangeClause(const OMPLoopRangeClause *C) { + Visitor->AddStmt(C->getFirst()); + Visitor->AddStmt(C->getCount()); +} + void OMPClauseEnqueue::VisitOMPAllocatorClause(const OMPAllocatorClause *C) { Visitor->AddStmt(C->getAllocator()); } @@ -3364,6 +3370,10 @@ void EnqueueVisitor::VisitOMPInterchangeDirective( VisitOMPLoopTransformationDirective(D); } +void EnqueueVisitor::VisitOMPFuseDirective(const OMPFuseDirective *D) { + VisitOMPLoopTransformationDirective(D); +} + void EnqueueVisitor::VisitOMPForDirective(const OMPForDirective *D) { VisitOMPLoopDirective(D); } @@ -6318,6 +6328,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("OMPReverseDirective"); case CXCursor_OMPInterchangeDirective: return cxstring::createRef("OMPInterchangeDirective"); + case CXCursor_OMPFuseDirective: + return cxstring::createRef("OMPFuseDirective"); case CXCursor_OMPForDirective: return cxstring::createRef("OMPForDirective"); case CXCursor_OMPForSimdDirective: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index 635d03a88d105..709fa60d28d8d 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -688,6 +688,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::OMPInterchangeDirectiveClass: K = CXCursor_OMPInterchangeDirective; break; + case Stmt::OMPFuseDirectiveClass: + K = CXCursor_OMPFuseDirective; + break; case Stmt::OMPForDirectiveClass: K = CXCursor_OMPForDirective; break; diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index df9278697346f..c220c4dafb52f 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -609,6 +609,7 @@ class ParseTreeDumper { NODE(OmpLinearClause, Modifier) NODE(parser, OmpLinearModifier) NODE_ENUM(OmpLinearModifier, Value) + NODE(parser, OmpLoopRangeClause) NODE(parser, OmpStepComplexModifier) NODE(parser, OmpStepSimpleModifier) NODE(parser, OmpLoopDirective) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 254236b510544..be80141b49e2b 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4361,6 +4361,15 @@ struct OmpLinearClause { std::tuple t; }; +// Ref: [6.0:207-208] +// +// loop-range-clause -> +// LOOPRANGE(first, count) // since 6.0 +struct OmpLoopRangeClause { + TUPLE_CLASS_BOILERPLATE(OmpLoopRangeClause); + std::tuple t; +}; + // Ref: [4.5:216-219], [5.0:315-324], [5.1:347-355], [5.2:150-158] // // map-clause -> diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index f3088b18b77ff..ea535ab3adbe7 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -998,6 +998,11 @@ Link make(const parser::OmpClause::Link &inp, return Link{/*List=*/makeObjects(inp.v, semaCtx)}; } +LoopRange make(const parser::OmpClause::Looprange &inp, + semantics::SemanticsContext &semaCtx) { + llvm_unreachable("Unimplemented: looprange"); +} + Map make(const parser::OmpClause::Map &inp, semantics::SemanticsContext &semaCtx) { // inp.v -> parser::OmpMapClause diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/lib/Lower/OpenMP/Clauses.h index d7ab21d428e32..bda8571e65f23 100644 --- a/flang/lib/Lower/OpenMP/Clauses.h +++ b/flang/lib/Lower/OpenMP/Clauses.h @@ -239,6 +239,7 @@ using Initializer = tomp::clause::InitializerT; using InReduction = tomp::clause::InReductionT; using IsDevicePtr = tomp::clause::IsDevicePtrT; using Lastprivate = tomp::clause::LastprivateT; +using LoopRange = tomp::clause::LoopRangeT; using Linear = tomp::clause::LinearT; using Link = tomp::clause::LinkT; using Map = tomp::clause::MapT; diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 52d3a5844c969..393dbe8ada002 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -841,6 +841,11 @@ TYPE_PARSER( maybe(":"_tok >> nonemptyList(Parser{})), /*PostModified=*/pure(true))) +TYPE_PARSER( + construct(scalarIntConstantExpr, + "," >> scalarIntConstantExpr) +) + // OpenMPv5.2 12.5.2 detach-clause -> DETACH (event-handle) TYPE_PARSER(construct(Parser{})) @@ -1010,6 +1015,8 @@ TYPE_PARSER( // parenthesized(Parser{}))) || "LINK" >> construct(construct( parenthesized(Parser{}))) || + "LOOPRANGE" >> construct(construct( + parenthesized(Parser{}))) || "MAP" >> construct(construct( parenthesized(Parser{}))) || "MATCH" >> construct(construct( diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index a626888b7dfe5..00b5a8c0600e1 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2314,6 +2314,13 @@ class UnparseVisitor { } } } + void Unparse(const OmpLoopRangeClause &x) { + Word("LOOPRANGE("); + Walk(std::get<0>(x.t)); + Put(", "); + Walk(std::get<1>(x.t)); + Put(")"); + } void Unparse(const OmpReductionClause &x) { using Modifier = OmpReductionClause::Modifier; Walk(std::get>>(x.t), ": "); diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index bda0d62829506..74f6e4e6a97af 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -3385,6 +3385,12 @@ CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Collapse, OMPC_collapse) CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Safelen, OMPC_safelen) CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Simdlen, OMPC_simdlen) +void OmpStructureChecker::Enter(const parser::OmpClause::Looprange &x) { + context_.Say(GetContext().clauseSource, + "LOOPRANGE clause is not implemented yet"_err_en_US, + ContextDirectiveAsFortran()); +} + // Restrictions specific to each clause are implemented apart from the // generalized restrictions. diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index e0714e812e5cd..dd51274c1aaf5 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -1233,6 +1233,15 @@ struct WriteT { using EmptyTrait = std::true_type; }; +// V6: [6.4.7] Looprange clause +template struct LoopRangeT { + using Begin = E; + using End = E; + + using TupleTrait = std::true_type; + std::tuple t; +}; + // --- template @@ -1263,9 +1272,10 @@ using TupleClausesT = DefaultmapT, DeviceT, DistScheduleT, DoacrossT, FromT, GrainsizeT, IfT, InitT, InReductionT, - LastprivateT, LinearT, MapT, - NumTasksT, OrderT, ReductionT, - ScheduleT, TaskReductionT, ToT>; + LastprivateT, LinearT, LoopRangeT, + MapT, NumTasksT, OrderT, + ReductionT, ScheduleT, + TaskReductionT, ToT>; template using UnionClausesT = std::variant>; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 0af4b436649a3..3be758686c634 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -271,6 +271,10 @@ def OMPC_Linear : Clause<"linear"> { def OMPC_Link : Clause<"link"> { let flangClass = "OmpObjectList"; } +def OMPC_LoopRange : Clause<"looprange"> { + let clangClass = "OMPLoopRangeClause"; + let flangClass = "OmpLoopRangeClause"; +} def OMPC_Map : Clause<"map"> { let clangClass = "OMPMapClause"; let flangClass = "OmpMapClause"; @@ -852,6 +856,13 @@ def OMP_For : Directive<"for"> { let category = CA_Executable; let languages = [L_C]; } +def OMP_Fuse : Directive<"fuse"> { + let allowedOnceClauses = [ + VersionedClause + ]; + let association = AS_Loop; + let category = CA_Executable; +} def OMP_Interchange : Directive<"interchange"> { let allowedOnceClauses = [ VersionedClause, diff --git a/openmp/runtime/test/transform/fuse/foreach.cpp b/openmp/runtime/test/transform/fuse/foreach.cpp new file mode 100644 index 0000000000000..cabf4bf8a511d --- /dev/null +++ b/openmp/runtime/test/transform/fuse/foreach.cpp @@ -0,0 +1,192 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + + void print(const char *msg) const { owner->print(msg); } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); +#pragma omp fuse + { + for (Reporter a{"C"}; auto &&v : Reporter("A")) + printf("v=%d\n", v); + for (Reporter aa{"D"}; auto &&vv : Reporter("B")) + printf("vv=%d\n", vv); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +// CHECK: [C] ctor +// CHECK-NEXT: [A] ctor +// CHECK-NEXT: [A] end() +// CHECK-NEXT: [A] begin() +// CHECK-NEXT: [A] begin() +// CHECK-NEXT: [A] iterator distance: 3 +// CHECK-NEXT: [D] ctor +// CHECK-NEXT: [B] ctor +// CHECK-NEXT: [B] end() +// CHECK-NEXT: [B] begin() +// CHECK-NEXT: [B] begin() +// CHECK-NEXT: [B] iterator distance: 3 +// CHECK-NEXT: [A] iterator advance: 0 += 0 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 0 +// CHECK-NEXT: v=0 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 0 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 0 +// CHECK-NEXT: vv=0 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 1 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 1 +// CHECK-NEXT: v=1 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 1 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 1 +// CHECK-NEXT: vv=1 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [A] iterator advance: 0 += 2 +// CHECK-NEXT: [A] iterator move assign +// CHECK-NEXT: [A] iterator deref: 2 +// CHECK-NEXT: v=2 +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [B] iterator advance: 0 += 2 +// CHECK-NEXT: [B] iterator move assign +// CHECK-NEXT: [B] iterator deref: 2 +// CHECK-NEXT: vv=2 +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] iterator dtor +// CHECK-NEXT: [B] dtor +// CHECK-NEXT: [D] dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] iterator dtor +// CHECK-NEXT: [A] dtor +// CHECK-NEXT: [C] dtor +// CHECK-NEXT: done + + +#endif diff --git a/openmp/runtime/test/transform/fuse/intfor.c b/openmp/runtime/test/transform/fuse/intfor.c new file mode 100644 index 0000000000000..b8171b4df7042 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/intfor.c @@ -0,0 +1,50 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include + +int main() { + printf("do\n"); +#pragma omp fuse + { + for (int i = 5; i <= 25; i += 5) + printf("i=%d\n", i); + for (int j = 10; j < 100; j += 10) + printf("j=%d\n", j); + for (int k = 10; k > 0; --k) + printf("k=%d\n", k); + } + printf("done\n"); + return EXIT_SUCCESS; +} +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: i=5 +// CHECK-NEXT: j=10 +// CHECK-NEXT: k=10 +// CHECK-NEXT: i=10 +// CHECK-NEXT: j=20 +// CHECK-NEXT: k=9 +// CHECK-NEXT: i=15 +// CHECK-NEXT: j=30 +// CHECK-NEXT: k=8 +// CHECK-NEXT: i=20 +// CHECK-NEXT: j=40 +// CHECK-NEXT: k=7 +// CHECK-NEXT: i=25 +// CHECK-NEXT: j=50 +// CHECK-NEXT: k=6 +// CHECK-NEXT: j=60 +// CHECK-NEXT: k=5 +// CHECK-NEXT: j=70 +// CHECK-NEXT: k=4 +// CHECK-NEXT: j=80 +// CHECK-NEXT: k=3 +// CHECK-NEXT: j=90 +// CHECK-NEXT: k=2 +// CHECK-NEXT: k=1 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/fuse/iterfor.cpp b/openmp/runtime/test/transform/fuse/iterfor.cpp new file mode 100644 index 0000000000000..552484b2981c4 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/iterfor.cpp @@ -0,0 +1,194 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + bool operator!=(const Iterator &that) const { + owner->print("iterator %d != %d", 2 - this->pos, 2 - that.pos); + return this->pos != that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); + Reporter C("C"); + Reporter D("D"); +#pragma omp fuse + { + for (auto it = C.begin(); it != C.end(); ++it) + printf("v=%d\n", *it); + + for (auto it = D.begin(); it != D.end(); ++it) + printf("vv=%d\n", *it); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK: [C] ctor +// CHECK-NEXT: [D] ctor +// CHECK-NEXT: [C] begin() +// CHECK-NEXT: [C] begin() +// CHECK-NEXT: [C] end() +// CHECK-NEXT: [C] iterator distance: 3 +// CHECK-NEXT: [D] begin() +// CHECK-NEXT: [D] begin() +// CHECK-NEXT: [D] end() +// CHECK-NEXT: [D] iterator distance: 3 +// CHECK-NEXT: [C] iterator advance: 0 += 0 +// CHECK-NEXT: [C] iterator move assign +// CHECK-NEXT: [C] iterator deref: 0 +// CHECK-NEXT: v=0 +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [D] iterator advance: 0 += 0 +// CHECK-NEXT: [D] iterator move assign +// CHECK-NEXT: [D] iterator deref: 0 +// CHECK-NEXT: vv=0 +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [C] iterator advance: 0 += 1 +// CHECK-NEXT: [C] iterator move assign +// CHECK-NEXT: [C] iterator deref: 1 +// CHECK-NEXT: v=1 +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [D] iterator advance: 0 += 1 +// CHECK-NEXT: [D] iterator move assign +// CHECK-NEXT: [D] iterator deref: 1 +// CHECK-NEXT: vv=1 +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [C] iterator advance: 0 += 2 +// CHECK-NEXT: [C] iterator move assign +// CHECK-NEXT: [C] iterator deref: 2 +// CHECK-NEXT: v=2 +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [D] iterator advance: 0 += 2 +// CHECK-NEXT: [D] iterator move assign +// CHECK-NEXT: [D] iterator deref: 2 +// CHECK-NEXT: vv=2 +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: done +// CHECK-NEXT: [D] iterator dtor +// CHECK-NEXT: [C] iterator dtor +// CHECK-NEXT: [D] dtor +// CHECK-NEXT: [C] dtor diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp new file mode 100644 index 0000000000000..e9f76713fe3e0 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-foreach.cpp @@ -0,0 +1,208 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + + void print(const char *msg) const { owner->print(msg); } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); +#pragma omp parallel for collapse(2) num_threads(1) + for (int i = 0; i < 3; ++i) +#pragma omp fuse + { + for (Reporter c{"init-stmt"}; auto &&v : Reporter("range")) + printf("i=%d v=%d\n", i, v); + for (int vv = 0; vv < 3; ++vv) + printf("i=%d vv=%d\n", i, vv); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: [init-stmt] ctor +// CHECK-NEXT: [range] ctor +// CHECK-NEXT: [range] end() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] iterator distance: 3 +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=0 v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=0 vv=0 +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=0 v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=0 vv=1 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=0 v=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=0 vv=2 +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=1 v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=1 vv=0 +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=1 v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=1 vv=1 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=1 v=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=1 vv=2 +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=2 v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=2 vv=0 +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=2 v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=2 vv=1 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=2 v=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: i=2 vv=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] dtor +// CHECK-NEXT: [init-stmt] dtor +// CHECK-NEXT: done + diff --git a/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c new file mode 100644 index 0000000000000..272908e72c429 --- /dev/null +++ b/openmp/runtime/test/transform/fuse/parallel-wsloop-collapse-intfor.c @@ -0,0 +1,45 @@ +// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include + +int main() { + printf("do\n"); +#pragma omp parallel for collapse(2) num_threads(1) + for (int i = 0; i < 3; ++i) +#pragma omp fuse + { + for (int j = 0; j < 3; ++j) + printf("i=%d j=%d\n", i, j); + for (int k = 0; k < 3; ++k) + printf("i=%d k=%d\n", i, k); + } + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK: i=0 j=0 +// CHECK-NEXT: i=0 k=0 +// CHECK-NEXT: i=0 j=1 +// CHECK-NEXT: i=0 k=1 +// CHECK-NEXT: i=0 j=2 +// CHECK-NEXT: i=0 k=2 +// CHECK-NEXT: i=1 j=0 +// CHECK-NEXT: i=1 k=0 +// CHECK-NEXT: i=1 j=1 +// CHECK-NEXT: i=1 k=1 +// CHECK-NEXT: i=1 j=2 +// CHECK-NEXT: i=1 k=2 +// CHECK-NEXT: i=2 j=0 +// CHECK-NEXT: i=2 k=0 +// CHECK-NEXT: i=2 j=1 +// CHECK-NEXT: i=2 k=1 +// CHECK-NEXT: i=2 j=2 +// CHECK-NEXT: i=2 k=2 +// CHECK-NEXT: done