diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index d725dfd3e94f3..6b0d783fb8c30 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -594,7 +594,7 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, sym, cannotHaveNonDefaultLowerBounds); // TODO: currently there are false positives from dead uses of the mold // arg - if (!result.getInitMoldArg().getUses().empty()) + if (result.initReadsFromMold()) mightHaveReadHostSym.insert(sym); } diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 2d8e022190f62..f5a8a7ba04375 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -146,13 +146,24 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]> return region.empty() ? nullptr : region.getArgument(0); } + /// Returns true if the init region might read from the mold argument + bool initReadsFromMold() { + BlockArgument moldArg = getInitMoldArg(); + return moldArg && !moldArg.use_empty(); + } + + /// Returns true if any region of this privatizer might read from the mold + /// argument + bool readsFromMold() { + return initReadsFromMold() || !getCopyRegion().empty(); + } + /// needsMap returns true if the value being privatized should additionally /// be mapped to the target region using a MapInfoOp. This is most common /// when an allocatable is privatized. In such cases, the descriptor is used /// in privatization and needs to be mapped on to the device. bool needsMap() { - BlockArgument moldArg = getInitMoldArg(); - return moldArg ? !moldArg.use_empty() : false; + return initReadsFromMold(); } /// Get the type for arguments to nested regions. This should diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 4b5a319f7cc8a..5035551dd6023 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -13,6 +13,7 @@ #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" #include "mlir/Analysis/TopologicalSortUtils.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" #include "mlir/IR/IRMapping.h" @@ -24,10 +25,12 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/ReplaceConstant.h" #include "llvm/Support/FileSystem.h" @@ -1349,23 +1352,23 @@ findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder, /// Initialize a single (first)private variable. You probably want to use /// allocateAndInitPrivateVars instead of this. -static llvm::Error initPrivateVar( +/// This returns the private variable which has been initialized. This +/// variable should be mapped before constructing the body of the Op. +static llvm::Expected initPrivateVar( llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg, - llvm::Value **llvmPrivateVarIt, llvm::BasicBlock *privInitBlock, + llvm::Value *llvmPrivateVar, llvm::BasicBlock *privInitBlock, llvm::DenseMap *mappedPrivateVars = nullptr) { Region &initRegion = privDecl.getInitRegion(); - if (initRegion.empty()) { - moduleTranslation.mapValue(blockArg, *llvmPrivateVarIt); - return llvm::Error::success(); - } + if (initRegion.empty()) + return llvmPrivateVar; // map initialization region block arguments llvm::Value *nonPrivateVar = findAssociatedValue( mlirPrivVar, builder, moduleTranslation, mappedPrivateVars); assert(nonPrivateVar); moduleTranslation.mapValue(privDecl.getInitMoldArg(), nonPrivateVar); - moduleTranslation.mapValue(privDecl.getInitPrivateArg(), *llvmPrivateVarIt); + moduleTranslation.mapValue(privDecl.getInitPrivateArg(), llvmPrivateVar); // in-place convert the private initialization region SmallVector phis; @@ -1376,17 +1379,15 @@ static llvm::Error initPrivateVar( assert(phis.size() == 1 && "expected one allocation to be yielded"); - // prefer the value yielded from the init region to the allocated private - // variable in case the region is operating on arguments by-value (e.g. - // Fortran character boxes). - moduleTranslation.mapValue(blockArg, phis[0]); - *llvmPrivateVarIt = phis[0]; - // clear init region block argument mapping in case it needs to be // re-created with a different source for another use of the same // reduction decl moduleTranslation.forgetMapping(initRegion); - return llvm::Error::success(); + + // Prefer the value yielded from the init region to the allocated private + // variable in case the region is operating on arguments by-value (e.g. + // Fortran character boxes). + return phis[0]; } static llvm::Error @@ -1403,15 +1404,18 @@ initPrivateVars(llvm::IRBuilderBase &builder, llvm::BasicBlock *privInitBlock = splitBB(builder, true, "omp.private.init"); setInsertPointForPossiblyEmptyBlock(builder, privInitBlock); - for (auto [idx, zip] : llvm::enumerate( - llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs))) { - auto [privDecl, mlirPrivVar, blockArg] = zip; - llvm::Error err = initPrivateVar( + for (auto [idx, zip] : llvm::enumerate(llvm::zip_equal( + privateDecls, mlirPrivateVars, privateBlockArgs, llvmPrivateVars))) { + auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVar] = zip; + llvm::Expected privVarOrErr = initPrivateVar( builder, moduleTranslation, privDecl, mlirPrivVar, blockArg, - llvmPrivateVars.begin() + idx, privInitBlock, mappedPrivateVars); + llvmPrivateVar, privInitBlock, mappedPrivateVars); + + if (!privVarOrErr) + return privVarOrErr.takeError(); - if (err) - return err; + llvmPrivateVar = privVarOrErr.get(); + moduleTranslation.mapValue(blockArg, llvmPrivateVar); setInsertPointForPossiblyEmptyBlock(builder); } @@ -1465,7 +1469,7 @@ static LogicalResult copyFirstPrivateVars(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl &mlirPrivateVars, - SmallVectorImpl &llvmPrivateVars, + ArrayRef llvmPrivateVars, SmallVectorImpl &privateDecls) { // Apply copy region for firstprivate. bool needsFirstprivate = @@ -1762,6 +1766,119 @@ buildDependData(std::optional dependKinds, OperandRange dependVars, } } +namespace { +/// TaskContextStructManager takes care of creating and freeing a structure +/// containing information needed by the task body to execute. +class TaskContextStructManager { +public: + TaskContextStructManager(llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + MutableArrayRef privateDecls) + : builder{builder}, moduleTranslation{moduleTranslation}, + privateDecls{privateDecls} {} + + /// Creates a heap allocated struct containing space for each private + /// variable. Invariant: privateVarTypes, privateDecls, and the elements of + /// the structure should all have the same order (although privateDecls which + /// do not read from the mold argument are skipped). + void generateTaskContextStruct(); + + /// Create GEPs to access each member of the structure representing a private + /// variable, adding them to llvmPrivateVars. Null values are added where + /// private decls were skipped so that the ordering continues to match the + /// private decls. + void createGEPsToPrivateVars(); + + /// De-allocate the task context structure. + void freeStructPtr(); + + MutableArrayRef getLLVMPrivateVarGEPs() { + return llvmPrivateVarGEPs; + } + + llvm::Value *getStructPtr() { return structPtr; } + +private: + llvm::IRBuilderBase &builder; + LLVM::ModuleTranslation &moduleTranslation; + MutableArrayRef privateDecls; + + /// The type of each member of the structure, in order. + SmallVector privateVarTypes; + + /// LLVM values for each private variable, or null if that private variable is + /// not included in the task context structure + SmallVector llvmPrivateVarGEPs; + + /// A pointer to the structure containing context for this task. + llvm::Value *structPtr = nullptr; + /// The type of the structure + llvm::Type *structTy = nullptr; +}; +} // namespace + +void TaskContextStructManager::generateTaskContextStruct() { + if (privateDecls.empty()) + return; + privateVarTypes.reserve(privateDecls.size()); + + for (omp::PrivateClauseOp &privOp : privateDecls) { + // Skip private variables which can safely be allocated and initialised + // inside of the task + if (!privOp.readsFromMold()) + continue; + Type mlirType = privOp.getType(); + privateVarTypes.push_back(moduleTranslation.convertType(mlirType)); + } + + structTy = llvm::StructType::get(moduleTranslation.getLLVMContext(), + privateVarTypes); + + llvm::DataLayout dataLayout = + builder.GetInsertBlock()->getModule()->getDataLayout(); + llvm::Type *intPtrTy = builder.getIntPtrTy(dataLayout); + llvm::Constant *allocSize = llvm::ConstantExpr::getSizeOf(structTy); + + // Heap allocate the structure + structPtr = builder.CreateMalloc(intPtrTy, structTy, allocSize, + /*ArraySize=*/nullptr, /*MallocF=*/nullptr, + "omp.task.context_ptr"); +} + +void TaskContextStructManager::createGEPsToPrivateVars() { + if (!structPtr) { + assert(privateVarTypes.empty()); + return; + } + + // Create GEPs for each struct member + llvmPrivateVarGEPs.clear(); + llvmPrivateVarGEPs.reserve(privateDecls.size()); + llvm::Value *zero = builder.getInt32(0); + unsigned i = 0; + for (auto privDecl : privateDecls) { + if (!privDecl.readsFromMold()) { + // Handle this inside of the task so we don't pass unnessecary vars in + llvmPrivateVarGEPs.push_back(nullptr); + continue; + } + llvm::Value *iVal = builder.getInt32(i); + llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal}); + llvmPrivateVarGEPs.push_back(gep); + i += 1; + } +} + +void TaskContextStructManager::freeStructPtr() { + if (!structPtr) + return; + + llvm::IRBuilderBase::InsertPointGuard guard{builder}; + // Ensure we don't put the call to free() after the terminator + builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator()); + builder.CreateFree(structPtr); +} + /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, @@ -1779,6 +1896,8 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, mlirPrivateVars.reserve(privateBlockArgs.size()); llvmPrivateVars.reserve(privateBlockArgs.size()); collectPrivatizationDecls(taskOp, privateDecls); + TaskContextStructManager taskStructMgr{builder, moduleTranslation, + privateDecls}; for (mlir::Value privateVar : taskOp.getPrivateVars()) mlirPrivateVars.push_back(privateVar); @@ -1826,30 +1945,61 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, moduleTranslation, allocaIP); // Allocate and initialize private variables - // TODO: package private variables up in a structure - for (auto [privDecl, mlirPrivVar, blockArg] : - llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs)) { - llvm::Type *llvmAllocType = - moduleTranslation.convertType(privDecl.getType()); + builder.SetInsertPoint(initBlock->getTerminator()); + + // Create task variable structure + taskStructMgr.generateTaskContextStruct(); + // GEPs so that we can initialize the variables. Don't use these GEPs inside + // of the body otherwise it will be the GEP not the struct which is fowarded + // to the outlined function. GEPs forwarded in this way are passed in a + // stack-allocated (by OpenMPIRBuilder) structure which is not safe for tasks + // which may not be executed until after the current stack frame goes out of + // scope. + taskStructMgr.createGEPsToPrivateVars(); + + for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] : + llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs, + taskStructMgr.getLLVMPrivateVarGEPs())) { + // To be handled inside the task. + if (!privDecl.readsFromMold()) + continue; + assert(llvmPrivateVarAlloc && + "reads from mold so shouldn't have been skipped"); - // Allocations: - builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); - llvm::Value *llvmPrivateVar = builder.CreateAlloca( - llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc"); + llvm::Expected privateVarOrErr = + initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar, + blockArg, llvmPrivateVarAlloc, initBlock); + if (!privateVarOrErr) + return handleError(privateVarOrErr, *taskOp.getOperation()); - builder.SetInsertPoint(initBlock->getTerminator()); - auto err = initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar, - blockArg, &llvmPrivateVar, initBlock); - if (err) - return handleError(std::move(err), *taskOp.getOperation()); + llvm::IRBuilderBase::InsertPointGuard guard(builder); + builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator()); + + // TODO: this is a bit of a hack for Fortran character boxes. + // Character boxes are passed by value into the init region and then the + // initialized character box is yielded by value. Here we need to store the + // yielded value into the private allocation, and load the private + // allocation to match the type expected by region block arguments. + if ((privateVarOrErr.get() != llvmPrivateVarAlloc) && + !mlir::isa(blockArg.getType())) { + builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc); + // Load it so we have the value pointed to by the GEP + llvmPrivateVarAlloc = builder.CreateLoad(privateVarOrErr.get()->getType(), + llvmPrivateVarAlloc); + } + assert(llvmPrivateVarAlloc->getType() == + moduleTranslation.convertType(blockArg.getType())); - llvmPrivateVars.push_back(llvmPrivateVar); + // Mapping blockArg -> llvmPrivateVarAlloc is done inside the body callback + // so that OpenMPIRBuilder doesn't try to pass each GEP address through a + // stack allocated structure. } // firstprivate copy region - builder.SetInsertPoint(copyBlock->getTerminator()); + setInsertPointForPossiblyEmptyBlock(builder, copyBlock); if (failed(copyFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars, - llvmPrivateVars, privateDecls))) + taskStructMgr.getLLVMPrivateVarGEPs(), + privateDecls))) return llvm::failure(); // Set up for call to createTask() @@ -1857,8 +2007,66 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) -> llvm::Error { - builder.restoreIP(codegenIP); + // Save the alloca insertion point on ModuleTranslation stack for use in + // nested regions. + LLVM::ModuleTranslation::SaveStack frame( + moduleTranslation, allocaIP); + // translate the body of the task: + builder.restoreIP(codegenIP); + + llvm::BasicBlock *privInitBlock = nullptr; + llvmPrivateVars.resize(privateBlockArgs.size()); + for (auto [i, zip] : llvm::enumerate(llvm::zip_equal( + privateBlockArgs, privateDecls, mlirPrivateVars))) { + auto [blockArg, privDecl, mlirPrivVar] = zip; + // This is handled before the task executes + if (privDecl.readsFromMold()) + continue; + + llvm::IRBuilderBase::InsertPointGuard guard(builder); + llvm::Type *llvmAllocType = + moduleTranslation.convertType(privDecl.getType()); + builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); + llvm::Value *llvmPrivateVar = builder.CreateAlloca( + llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc"); + + llvm::Expected privateVarOrError = + initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar, + blockArg, llvmPrivateVar, privInitBlock); + if (!privateVarOrError) + return privateVarOrError.takeError(); + moduleTranslation.mapValue(blockArg, privateVarOrError.get()); + llvmPrivateVars[i] = privateVarOrError.get(); + } + + taskStructMgr.createGEPsToPrivateVars(); + for (auto [i, llvmPrivVar] : + llvm::enumerate(taskStructMgr.getLLVMPrivateVarGEPs())) { + if (!llvmPrivVar) { + assert(llvmPrivateVars[i] && "This is added in the loop above"); + continue; + } + llvmPrivateVars[i] = llvmPrivVar; + } + + // Find and map the addresses of each variable within the task context + // structure + for (auto [blockArg, llvmPrivateVar, privateDecl] : + llvm::zip_equal(privateBlockArgs, llvmPrivateVars, privateDecls)) { + // This was handled above. + if (!privateDecl.readsFromMold()) + continue; + // Fix broken pass-by-value case for Fortran character boxes + if (!mlir::isa(blockArg.getType())) { + llvmPrivateVar = builder.CreateLoad( + moduleTranslation.convertType(blockArg.getType()), llvmPrivateVar); + } + assert(llvmPrivateVar->getType() == + moduleTranslation.convertType(blockArg.getType())); + moduleTranslation.mapValue(blockArg, llvmPrivateVar); + } + auto continuationBlockOrError = convertOmpOpRegions( taskOp.getRegion(), "omp.task.region", builder, moduleTranslation); if (failed(handleError(continuationBlockOrError, *taskOp))) @@ -1870,6 +2078,9 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, llvmPrivateVars, privateDecls))) return llvm::make_error(); + // Free heap allocated task context structure at the end of the task. + taskStructMgr.freeStructPtr(); + return llvm::Error::success(); }; diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 2f3df35f541d0..f25ba4aa3c8dc 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -2795,9 +2795,10 @@ llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) { // CHECK: call i32 @__kmpc_omp_task({{.*}}, ptr %[[TASK_ALLOC]]) // CHECK: define internal void @[[task_outlined_fn]](i32 %[[GLOBAL_TID_VAL:.*]], ptr %[[STRUCT_ARG:.*]]) // CHECK: %[[LOADED_STRUCT_PTR:.*]] = load ptr, ptr %[[STRUCT_ARG]], align 8 -// CHECK: %[[GEP_STRUCTARG:.*]] = getelementptr { ptr, ptr }, ptr %[[LOADED_STRUCT_PTR]], i32 0, i32 0 +// CHECK: %[[GEP_STRUCTARG:.*]] = getelementptr { ptr }, ptr %[[LOADED_STRUCT_PTR]], i32 0, i32 0 // CHECK: %[[LOADGEP_STRUCTARG:.*]] = load ptr, ptr %[[GEP_STRUCTARG]], align 8 -// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[LOADGEP_STRUCTARG]]) +// CHEKC: %[[NEW_STRUCTARG:.*]] = alloca { ptr }, align 8 +// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], // CHECK: define internal void @[[parallel_outlined_fn]] // ----- @@ -2825,14 +2826,15 @@ llvm.func @task(%arg0 : !llvm.ptr) { // CHECK-LABEL: @task // CHECK-SAME: (ptr %[[ARG:.*]]) // CHECK: %[[STRUCT_ARG:.*]] = alloca { ptr }, align 8 -// CHECK: %[[OMP_PRIVATE_ALLOC:.*]] = alloca i32, align 4 // ... // CHECK: br label %omp.private.init // CHECK: omp.private.init: +// CHECK: %[[TASK_STRUCT:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ({ i32 }, ptr null, i32 1) to i64)) +// CHECK: %[[GEP:.*]] = getelementptr { i32 }, ptr %[[TASK_STRUCT:.*]], i32 0, i32 0 // CHECK: br label %omp.private.copy1 // CHECK: omp.private.copy1: // CHECK: %[[LOADED:.*]] = load i32, ptr %[[ARG]], align 4 -// CHECK: store i32 %[[LOADED]], ptr %[[OMP_PRIVATE_ALLOC]], align 4 +// CHECK: store i32 %[[LOADED]], ptr %[[GEP]], align 4 // ... // CHECK: br label %omp.task.start // CHECK: omp.task.start: @@ -2846,12 +2848,13 @@ llvm.func @task(%arg0 : !llvm.ptr) { // CHECK: %[[VAL_14:.*]] = load ptr, ptr %[[VAL_13]], align 8 // CHECK: br label %task.body // CHECK: task.body: ; preds = %task.alloca +// CHECK: %[[VAL_15:.*]] = getelementptr { i32 }, ptr %[[VAL_14]], i32 0, i32 0 // CHECK: br label %omp.task.region // CHECK: omp.task.region: ; preds = %task.body -// CHECK: call void @foo(ptr %[[VAL_14]]) +// CHECK: call void @foo(ptr %[[VAL_15]]) // CHECK: br label %omp.region.cont // CHECK: omp.region.cont: ; preds = %omp.task.region -// CHECK: call void @destroy(ptr %[[VAL_14]]) +// CHECK: call void @destroy(ptr %[[VAL_15]]) // CHECK: br label %task.exit.exitStub // CHECK: task.exit.exitStub: ; preds = %omp.region.cont // CHECK: ret void diff --git a/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir b/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir new file mode 100644 index 0000000000000..f7a8b970f5f80 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-task-privatization.mlir @@ -0,0 +1,79 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +omp.private {type = private} @privatizer : i32 + +omp.private {type = firstprivate} @firstprivatizer : i32 copy { +^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + %0 = llvm.load %arg0 : !llvm.ptr -> i32 + llvm.store %0, %arg1 : i32, !llvm.ptr + omp.yield(%arg1 : !llvm.ptr) +} + +llvm.func @task_privatization_test() { + %c0 = llvm.mlir.constant(0: i32) : i32 + %c1 = llvm.mlir.constant(1: i32) : i32 + %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr + %1 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr + llvm.store %c0, %0 : i32, !llvm.ptr + llvm.store %c1, %1 : i32, !llvm.ptr + + omp.task private(@privatizer %0 -> %arg0, @firstprivatizer %1 -> %arg1 : !llvm.ptr, !llvm.ptr) { + %2 = llvm.load %arg1 : !llvm.ptr -> i32 + llvm.store %2, %arg0 : i32, !llvm.ptr + omp.terminator + } + llvm.return +} + +// CHECK: define void @task_privatization_test() +// CHECK: %[[STRUCT_ARG:.*]] = alloca { ptr }, align 8 +// CHECK: %[[VAL_0:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_1:.*]] = alloca i32, align 4 +// CHECK: store i32 0, ptr %[[VAL_0]], align 4 +// CHECK: store i32 1, ptr %[[VAL_1]], align 4 +// CHECK: br label %entry +// CHECK: entry: +// CHECK: br label %omp.private.init +// CHECK: omp.private.init: +// CHECK: %[[VAL_5:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ([[STRUCT_KMP_PRIVATES_T:.*]], ptr null, i32 1) to i64)) +// CHECK: %[[VAL_7:.*]] = getelementptr { i32 }, ptr %[[VAL_5]], i32 0, i32 0 +// CHECK: br label %omp.private.copy +// CHECK: omp.private.copy: +// CHECK: %[[VAL_10:.*]] = load i32, ptr %[[VAL_1]], align 4 +// CHECK: store i32 %[[VAL_10]], ptr %[[VAL_7]], align 4 +// CHECK: br label %omp.task.start +// CHECK: omp.task.start: +// CHECK: br label %codeRepl +// CHECK: codeRepl: +// CHECK: %[[GEP_OMP_TASK_CONTEXT_PTR:.*]] = getelementptr { ptr }, ptr %[[STRUCT_ARG]], i32 0, i32 0 +// CHECK: store ptr %[[VAL_5]], ptr %[[GEP_OMP_TASK_CONTEXT_PTR]], align 8 +// CHECK: %[[VAL_14:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) +// CHECK: %[[VAL_15:.*]] = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %[[VAL_14]], i32 1, i64 40, i64 8, ptr @task_privatization_test..omp_par) +// CHECK: %[[ALLOCATED_TASK_STRUCT:.*]] = load ptr, ptr %[[VAL_15]], align 8 +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %[[ALLOCATED_TASK_STRUCT]], ptr align 1 %[[STRUCT_ARG]], i64 8, i1 false) +// CHECK: %[[VAL_16:.*]] = call i32 @__kmpc_omp_task(ptr @1, i32 %[[VAL_14]], ptr %[[VAL_15]]) +// CHECK: br label %[[VAL_17:.*]] +// CHECK: task.exit: +// CHECK: ret void + +// CHECK-LABEL: define internal void @task_privatization_test..omp_par( +// CHECK-SAME: i32 %[[GLOBAL_TID_VAL:.*]], ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR_PTR:.*]]) +// CHECK: task.alloca: +// CHECK: %[[OMP_TASK_CONEXT_PTR_PTR_PTR:.*]] = load ptr, ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR_PTR]], align 8 +// CHECK: %[[OMP_TASK_CONTEXT_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR:.*]], i32 0, i32 0 +// CHECK: %[[OMP_TASK_CONTEXT_PTR:.*]] = load ptr, ptr %[[OMP_TASK_CONTEXT_PTR_PTR:.*]], align 8 +// CHECK: %[[OMP_PRIVATE_ALLOC:.*]] = alloca i32, align 4 +// CHECK: br label %[[VAL_18:.*]] +// CHECK: task.body: ; preds = %[[VAL_19:.*]] +// CHECK: %[[VAL_20:.*]] = getelementptr { i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 0 +// CHECK: br label %[[VAL_23:.*]] +// CHECK: omp.task.region: ; preds = %[[VAL_18]] +// CHECK: %[[VAL_24:.*]] = load i32, ptr %[[VAL_20]], align 4 +// CHECK: store i32 %[[VAL_24]], ptr %[[OMP_PRIVATE_ALLOC]], align 4 +// CHECK: br label %[[VAL_25:.*]] +// CHECK: omp.region.cont: ; preds = %[[VAL_23]] +// CHECK: tail call void @free(ptr %[[OMP_TASK_CONTEXT_PTR]]) +// CHECK: br label %[[VAL_26:.*]] +// CHECK: task.exit.exitStub: ; preds = %[[VAL_25]] +// CHECK: ret void +