From 7a86041636c7c11bc5b1d3ff907b54495a1c3aab Mon Sep 17 00:00:00 2001 From: Thirumalai-Shaktivel Date: Mon, 27 Jan 2025 09:25:21 +0000 Subject: [PATCH 1/2] [Flang] Add lowering support for depobj in depend clause From Documentation: depobj: The task dependences are derived from the depend clause specified in the depobj constructs that initialized dependences represented by the depend objects specified in the depend clause as if the depend clauses of the depobj constructs were specified in the current construct. Implementation details: - The variable is of type omp_depend_kind and is used as a locator_list. - Load the base address stored in each depend object and compute the total number of dependences: the sum of the entry counts read from the depend objects plus the number of non-depobj depend clauses. - Allocate an array of struct.kmp_dep_info with the size computed above. - Populate the alloca with all the depend clause information: the non-depobj clauses are stored first, at indices 0, 1, 2, ..., followed by the entries copied from each depend object. - The alloca and its size are then passed as arguments to the __kmpc_omp_task_with_deps runtime function. - `Stacksave` and `Stackrestore` are used to restore the stack pointer to its state before the depobj operations, effectively releasing all the allocas they created. TODO: Requires depobj construct support for checking runtime results. 
Also, test depobj modify and destroy clauses --- flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 7 +- .../OpenMP/Todo/depend-clause-depobj.f90 | 10 --- flang/test/Lower/OpenMP/task.f90 | 8 +- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 6 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 80 ++++++++++++++++--- .../mlir/Dialect/OpenMP/OpenMPEnums.td | 10 ++- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 7 +- mlir/test/Target/LLVMIR/openmp-llvm.mlir | 67 ++++++++++++++-- 8 files changed, 158 insertions(+), 37 deletions(-) delete mode 100644 flang/test/Lower/OpenMP/Todo/depend-clause-depobj.f90 diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 299d9d438f115..3378ea2fc2b41 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -139,7 +139,6 @@ static mlir::omp::ClauseTaskDependAttr genDependKindAttr(lower::AbstractConverter &converter, const omp::clause::DependenceType kind) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::Location currentLocation = converter.getCurrentLocation(); mlir::omp::ClauseTaskDepend pbKind; switch (kind) { @@ -152,15 +151,15 @@ genDependKindAttr(lower::AbstractConverter &converter, case omp::clause::DependenceType::Inout: pbKind = mlir::omp::ClauseTaskDepend::taskdependinout; break; + case omp::clause::DependenceType::Depobj: + pbKind = mlir::omp::ClauseTaskDepend::taskdependdepobj; + break; case omp::clause::DependenceType::Mutexinoutset: pbKind = mlir::omp::ClauseTaskDepend::taskdependmutexinoutset; break; case omp::clause::DependenceType::Inoutset: pbKind = mlir::omp::ClauseTaskDepend::taskdependinoutset; break; - case omp::clause::DependenceType::Depobj: - TODO(currentLocation, "DEPOBJ dependence-type"); - break; case omp::clause::DependenceType::Sink: case omp::clause::DependenceType::Source: llvm_unreachable("unhandled parser task dependence type"); diff --git a/flang/test/Lower/OpenMP/Todo/depend-clause-depobj.f90 
b/flang/test/Lower/OpenMP/Todo/depend-clause-depobj.f90 deleted file mode 100644 index 4e98d77d0bb3e..0000000000000 --- a/flang/test/Lower/OpenMP/Todo/depend-clause-depobj.f90 +++ /dev/null @@ -1,10 +0,0 @@ -!RUN: %not_todo_cmd bbc -emit-hlfir -fopenmp -fopenmp-version=52 -o - %s 2>&1 | FileCheck %s -!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 -o - %s 2>&1 | FileCheck %s - -!CHECK: not yet implemented: DEPOBJ dependence-type - -subroutine f00(x) - integer :: x - !$omp task depend(depobj: x) - !$omp end task -end diff --git a/flang/test/Lower/OpenMP/task.f90 b/flang/test/Lower/OpenMP/task.f90 index 13ebf2acd9101..28d1b36a162a7 100644 --- a/flang/test/Lower/OpenMP/task.f90 +++ b/flang/test/Lower/OpenMP/task.f90 @@ -150,12 +150,18 @@ subroutine task_depend_multi_task() x = x - 12 !CHECK: omp.terminator !$omp end task - !CHECK: omp.task depend(taskdependinoutset -> %{{.+}} : !fir.ref) + !CHECK: omp.task depend(taskdependinoutset -> %{{.+}} : !fir.ref) !$omp task depend(inoutset : x) !CHECK: arith.subi x = x - 12 !CHECK: omp.terminator !$omp end task + !CHECK: omp.task depend(taskdependdepobj -> %{{.+}} : !fir.ref) + !$omp task depend(depobj: obj) + ! CHECK: arith.addi + x = x + 73 + ! 
CHECK: omp.terminator + !$omp end task end subroutine task_depend_multi_task !=============================================================================== diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 9802cbe8b7b94..2d996e5fe3554 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1241,10 +1241,12 @@ class OpenMPIRBuilder { omp::RTLDependenceKindTy DepKind = omp::RTLDependenceKindTy::DepUnknown; Type *DepValueType; Value *DepVal; + bool isTypeDepObj; explicit DependData() = default; DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType, - Value *DepVal) - : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {} + Value *DepVal, bool isTypeDepObj = false) + : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal), + isTypeDepObj(isTypeDepObj) {} }; /// Generator for `#omp task` diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 8cc3a99d92023..476c0c80b985a 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2049,19 +2049,61 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask( Builder.CreateStore(Priority, CmplrData); } - Value *DepArray = nullptr; + Value *DepAlloca = nullptr; + Value *stackSave = nullptr; + Value *depSize = Builder.getInt32(Dependencies.size()); if (Dependencies.size()) { InsertPointTy OldIP = Builder.saveIP(); Builder.SetInsertPoint( &OldIP.getBlock()->getParent()->getEntryBlock().back()); Type *DepArrayTy = ArrayType::get(DependInfo, Dependencies.size()); - DepArray = Builder.CreateAlloca(DepArrayTy, nullptr, ".dep.arr.addr"); + + // Used to keep a count of other dependence type apart from DEPOBJ + size_t otherDepTypeCount = 0; + SmallVector objsVal; + // Load all the value of DEPOBJ object from omp_depend_t object + for (const DependData &dep : 
Dependencies) { + if (dep.isTypeDepObj) { + Value *loadDepVal = Builder.CreateLoad(VoidPtr, dep.DepVal); + Value *depValGEP = + Builder.CreateGEP(DependInfo, loadDepVal, Builder.getInt64(-1)); + Value *obj = + Builder.CreateConstInBoundsGEP2_64(DependInfo, depValGEP, 0, 0); + Value *objVal = Builder.CreateLoad(Builder.getInt64Ty(), obj); + objsVal.push_back(objVal); + } else { + otherDepTypeCount++; + } + } + + // Add all the values and use it as the size for DependInfo alloca + if (objsVal.size() > 0) { + depSize = objsVal[0]; + for (size_t i = 1; i < objsVal.size(); i++) + depSize = Builder.CreateAdd(depSize, objsVal[i]); + if (otherDepTypeCount > 0) + depSize = + Builder.CreateAdd(depSize, Builder.getInt64(otherDepTypeCount)); + } + + if (!isa(depSize)) { + // stackSave to save the stack pointer + if (!stackSave) + stackSave = Builder.CreateStackSave(); + DepAlloca = Builder.CreateAlloca(DependInfo, depSize, "dep.addr"); + ((AllocaInst *)DepAlloca)->setAlignment(Align(16)); + depSize = Builder.CreateTrunc(depSize, Builder.getInt32Ty()); + } else { + DepAlloca = Builder.CreateAlloca(DepArrayTy, nullptr, ".dep.arr.addr"); + } unsigned P = 0; for (const DependData &Dep : Dependencies) { + if (Dep.isTypeDepObj) + continue; Value *Base = - Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, P); + Builder.CreateGEP(DependInfo, DepAlloca, Builder.getInt64(P)); // Store the pointer to the variable Value *Addr = Builder.CreateStructGEP( DependInfo, Base, @@ -2087,6 +2129,23 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask( ++P; } + P = 0; + Value *depAllocaIdx = Builder.getInt64(otherDepTypeCount); + for (const DependData &dep : Dependencies) { + if (dep.isTypeDepObj) { + Value *depAllocaPtr = + Builder.CreateGEP(DependInfo, DepAlloca, depAllocaIdx); + Align alignment = Align(8); + Value *loadDepVal = Builder.CreateLoad(VoidPtr, dep.DepVal); + Value *memCpySize = + Builder.CreateMul(Builder.getInt64(24), objsVal[P]); + 
Builder.CreateMemCpy(depAllocaPtr, alignment, loadDepVal, alignment, + memCpySize); + depAllocaIdx = Builder.CreateAdd(depAllocaIdx, objsVal[P]); + ++P; + } + } + Builder.restoreIP(OldIP); } @@ -2124,7 +2183,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask( getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps); Builder.CreateCall( TaskWaitFn, - {Ident, ThreadID, Builder.getInt32(Dependencies.size()), DepArray, + {Ident, ThreadID, Builder.getInt32(Dependencies.size()), DepAlloca, ConstantInt::get(Builder.getInt32Ty(), 0), ConstantPointerNull::get(PointerType::getUnqual(M.getContext()))}); } @@ -2146,12 +2205,13 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask( if (Dependencies.size()) { Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps); - Builder.CreateCall( - TaskFn, - {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()), - DepArray, ConstantInt::get(Builder.getInt32Ty(), 0), - ConstantPointerNull::get(PointerType::getUnqual(M.getContext()))}); - + Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData, depSize, DepAlloca, + ConstantInt::get(Builder.getInt32Ty(), 0), + ConstantPointerNull::get( + PointerType::getUnqual(M.getContext()))}); + // stackSave is used by depend(depobj: x) clause to save the stack pointer + if (stackSave) + Builder.CreateStackRestore(stackSave); } else { // Emit the @__kmpc_omp_task runtime call to spawn the task Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task); diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td index 690e3df1f685e..bbe1174775184 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td @@ -111,12 +111,14 @@ def ClauseTaskDependInOut : I32EnumAttrCase<"taskdependinout", 2>; def ClauseTaskDependMutexInOutSet : I32EnumAttrCase<"taskdependmutexinoutset", 3>; def ClauseTaskDependInOutSet : 
I32EnumAttrCase<"taskdependinoutset", 4>; +def ClauseTaskDependDepObj : I32EnumAttrCase<"taskdependdepobj", 5>; def ClauseTaskDepend - : OpenMP_I32EnumAttr< - "ClauseTaskDepend", "depend clause in a target or task construct", - [ClauseTaskDependIn, ClauseTaskDependOut, ClauseTaskDependInOut, - ClauseTaskDependMutexInOutSet, ClauseTaskDependInOutSet]>; + : OpenMP_I32EnumAttr<"ClauseTaskDepend", + "depend clause in a target or task construct", + [ClauseTaskDependIn, ClauseTaskDependOut, + ClauseTaskDependInOut, ClauseTaskDependMutexInOutSet, + ClauseTaskDependInOutSet, ClauseTaskDependDepObj]>; def ClauseTaskDependAttr : OpenMP_EnumAttr { diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 3fcdefa8a2f67..de4bd108fff67 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1715,6 +1715,7 @@ buildDependData(std::optional dependKinds, OperandRange dependVars, return; for (auto dep : llvm::zip(dependVars, dependKinds->getValue())) { llvm::omp::RTLDependenceKindTy type; + bool isTypeDepObj = false; switch ( cast(std::get<1>(dep)).getValue()) { case mlir::omp::ClauseTaskDepend::taskdependin: @@ -1733,9 +1734,13 @@ buildDependData(std::optional dependKinds, OperandRange dependVars, case mlir::omp::ClauseTaskDepend::taskdependinoutset: type = llvm::omp::RTLDependenceKindTy::DepInOutSet; break; + case mlir::omp::ClauseTaskDepend::taskdependdepobj: + isTypeDepObj = true; + break; }; llvm::Value *depVal = moduleTranslation.lookupValue(std::get<0>(dep)); - llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal); + llvm::OpenMPIRBuilder::DependData dd(type, depVal->getType(), depVal, + isTypeDepObj); dds.emplace_back(dd); } } diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 9868ef227d49e..b8ae3d0bec2c8 
100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -2653,7 +2653,7 @@ llvm.func @omp_task_attrs() -> () attributes { // CHECK-LABEL: define void @omp_task_with_deps // CHECK-SAME: (ptr %[[zaddr:.+]]) // CHECK: %[[dep_arr_addr:.+]] = alloca [1 x %struct.kmp_dep_info], align 8 -// CHECK: %[[dep_arr_addr_0:.+]] = getelementptr inbounds [1 x %struct.kmp_dep_info], ptr %[[dep_arr_addr]], i64 0, i64 0 +// CHECK: %[[dep_arr_addr_0:.+]] = getelementptr %struct.kmp_dep_info, ptr %[[dep_arr_addr]], i64 0 // CHECK: %[[dep_arr_addr_0_val:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[dep_arr_addr_0]], i32 0, i32 0 // CHECK: %[[dep_arr_addr_0_val_int:.+]] = ptrtoint ptr %0 to i64 // CHECK: store i64 %[[dep_arr_addr_0_val_int]], ptr %[[dep_arr_addr_0_val]], align 4 @@ -2664,28 +2664,28 @@ llvm.func @omp_task_attrs() -> () attributes { // ----- // dependence_type: Out // CHECK: %[[DEP_ARR_ADDR1:.+]] = alloca [1 x %struct.kmp_dep_info], align 8 -// CHECK: %[[DEP_ARR_ADDR_1:.+]] = getelementptr inbounds [1 x %struct.kmp_dep_info], ptr %[[DEP_ARR_ADDR1]], i64 0, i64 0 +// CHECK: %[[DEP_ARR_ADDR_1:.+]] = getelementptr %struct.kmp_dep_info, ptr %[[DEP_ARR_ADDR1]], i64 0 // [...] // CHECK: %[[DEP_TYPE_1:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[DEP_ARR_ADDR_1]], i32 0, i32 2 // CHECK: store i8 3, ptr %[[DEP_TYPE_1]], align 1 // ----- // dependence_type: Inout // CHECK: %[[DEP_ARR_ADDR2:.+]] = alloca [1 x %struct.kmp_dep_info], align 8 -// CHECK: %[[DEP_ARR_ADDR_2:.+]] = getelementptr inbounds [1 x %struct.kmp_dep_info], ptr %[[DEP_ARR_ADDR2]], i64 0, i64 0 +// CHECK: %[[DEP_ARR_ADDR_2:.+]] = getelementptr %struct.kmp_dep_info, ptr %[[DEP_ARR_ADDR2]], i64 0 // [...] 
// CHECK: %[[DEP_TYPE_2:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[DEP_ARR_ADDR_2]], i32 0, i32 2 // CHECK: store i8 3, ptr %[[DEP_TYPE_2]], align 1 // ----- // dependence_type: Mutexinoutset // CHECK: %[[DEP_ARR_ADDR3:.+]] = alloca [1 x %struct.kmp_dep_info], align 8 -// CHECK: %[[DEP_ARR_ADDR_3:.+]] = getelementptr inbounds [1 x %struct.kmp_dep_info], ptr %[[DEP_ARR_ADDR3]], i64 0, i64 0 +// CHECK: %[[DEP_ARR_ADDR_3:.+]] = getelementptr %struct.kmp_dep_info, ptr %[[DEP_ARR_ADDR3]], i64 0 // [...] // CHECK: %[[DEP_TYPE_3:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[DEP_ARR_ADDR_3]], i32 0, i32 2 // CHECK: store i8 4, ptr %[[DEP_TYPE_3]], align 1 // ----- // dependence_type: Inoutset // CHECK: %[[DEP_ARR_ADDR4:.+]] = alloca [1 x %struct.kmp_dep_info], align 8 -// CHECK: %[[DEP_ARR_ADDR_4:.+]] = getelementptr inbounds [1 x %struct.kmp_dep_info], ptr %[[DEP_ARR_ADDR4]], i64 0, i64 0 +// CHECK: %[[DEP_ARR_ADDR_4:.+]] = getelementptr %struct.kmp_dep_info, ptr %[[DEP_ARR_ADDR4]], i64 0 // [...] 
// CHECK: %[[DEP_TYPE_4:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[DEP_ARR_ADDR_4]], i32 0, i32 2 // CHECK: store i8 8, ptr %[[DEP_TYPE_4]], align 1 @@ -2734,6 +2734,63 @@ llvm.func @omp_task_with_deps(%zaddr: !llvm.ptr) { // ----- +// CHECK-LABEL: define void @omp_task_with_deps_02(ptr %0, ptr %1) { + +// CHECK: %[[obj:.+]] = alloca i64, i64 1, align 8 +// CHECK: %[[obj_load_01:.+]] = load ptr, ptr %[[obj]], align 8 +// CHECK: %[[gep_01:.+]] = getelementptr %struct.kmp_dep_info, ptr %[[obj_load_01]], i64 -1 +// CHECK: %[[gep_02:.+]] = getelementptr inbounds %struct.kmp_dep_info, ptr %[[gep_01]], i64 0, i64 0 +// CHECK: %[[obj_addr:.+]] = load i64, ptr %[[gep_02]], align 4 + +// CHECK: %[[size:.+]] = add i64 %[[obj_addr]], 2 + +// CHECK: %[[stack_ptr:.+]] = call ptr @llvm.stacksave.p0() +// CHECK: %[[dep_addr:.+]] = alloca %struct.kmp_dep_info, i64 %[[size]], align 16 +// CHECK: %[[dep_size:.+]] = trunc i64 %[[size]] to i32 + +// CHECK: %[[gep_03:.+]] = getelementptr %struct.kmp_dep_info, ptr %[[dep_addr]], i64 0 +// CHECK: %[[gep_04:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[gep_03]], i32 0, i32 0 +// CHECK: %[[arg_01_int:.+]] = ptrtoint ptr %0 to i64 +// CHECK: store i64 %[[arg_01_int]], ptr %[[gep_04]], align 4 +// CHECK: %[[gep_05:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[gep_03]], i32 0, i32 1 +// CHECK: store i64 8, ptr %[[gep_05]], align 4 +// CHECK: %[[gep_06:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[gep_03]], i32 0, i32 2 +// CHECK: store i8 1, ptr %[[gep_06]], align 1 + +// CHECK: %[[gep_07:.+]] = getelementptr %struct.kmp_dep_info, ptr %[[dep_addr]], i64 1 +// CHECK: %[[gep_08:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[gep_07]], i32 0, i32 0 +// CHECK: %[[arg_02_int:.+]] = ptrtoint ptr %1 to i64 +// CHECK: store i64 %[[arg_02_int]], ptr %[[gep_08]], align 4 +// CHECK: %[[gep_09:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[gep_07]], i32 
0, i32 1 +// CHECK: store i64 8, ptr %[[gep_09]], align 4 +// CHECK: %[[gep_10:.+]] = getelementptr inbounds nuw %struct.kmp_dep_info, ptr %[[gep_07]], i32 0, i32 2 +// CHECK: store i8 3, ptr %[[gep_10]], align 1 + +// CHECK: %[[gep_11:.+]] = getelementptr %struct.kmp_dep_info, ptr %[[dep_addr]], i64 2 +// CHECK: %[[obj_load_02:.+]] = load ptr, ptr %[[obj]], align 8 +// CHECK: %[[obj_size:.+]] = mul i64 24, %[[obj_addr]] +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %[[gep_11]], ptr align 8 %[[obj_load_02]], i64 %[[obj_size]], i1 false) +// CHECK: %[[dep_size_idx:.+]] = add i64 2, %[[obj_addr]] + +// CHECK: %[[task:.+]] = call i32 @__kmpc_omp_task_with_deps({{.*}}, i32 %[[dep_size]], ptr %[[dep_addr]], i32 0, ptr null) +// CHECK: call void @llvm.stackrestore.p0(ptr %[[stack_ptr]]) +// CHECK: } + + +llvm.func @omp_task_with_deps_02(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { + %c_1 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %c_1 x i64 : (i64) -> !llvm.ptr + omp.task depend(taskdependin -> %arg0 : !llvm.ptr, taskdependdepobj -> %1 : !llvm.ptr, taskdependout -> %arg1 : !llvm.ptr) { + %4 = llvm.load %arg0 : !llvm.ptr -> i64 + %5 = llvm.add %4, %c_1 : i64 + llvm.store %5, %arg1 : i64, !llvm.ptr + omp.terminator + } + llvm.return +} + +// ----- + // CHECK-LABEL: define void @omp_task // CHECK-SAME: (i32 %[[x:.+]], i32 %[[y:.+]], ptr %[[zaddr:.+]]) module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu"} { From 376b1e4562a888b8ad05922b5c37ef5c704b1f2a Mon Sep 17 00:00:00 2001 From: Thirumalai-Shaktivel Date: Mon, 3 Mar 2025 09:20:32 +0000 Subject: [PATCH 2/2] Remove Align 16 --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 1 - mlir/test/Target/LLVMIR/openmp-llvm.mlir | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 476c0c80b985a..0ef6e4b99b36a 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2092,7 +2092,6 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask( if (!stackSave) stackSave = Builder.CreateStackSave(); DepAlloca = Builder.CreateAlloca(DependInfo, depSize, "dep.addr"); - ((AllocaInst *)DepAlloca)->setAlignment(Align(16)); depSize = Builder.CreateTrunc(depSize, Builder.getInt32Ty()); } else { DepAlloca = Builder.CreateAlloca(DepArrayTy, nullptr, ".dep.arr.addr"); diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index b8ae3d0bec2c8..777deed5d0f7f 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -1554,7 +1554,7 @@ llvm.func @_QPomp_atomic_update_complex() { //CHECK: %[[VAL_8:.*]] = fadd contract float %[[VAL_6]], 1.000000e+00 //CHECK: %[[VAL_9:.*]] = insertvalue { float, float } undef, float %[[VAL_7]], 0 //CHECK: %[[VAL_10:.*]] = insertvalue { float, float } %[[VAL_9]], float %[[VAL_8]], 1 -//CHECK: store { float, float } %[[VAL_10]], ptr %[[X_NEW_VAL]], align 4 +//CHECK: store { float, float } %[[VAL_10]], ptr %[[X_NEW_VAL]], align 4 //CHECK: %[[VAL_11:.*]] = call i1 @__atomic_compare_exchange(i64 8, ptr %[[ORIG_VAL]], ptr %[[ATOMIC_TEMP_LOAD]], ptr %[[X_NEW_VAL]], i32 2, i32 2) //CHECK: %[[VAL_12:.*]] = load { float, float }, ptr %[[ATOMIC_TEMP_LOAD]], align 4 //CHECK: br i1 %[[VAL_11]], label %.atomic.exit, label %.atomic.cont @@ -2745,7 +2745,7 @@ llvm.func @omp_task_with_deps(%zaddr: !llvm.ptr) { // CHECK: %[[size:.+]] = add i64 %[[obj_addr]], 2 // CHECK: %[[stack_ptr:.+]] = call ptr @llvm.stacksave.p0() -// CHECK: %[[dep_addr:.+]] = alloca %struct.kmp_dep_info, i64 %[[size]], align 16 +// CHECK: %[[dep_addr:.+]] = alloca %struct.kmp_dep_info, i64 %[[size]], align 8 // CHECK: %[[dep_size:.+]] = trunc i64 %[[size]] to i32 // CHECK: %[[gep_03:.+]] = getelementptr %struct.kmp_dep_info, ptr %[[dep_addr]], i64 0 @@ -3069,8 +3069,8 @@ llvm.func @omp_opaque_pointers(%arg0 : 
!llvm.ptr, %arg1: !llvm.ptr, %expr: i32) // CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 1 // CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1 // CHECK: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 1 -module attributes {omp.flags = #omp.flags} {} // ----- @@ -3115,8 +3115,8 @@ module attributes {omp.version = #omp.version} {} // CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 // CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 // CHECK: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 -module attributes {omp.flags = #omp.flags} {} // -----