diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 6883d78cd317d..090a5a9836b79 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -161,6 +161,10 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (op.getDevice())
       result = todo("device");
   };
+  auto checkDistSchedule = [&todo](auto op, LogicalResult &result) {
+    if (op.getDistScheduleChunkSize())
+      result = todo("dist_schedule with chunk_size");
+  };
   auto checkHasDeviceAddr = [&todo](auto op, LogicalResult &result) {
     if (!op.getHasDeviceAddrVars().empty())
       result = todo("has_device_addr");
@@ -252,6 +256,16 @@ static LogicalResult checkImplementationStatus(Operation &op) {
 
   LogicalResult result = success();
   llvm::TypeSwitch<Operation &>(op)
+      .Case([&](omp::DistributeOp op) {
+        if (op.isComposite() &&
+            isa_and_present<omp::WsloopOp>(op.getNestedWrapper()))
+          result = op.emitError() << "not yet implemented: "
+                                     "composite omp.distribute + omp.wsloop";
+        checkAllocate(op, result);
+        checkDistSchedule(op, result);
+        checkOrder(op, result);
+        checkPrivate(op, result);
+      })
       .Case([&](omp::OrderedRegionOp op) { checkParLevelSimd(op, result); })
       .Case([&](omp::SectionsOp op) {
         checkAllocate(op, result);
@@ -3854,6 +3868,74 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
   return success();
 }
 
+/// Translates an `omp.distribute` operation to LLVM IR. The nested loop is
+/// lowered through the OpenMPIRBuilder as a statically scheduled distribute
+/// loop; clauses that are not supported yet are rejected up front.
+static LogicalResult
+convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
+                     LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  auto distributeOp = cast<omp::DistributeOp>(opInst);
+  if (failed(checkImplementationStatus(opInst)))
+    return failure();
+
+  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+  auto bodyGenCB = [&](InsertPointTy allocaIP,
+                       InsertPointTy codeGenIP) -> llvm::Error {
+    // Save the alloca insertion point on ModuleTranslation stack for use in
+    // nested regions.
+    LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+        moduleTranslation, allocaIP);
+
+    // DistributeOp has only one region associated with it.
+    builder.restoreIP(codeGenIP);
+
+    llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+    llvm::Expected<llvm::BasicBlock *> regionBlock =
+        convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
+                            builder, moduleTranslation);
+    if (!regionBlock)
+      return regionBlock.takeError();
+    builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
+
+    // TODO: Add support for clauses which are valid for DISTRIBUTE constructs.
+    // Static schedule is the default.
+    auto schedule = omp::ClauseScheduleKind::Static;
+    bool isOrdered = false;
+    std::optional<omp::ScheduleModifier> scheduleMod;
+    bool isSimd = false;
+    llvm::omp::WorksharingLoopType workshareLoopType =
+        llvm::omp::WorksharingLoopType::DistributeStaticLoop;
+    bool loopNeedsBarrier = false;
+    llvm::Value *chunk = nullptr;
+
+    llvm::CanonicalLoopInfo *loopInfo = findCurrentLoopInfo(moduleTranslation);
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
+        ompBuilder->applyWorkshareLoop(
+            ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
+            convertToScheduleKind(schedule), chunk, isSimd,
+            scheduleMod == omp::ScheduleModifier::monotonic,
+            scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
+            workshareLoopType);
+
+    if (!wsloopIP)
+      return wsloopIP.takeError();
+    return llvm::Error::success();
+  };
+
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+      findAllocaInsertPoint(builder, moduleTranslation);
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
+
+  if (failed(handleError(afterIP, opInst)))
+    return failure();
+
+  builder.restoreIP(*afterIP);
+  return success();
+}
+
 /// Lowers the FlagsAttr which is applied to the module on the device
 /// pass when offloading, this attribute contains OpenMP RTL globals that can
 /// be passed as flags to the frontend, otherwise they are set to default
@@ -4813,6 +4895,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
           .Case([&](omp::TargetOp) {
             return convertOmpTarget(*op, builder, moduleTranslation);
           })
+          .Case([&](omp::DistributeOp) {
+            return convertOmpDistribute(*op, builder, moduleTranslation);
+          })
           .Case([&](omp::LoopNestOp) {
             return convertOmpLoopNest(*op, builder, moduleTranslation);
           })
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index cf18c07dd605b..a5a490e527d79 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -3270,3 +3270,40 @@ llvm.func @omp_task_if(%boolexpr: i1) {
 // -----
 
 module attributes {omp.requires = #omp<clause_requires reverse_offload|unified_shared_memory>} {}
+
+// -----
+
+llvm.func @distribute() {
+  %0 = llvm.mlir.constant(42 : index) : i64
+  %1 = llvm.mlir.constant(10 : index) : i64
+  %2 = llvm.mlir.constant(1 : index) : i64
+  omp.distribute {
+    omp.loop_nest (%arg1) : i64 = (%1) to (%0) step (%2) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// CHECK-LABEL: define void @distribute
+// CHECK:         call void @[[OUTLINED:.*]]({{.*}})
+// CHECK-NEXT:    br label %[[EXIT:.*]]
+// CHECK:       [[EXIT]]:
+// CHECK:         ret void
+
+// CHECK:       define internal void @[[OUTLINED]]({{.*}})
+// CHECK:         %[[LASTITER:.*]] = alloca i32
+// CHECK:         %[[LB:.*]] = alloca i64
+// CHECK:         %[[UB:.*]] = alloca i64
+// CHECK:         %[[STRIDE:.*]] = alloca i64
+// CHECK:         br label %[[BODY:.*]]
+// CHECK:       [[BODY]]:
+// CHECK-NEXT:    br label %[[REGION:.*]]
+// CHECK:       [[REGION]]:
+// CHECK-NEXT:    br label %[[PREHEADER:.*]]
+// CHECK:       [[PREHEADER]]:
+// CHECK:         store i64 0, ptr %[[LB]]
+// CHECK:         store i64 31, ptr %[[UB]]
+// CHECK:         store i64 1, ptr %[[STRIDE]]
+// CHECK:         %[[TID:.*]] = call i32 @__kmpc_global_thread_num({{.*}})
+// CHECK:         call void @__kmpc_for_static_init_{{.*}}(ptr @{{.*}}, i32 %[[TID]], i32 92, ptr %[[LASTITER]], ptr %[[LB]], ptr %[[UB]], ptr %[[STRIDE]], i64 1, i64 0)
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index e97b5e54e6415..71dbc061c3104 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -66,10 +66,70 @@ llvm.func @do_simd(%lb : i32, %ub : i32, %step : i32) {
 
 // -----
 
-llvm.func @distribute(%lb : i32, %ub : i32, %step : i32) {
-  // expected-error@below {{not yet implemented: omp.distribute}}
+llvm.func @distribute_wsloop(%lb : i32, %ub : i32, %step : i32) {
+  // expected-error@below {{LLVM Translation failed for operation: omp.parallel}}
+  omp.parallel {
+    // expected-error@below {{not yet implemented: composite omp.distribute + omp.wsloop}}
+    // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+    omp.distribute {
+      omp.wsloop {
+        omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+          omp.yield
+        }
+      } {omp.composite}
+    } {omp.composite}
+    omp.terminator
+  } {omp.composite}
+  llvm.return
+}
+
+// -----
+
+llvm.func @distribute_allocate(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
+  // expected-error@below {{not yet implemented: Unhandled clause allocate in omp.distribute operation}}
+  // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+  omp.distribute allocate(%x : !llvm.ptr -> %x : !llvm.ptr) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @distribute_dist_schedule(%lb : i32, %ub : i32, %step : i32, %x : i32) {
+  // expected-error@below {{not yet implemented: Unhandled clause dist_schedule with chunk_size in omp.distribute operation}}
+  // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+  omp.distribute dist_schedule_static dist_schedule_chunk_size(%x : i32) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @distribute_order(%lb : i32, %ub : i32, %step : i32) {
+  // expected-error@below {{not yet implemented: Unhandled clause order in omp.distribute operation}}
+  // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
+  omp.distribute order(concurrent) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// -----
+
+omp.private {type = private} @x.privatizer : !llvm.ptr
+
+llvm.func @distribute_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
+  // expected-error@below {{not yet implemented: Unhandled clause privatization in omp.distribute operation}}
   // expected-error@below {{LLVM Translation failed for operation: omp.distribute}}
-  omp.distribute {
+  omp.distribute private(@x.privatizer %x -> %arg0 : !llvm.ptr) {
     omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
       omp.yield
     }