diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 82673f0948a5b..3a8c7dcb0690a 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2128,6 +2128,161 @@ genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return loopOp; } +static mlir::omp::CanonicalLoopOp +genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item, + llvm::ArrayRef ivs, + llvm::omp::Directive directive, DataSharingProcessor &dsp) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + assert(ivs.size() == 1 && "Nested loops not yet implemented"); + const semantics::Symbol *iv = ivs[0]; + + auto &nestedEval = eval.getFirstNestedEvaluation(); + if (nestedEval.getIf()->IsDoConcurrent()) { + TODO(loc, "Do Concurrent in unroll construct"); + } + + // Get the loop bounds (and increment) + auto &doLoopEval = nestedEval.getFirstNestedEvaluation(); + auto *doStmt = doLoopEval.getIf(); + assert(doStmt && "Expected do loop to be in the nested evaluation"); + auto &loopControl = std::get>(doStmt->t); + assert(loopControl.has_value()); + auto *bounds = std::get_if(&loopControl->u); + assert(bounds && "Expected bounds for canonical loop"); + lower::StatementContext stmtCtx; + mlir::Value loopLBVar = fir::getBase( + converter.genExprValue(*semantics::GetExpr(bounds->lower), stmtCtx)); + mlir::Value loopUBVar = fir::getBase( + converter.genExprValue(*semantics::GetExpr(bounds->upper), stmtCtx)); + mlir::Value loopStepVar = [&]() { + if (bounds->step) { + return fir::getBase( + converter.genExprValue(*semantics::GetExpr(bounds->step), stmtCtx)); + } else { + // If `step` is not present, assume it is `1`. 
+ return firOpBuilder.createIntegerConstant(loc, firOpBuilder.getI32Type(), + 1); + } + }(); + + // Get the integer kind for the loop variable and cast the loop bounds + size_t loopVarTypeSize = bounds->name.thing.symbol->GetUltimate().size(); + mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); + loopLBVar = firOpBuilder.createConvert(loc, loopVarType, loopLBVar); + loopUBVar = firOpBuilder.createConvert(loc, loopVarType, loopUBVar); + loopStepVar = firOpBuilder.createConvert(loc, loopVarType, loopStepVar); + + // Start lowering + mlir::Value zero = firOpBuilder.createIntegerConstant(loc, loopVarType, 0); + mlir::Value one = firOpBuilder.createIntegerConstant(loc, loopVarType, 1); + mlir::Value isDownwards = firOpBuilder.create( + loc, mlir::arith::CmpIPredicate::slt, loopStepVar, zero); + + // Ensure we are counting upwards. If not, negate step and swap lb and ub. + mlir::Value negStep = + firOpBuilder.create(loc, zero, loopStepVar); + mlir::Value incr = firOpBuilder.create( + loc, isDownwards, negStep, loopStepVar); + mlir::Value lb = firOpBuilder.create( + loc, isDownwards, loopUBVar, loopLBVar); + mlir::Value ub = firOpBuilder.create( + loc, isDownwards, loopLBVar, loopUBVar); + + // Compute the trip count assuming lb <= ub. This guarantees that the result + // is non-negative and we can use unsigned arithmetic. + mlir::Value span = firOpBuilder.create( + loc, ub, lb, ::mlir::arith::IntegerOverflowFlags::nuw); + mlir::Value tcMinusOne = + firOpBuilder.create(loc, span, incr); + mlir::Value tcIfLooping = firOpBuilder.create( + loc, tcMinusOne, one, ::mlir::arith::IntegerOverflowFlags::nuw); + + // Fall back to 0 if lb > ub + mlir::Value isZeroTC = firOpBuilder.create( + loc, mlir::arith::CmpIPredicate::slt, ub, lb); + mlir::Value tripcount = firOpBuilder.create( + loc, isZeroTC, zero, tcIfLooping); + + // Create the CLI handle. 
+ auto newcli = firOpBuilder.create(loc); + mlir::Value cli = newcli.getResult(); + + auto ivCallback = [&](mlir::Operation *op) + -> llvm::SmallVector { + mlir::Region ®ion = op->getRegion(0); + + // Create the op's region skeleton (BB taking the iv as argument) + firOpBuilder.createBlock(®ion, {}, {loopVarType}, {loc}); + + // Compute the value of the loop variable from the logical iteration number. + mlir::Value natIterNum = fir::getBase(region.front().getArgument(0)); + mlir::Value scaled = + firOpBuilder.create(loc, natIterNum, loopStepVar); + mlir::Value userVal = + firOpBuilder.create(loc, loopLBVar, scaled); + + // The argument is not currently in memory, so make a temporary for the + // argument, and store it there, then bind that location to the argument. + mlir::Operation *storeOp = + createAndSetPrivatizedLoopVar(converter, loc, userVal, iv); + + firOpBuilder.setInsertionPointAfter(storeOp); + return {iv}; + }; + + // Create the omp.canonical_loop operation + auto canonLoop = genOpWithBody( + OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval, + directive) + .setClauses(&item->clauses) + .setDataSharingProcessor(&dsp) + .setGenRegionEntryCb(ivCallback), + queue, item, tripcount, cli); + + firOpBuilder.setInsertionPointAfter(canonLoop); + return canonLoop; +} + +static void genUnrollOp(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + lower::StatementContext &stmtCtx, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::const_iterator item) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + mlir::omp::LoopRelatedClauseOps loopInfo; + llvm::SmallVector iv; + collectLoopRelatedInfo(converter, loc, eval, item->clauses, loopInfo, iv); + + // Clauses for unrolling not yet implemnted + ClauseProcessor cp(converter, semaCtx, item->clauses); + cp.processTODO( + loc, 
llvm::omp::Directive::OMPD_unroll); + + // Even though unroll does not support data-sharing clauses, but this is + // required to fill the symbol table. + DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/false, symTable); + dsp.processStep1(); + + // Emit the associated loop + auto canonLoop = + genCanonicalLoopOp(converter, symTable, semaCtx, eval, loc, queue, item, + iv, llvm::omp::Directive::OMPD_unroll, dsp); + + // Apply unrolling to it + auto cli = canonLoop.getCli(); + firOpBuilder.create(loc, cli); +} + static mlir::omp::MaskedOp genMaskedOp(lower::AbstractConverter &converter, lower::SymMap &symTable, lower::StatementContext &stmtCtx, @@ -3516,12 +3671,9 @@ static void genOMPDispatch(lower::AbstractConverter &converter, newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; - case llvm::omp::Directive::OMPD_tile: - case llvm::omp::Directive::OMPD_unroll: { - unsigned version = semaCtx.langOptions().OpenMPVersion; - TODO(loc, "Unhandled loop directive (" + - llvm::omp::getOpenMPDirectiveName(dir, version) + ")"); - } + case llvm::omp::Directive::OMPD_unroll: + genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); + break; // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: newOp = genWorkshareOp(converter, symTable, stmtCtx, semaCtx, eval, loc, diff --git a/flang/test/Lower/OpenMP/unroll-heuristic01.f90 b/flang/test/Lower/OpenMP/unroll-heuristic01.f90 new file mode 100644 index 0000000000000..a5f5c003b8a7c --- /dev/null +++ b/flang/test/Lower/OpenMP/unroll-heuristic01.f90 @@ -0,0 +1,39 @@ +! 
RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s + + +subroutine omp_unroll_heuristic01(lb, ub, inc) + integer res, i, lb, ub, inc + + !$omp unroll + do i = lb, ub, inc + res = i + end do + !$omp end unroll + +end subroutine omp_unroll_heuristic01 + + +!CHECK-LABEL: func.func @_QPomp_unroll_heuristic01( +!CHECK: %c0_i32 = arith.constant 0 : i32 +!CHECK-NEXT: %c1_i32 = arith.constant 1 : i32 +!CHECK-NEXT: %13 = arith.cmpi slt, %12, %c0_i32 : i32 +!CHECK-NEXT: %14 = arith.subi %c0_i32, %12 : i32 +!CHECK-NEXT: %15 = arith.select %13, %14, %12 : i32 +!CHECK-NEXT: %16 = arith.select %13, %11, %10 : i32 +!CHECK-NEXT: %17 = arith.select %13, %10, %11 : i32 +!CHECK-NEXT: %18 = arith.subi %17, %16 overflow : i32 +!CHECK-NEXT: %19 = arith.divui %18, %15 : i32 +!CHECK-NEXT: %20 = arith.addi %19, %c1_i32 overflow : i32 +!CHECK-NEXT: %21 = arith.cmpi slt, %17, %16 : i32 +!CHECK-NEXT: %22 = arith.select %21, %c0_i32, %20 : i32 +!CHECK-NEXT: %canonloop_s0 = omp.new_cli +!CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%22) { +!CHECK-NEXT: %23 = arith.muli %iv, %12 : i32 +!CHECK-NEXT: %24 = arith.addi %10, %23 : i32 +!CHECK-NEXT: hlfir.assign %24 to %9#0 : i32, !fir.ref +!CHECK-NEXT: %25 = fir.load %9#0 : !fir.ref +!CHECK-NEXT: hlfir.assign %25 to %6#0 : i32, !fir.ref +!CHECK-NEXT: omp.terminator +!CHECK-NEXT: } +!CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0) +!CHECK-NEXT: return diff --git a/flang/test/Lower/OpenMP/unroll-heuristic02.f90 b/flang/test/Lower/OpenMP/unroll-heuristic02.f90 new file mode 100644 index 0000000000000..669f185f910c4 --- /dev/null +++ b/flang/test/Lower/OpenMP/unroll-heuristic02.f90 @@ -0,0 +1,70 @@ +! 
RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s + + +subroutine omp_unroll_heuristic_nested02(outer_lb, outer_ub, outer_inc, inner_lb, inner_ub, inner_inc) + integer res, i, j, inner_lb, inner_ub, inner_inc, outer_lb, outer_ub, outer_inc + + !$omp unroll + do i = outer_lb, outer_ub, outer_inc + !$omp unroll + do j = inner_lb, inner_ub, inner_inc + res = i + j + end do + !$omp end unroll + end do + !$omp end unroll + +end subroutine omp_unroll_heuristic_nested02 + + +!CHECK-LABEL: func.func @_QPomp_unroll_heuristic_nested02(%arg0: !fir.ref {fir.bindc_name = "outer_lb"}, %arg1: !fir.ref {fir.bindc_name = "outer_ub"}, %arg2: !fir.ref {fir.bindc_name = "outer_inc"}, %arg3: !fir.ref {fir.bindc_name = "inner_lb"}, %arg4: !fir.ref {fir.bindc_name = "inner_ub"}, %arg5: !fir.ref {fir.bindc_name = "inner_inc"}) { +!CHECK: %c0_i32 = arith.constant 0 : i32 +!CHECK-NEXT: %c1_i32 = arith.constant 1 : i32 +!CHECK-NEXT: %18 = arith.cmpi slt, %17, %c0_i32 : i32 +!CHECK-NEXT: %19 = arith.subi %c0_i32, %17 : i32 +!CHECK-NEXT: %20 = arith.select %18, %19, %17 : i32 +!CHECK-NEXT: %21 = arith.select %18, %16, %15 : i32 +!CHECK-NEXT: %22 = arith.select %18, %15, %16 : i32 +!CHECK-NEXT: %23 = arith.subi %22, %21 overflow : i32 +!CHECK-NEXT: %24 = arith.divui %23, %20 : i32 +!CHECK-NEXT: %25 = arith.addi %24, %c1_i32 overflow : i32 +!CHECK-NEXT: %26 = arith.cmpi slt, %22, %21 : i32 +!CHECK-NEXT: %27 = arith.select %26, %c0_i32, %25 : i32 +!CHECK-NEXT: %canonloop_s0 = omp.new_cli +!CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%27) { +!CHECK-NEXT: %28 = arith.muli %iv, %17 : i32 +!CHECK-NEXT: %29 = arith.addi %15, %28 : i32 +!CHECK-NEXT: hlfir.assign %29 to %14#0 : i32, !fir.ref +!CHECK-NEXT: %30 = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = "_QFomp_unroll_heuristic_nested02Ej"} +!CHECK-NEXT: %31:2 = hlfir.declare %30 {uniq_name = "_QFomp_unroll_heuristic_nested02Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK-NEXT: %32 = 
fir.load %4#0 : !fir.ref +!CHECK-NEXT: %33 = fir.load %5#0 : !fir.ref +!CHECK-NEXT: %34 = fir.load %3#0 : !fir.ref +!CHECK-NEXT: %c0_i32_0 = arith.constant 0 : i32 +!CHECK-NEXT: %c1_i32_1 = arith.constant 1 : i32 +!CHECK-NEXT: %35 = arith.cmpi slt, %34, %c0_i32_0 : i32 +!CHECK-NEXT: %36 = arith.subi %c0_i32_0, %34 : i32 +!CHECK-NEXT: %37 = arith.select %35, %36, %34 : i32 +!CHECK-NEXT: %38 = arith.select %35, %33, %32 : i32 +!CHECK-NEXT: %39 = arith.select %35, %32, %33 : i32 +!CHECK-NEXT: %40 = arith.subi %39, %38 overflow : i32 +!CHECK-NEXT: %41 = arith.divui %40, %37 : i32 +!CHECK-NEXT: %42 = arith.addi %41, %c1_i32_1 overflow : i32 +!CHECK-NEXT: %43 = arith.cmpi slt, %39, %38 : i32 +!CHECK-NEXT: %44 = arith.select %43, %c0_i32_0, %42 : i32 +!CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli +!CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_2 : i32 in range(%44) { +!CHECK-NEXT: %45 = arith.muli %iv_2, %34 : i32 +!CHECK-NEXT: %46 = arith.addi %32, %45 : i32 +!CHECK-NEXT: hlfir.assign %46 to %31#0 : i32, !fir.ref +!CHECK-NEXT: %47 = fir.load %14#0 : !fir.ref +!CHECK-NEXT: %48 = fir.load %31#0 : !fir.ref +!CHECK-NEXT: %49 = arith.addi %47, %48 : i32 +!CHECK-NEXT: hlfir.assign %49 to %12#0 : i32, !fir.ref +!CHECK-NEXT: omp.terminator +!CHECK-NEXT: } +!CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0_s0) +!CHECK-NEXT: omp.terminator +!CHECK-NEXT: } +!CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0) +!CHECK-NEXT: return diff --git a/flang/test/Parser/OpenMP/unroll-heuristic.f90 b/flang/test/Parser/OpenMP/unroll-heuristic.f90 new file mode 100644 index 0000000000000..2f589af0c83ca --- /dev/null +++ b/flang/test/Parser/OpenMP/unroll-heuristic.f90 @@ -0,0 +1,43 @@ +! RUN: %flang_fc1 -fopenmp -fopenmp-version=51 %s -fdebug-unparse | FileCheck --check-prefix=UNPARSE %s +! 
RUN: %flang_fc1 -fopenmp -fopenmp-version=51 %s -fdebug-dump-parse-tree | FileCheck --check-prefix=PTREE %s + +subroutine openmp_parse_unroll_heuristic + integer i + + !$omp unroll + do i = 1, 100 + call func(i) + end do + !$omp end unroll +END subroutine openmp_parse_unroll_heuristic + + +!UNPARSE: !$OMP UNROLL +!UNPARSE-NEXT: DO i=1_4,100_4 +!UNPARSE-NEXT: CALL func(i) +!UNPARSE-NEXT: END DO +!UNPARSE-NEXT: !$OMP END UNROLL + +!PTREE: OpenMPConstruct -> OpenMPLoopConstruct +!PTREE-NEXT: | OmpBeginLoopDirective +!PTREE-NEXT: | | OmpLoopDirective -> llvm::omp::Directive = unroll +!PTREE-NEXT: | | OmpClauseList -> +!PTREE-NEXT: | DoConstruct +!PTREE-NEXT: | | NonLabelDoStmt +!PTREE-NEXT: | | | LoopControl -> LoopBounds +!PTREE-NEXT: | | | | Scalar -> Name = 'i' +!PTREE-NEXT: | | | | Scalar -> Expr = '1_4' +!PTREE-NEXT: | | | | | LiteralConstant -> IntLiteralConstant = '1' +!PTREE-NEXT: | | | | Scalar -> Expr = '100_4' +!PTREE-NEXT: | | | | | LiteralConstant -> IntLiteralConstant = '100' +!PTREE-NEXT: | | Block +!PTREE-NEXT: | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> CallStmt = 'CALL func(i)' +!PTREE-NEXT: | | | | | | Call +!PTREE-NEXT: | | | | | ProcedureDesignator -> Name = 'func' +!PTREE-NEXT: | | | | | ActualArgSpec +!PTREE-NEXT: | | | | | | ActualArg -> Expr = 'i' +!PTREE-NEXT: | | | | | | | Designator -> DataRef -> Name = 'i' +!PTREE-NEXT: | | EndDoStmt -> +!PTREE-NEXT: | OmpEndLoopDirective +!PTREE-NEXT: | | OmpLoopDirective -> llvm::omp::Directive = unroll +!PTREE-NEXT: | | OmpClauseList -> diff --git a/flang/test/Parser/OpenMP/unroll.f90 b/flang/test/Parser/OpenMP/unroll-partial.f90 similarity index 100% rename from flang/test/Parser/OpenMP/unroll.f90 rename to flang/test/Parser/OpenMP/unroll-partial.f90 diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h index f9a85626a3f14..faf820dcfdb29 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h 
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -15,14 +15,10 @@ #ifndef MLIR_DIALECT_OPENMP_OPENMPCLAUSEOPERANDS_H_ #define MLIR_DIALECT_OPENMP_OPENMPCLAUSEOPERANDS_H_ +#include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.h" #include "mlir/IR/BuiltinAttributes.h" #include "llvm/ADT/SmallVector.h" -#include "mlir/Dialect/OpenMP/OpenMPOpsEnums.h.inc" - -#define GET_ATTRDEF_CLASSES -#include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.h.inc" - #include "mlir/Dialect/OpenMP/OpenMPClauseOps.h.inc" namespace mlir { diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h index 248ac2eb72c61..0a844fc2380bf 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h @@ -16,6 +16,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/OpenACCMPCommon/Interfaces/AtomicInterfaces.h" #include "mlir/Dialect/OpenACCMPCommon/Interfaces/OpenACCMPOpsInterfaces.h" +#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/PatternMatch.h" @@ -24,6 +25,11 @@ #include "mlir/Interfaces/SideEffectInterfaces.h" #include "llvm/Frontend/OpenMP/OMPDeviceConstants.h" +namespace mlir::omp { +/// Find the omp.new_cli, generator, and consumer of a canonical loop info. 
+std::tuple decodeCli(mlir::Value cli); +} // namespace mlir::omp + #define GET_TYPEDEF_CLASSES #include "mlir/Dialect/OpenMP/OpenMPOpsTypes.h.inc" @@ -33,8 +39,6 @@ #include "mlir/Dialect/OpenMP/OpenMPTypeInterfaces.h.inc" -#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" - #define GET_OP_CLASSES #include "mlir/Dialect/OpenMP/OpenMPOps.h.inc" diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h index 989ab1710c211..bc9534974d21f 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h @@ -14,6 +14,7 @@ #define MLIR_DIALECT_OPENMP_OPENMPINTERFACES_H_ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/PatternMatch.h" diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td index f3dd44d2c0717..bbcfb87fa03c6 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td @@ -204,4 +204,15 @@ class OpenMP_Op traits = [], let regions = !if(singleRegion, (region AnyRegion:$region), (region)); } + +// Base class for OpenMP loop transformations (that either consume or generate +// loops) +// +// Doesn't actually create a C++ base class (only defines default values for +// tablegen classes that derive from this). Use LoopTransformationInterface +// instead for common operations. 
+class OpenMPTransform_Op traits = []> : + OpenMP_Op], traits) > { +} + #endif // OPENMP_OP_BASE diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index ac80926053a2d..8641c9b8150ee 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -22,6 +22,7 @@ include "mlir/Dialect/OpenMP/OpenMPOpBase.td" include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/IR/EnumAttr.td" +include "mlir/IR/OpAsmInterface.td" include "mlir/IR/OpBase.td" include "mlir/IR/SymbolInterfaces.td" @@ -356,6 +357,212 @@ def SingleOp : OpenMP_Op<"single", traits = [ let hasVerifier = 1; } +//===---------------------------------------------------------------------===// +// OpenMP Canonical Loop Info Type +//===---------------------------------------------------------------------===// + +def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> { + let summary = "Type for representing a reference to a canonical loop"; + let description = [{ + A variable of type CanonicalLoopInfo refers to an OpenMP-compatible + canonical loop in the same function. Values of this type are not + available at runtime and therefore cannot be used by the program itself, + i.e. an opaque type. It is similar to the transform dialect's + `!transform.interface` type, but instead of implementing an interface + for each transformation, the OpenMP dialect itself defines possible + operations on this type. + + A value of type CanonicalLoopInfoType (in the following: CLI) value can be + + 1. created by omp.new_cli. + 2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI + can only be associated once. + 3. passed to an omp loop transformation operation that modifies the loop + associated with the CLI. The CLI is the "applyee" and the operation is + the consumer. A CLI can only be consumed once. + 4. 
passed to an omp loop transformation operation to associate the CLI with + a result of that transformation. The CLI is the "generatee" and the + operation is the generator. + + A CLI cannot + + 1. be returned from a function. + 2. be passed to operations that are not specifically designed to take a + CanonicalLoopInfoType, including AnyType. + + A CLI directly corresponds to an object of + OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR. + }]; +} + +//===---------------------------------------------------------------------===// +// OpenMP Canonical Loop Info Creation +//===---------------------------------------------------------------------===// + +def NewCliOp : OpenMP_Op<"new_cli", + [DeclareOpInterfaceMethods]> { + let summary = "Create a new Canonical Loop Info value."; + let description = [{ + Create a new CLI that can be passed as an argument to a CanonicalLoopOp + and to loop transformation operations to handle dependencies between + loop transformation operations. + }]; + + let arguments = (ins ); + let results = (outs CanonicalLoopInfoType:$result); + let assemblyFormat = [{ + attr-dict + }]; + + let builders = [ + OpBuilder<(ins )>, + ]; + + let hasVerifier = 1; +} + +//===---------------------------------------------------------------------===// +// OpenMP Canonical Loop Operation +//===---------------------------------------------------------------------===// +def CanonicalLoopOp : OpenMPTransform_Op<"canonical_loop", + [DeclareOpInterfaceMethods]> { + let summary = "OpenMP Canonical Loop Operation"; + let description = [{ + All loops that conform to OpenMP's definition of a canonical loop can be + simplified to a CanonicalLoopOp. In particular, there are no loop-carried + variables and the number of iterations it will execute is known before the + operation. This allows e.g. to determine the number of threads and chunks + the iteration space is split into before executing any iteration. 
More + restrictions may apply in cases such as (collapsed) loop nests, doacross + loops, etc. + + In contrast to other loop operations such as `scf.for`, the number of + iterations is determined by only a single variable, the trip-count. The + induction variable value is the logical iteration number of that iteration, + which OpenMP defines to be between 0 and the trip-count (exclusive). + Loop representations having lower-bound, upper-bound, and step-size operands + require passes to do more work than necessary, including handling special + cases such as upper-bound smaller than lower-bound, upper-bound equal to + the integer type's maximal value, negative step size, etc. This complexity + is better handled only once by the front-end, which can apply its semantics + for such cases while still being able to represent any kind of loop, which + is kind of the point of a mid-end intermediate representation. User-defined + types such as random-access iterators in C++ could not directly be + represented anyway. + + The induction variable is always of the same type as the tripcount argument. + Since it can never be negative, tripcount is always interpreted as an + unsigned integer. It is the caller's responsibility to ensure the tripcount + is not negative when its interpretation is signed, i.e. + `%tripcount = max(0,%tripcount)`. + + An optional argument to an omp.canonical_loop that can be passed in + is a CanonicalLoopInfo value that can be used to refer to the canonical + loop to apply transformations -- such as tiling, unrolling, or + work-sharing -- to the loop, similar to the transform dialect but + with OpenMP-specific semantics. Because it is optional, it has to be the + last of the operands, but appears first in the pretty format printing. + + The pretty assembly format is inspired by Python syntax, where `range(n)` + returns an iterator that runs from $0$ to $n-1$. 
The pretty assembly syntax + is one of: + + omp.canonical_loop(%cli) %iv : !type in range(%tripcount) + omp.canonical_loop %iv : !type in range(%tripcount) + + A CanonicalLoopOp is lowered to LLVM-IR using + `OpenMPIRBuilder::createCanonicalLoop`. + + #### Examples + + Translation from lower-bound, upper-bound, step-size to trip-count. + ```c + for (int i = 3; i < 42; i+=2) { + B[i] = A[i]; + } + ``` + + ```mlir + %lb = arith.constant 3 : i32 + %ub = arith.constant 42 : i32 + %step = arith.constant 2 : i32 + %range = arith.sub %ub, %lb : i32 + %tripcount = arith.div %range, %step : i32 + omp.canonical_loop %iv : i32 in range(%tripcount) { + %offset = arith.mul %iv, %step : i32 + %i = arith.add %offset, %lb : i32 + %a = load %arrA[%i] : memref + store %a, %arrB[%i] : memref + } + ``` + + Nested canonical loop with transformation of the inner loop. + ```mlir + %outer = omp.new_cli : !omp.cli + %inner = omp.new_cli : !omp.cli + omp.canonical_loop(%outer) %iv1 : i32 in range(%tc1) { + omp.canonical_loop(%inner) %iv2 : i32 in range(%tc2) { + %a = load %arrA[%iv1, %iv2] : memref + store %a, %arrB[%iv1, %iv2] : memref + } + } + omp.unroll_full(%inner) + ``` + }]; + + + let arguments = (ins IntLikeType:$tripCount, + Optional:$cli); + let regions = (region AnyRegion:$region); + + let extraClassDeclaration = [{ + ::mlir::Value getInductionVar(); + }]; + + let builders = [ + OpBuilder<(ins "::mlir::Value":$tripCount)>, + OpBuilder<(ins "::mlir::Value":$tripCount, "::mlir::Value":$cli)>, + ]; + + let hasCustomAssemblyFormat = 1; + let hasVerifier = 1; +} + +//===----------------------------------------------------------------------===// +// OpenMP unroll_heuristic operation +//===----------------------------------------------------------------------===// + +def UnrollHeuristicOp : OpenMPTransform_Op<"unroll_heuristic", []> { + let summary = "OpenMP heuristic unroll operation"; + let description = [{ + Represents a `#pragma omp unroll` construct introduced in OpenMP 5.1. 
+ + The operation has one applyee and no generatees. The applyee is unrolled + according to implementation-defined heuristics. Implementations may choose + to not unroll the loop, partially unroll by a chosen factor, or fully + unroll it. Even if the implementation chooses to partially unroll the + applyee, the resulting unrolled loop is not accessible as a generatee. Use + omp.unroll_partial if a generatee is required. + + The lowering is implemented using `OpenMPIRBuilder::unrollLoopHeuristic`, + which just attaches `llvm.loop.unroll.enable` metadata to the loop so the + unrolling is carried out by LLVM's LoopUnroll pass. That is, unrolling is only + actually performed in optimized builds. + + Assembly formats: + omp.unroll_heuristic(%cli) + omp.unroll_heuristic(%cli) -> () + }]; + + let arguments = (ins CanonicalLoopInfoType:$applyee); + + let builders = [ + OpBuilder<(ins "::mlir::Value":$cli)>, + ]; + + let hasCustomAssemblyFormat = 1; +} + //===----------------------------------------------------------------------===// // 2.8.3 Workshare Construct //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsAttributes.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsAttributes.h new file mode 100644 index 0000000000000..9a653c4b557b5 --- /dev/null +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsAttributes.h @@ -0,0 +1,17 @@ +//===- OpenMPOpsAttributes.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_OPENMP_OPENMPOPSATTRIBUTES_H_ +#define MLIR_DIALECT_OPENMP_OPENMPOPSATTRIBUTES_H_ + +#include "mlir/Dialect/OpenMP/OpenMPOpsEnums.h" + +#define GET_ATTRDEF_CLASSES +#include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.h.inc" + +#endif // MLIR_DIALECT_OPENMP_OPENMPOPSATTRIBUTES_H_ diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsEnums.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsEnums.h new file mode 100644 index 0000000000000..0f6c41a179536 --- /dev/null +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsEnums.h @@ -0,0 +1,14 @@ +//===- OpenMPOpsEnums.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_OPENMP_OPENMPOPSENUMS_H_ +#define MLIR_DIALECT_OPENMP_OPENMPOPSENUMS_H_ + +#include "mlir/Dialect/OpenMP/OpenMPOpsEnums.h.inc" + +#endif // MLIR_DIALECT_OPENMP_OPENMPOPSENUMS_H_ diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td index 92bf34ef3145f..02ec95c10d67d 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td @@ -551,4 +551,62 @@ def OffloadModuleInterface : OpInterface<"OffloadModuleInterface"> { ]; } +def LoopTransformationInterface : OpInterface<"LoopTransformationInterface"> { + let description = [{ + Methods that are common for OpenMP loop transformation operations. 
+ }]; + + let cppNamespace = "::mlir::omp"; + + let methods = [ + + InterfaceMethod< + /*description=*/[{ + Get the indices for the arguments that represent CanonicalLoopInfo + applyees, i.e. loops that are transformed/consumed by this operation. + }], + /*returnType=*/ "std::pair", + /*methodName=*/ "getApplyeesODSOperandIndexAndLength", + /*args=*/(ins) + >, + + InterfaceMethod< + /*description=*/[{ + Get the indices for the arguments that represent CanonicalLoopInfo + generatees, i.e. loops that are created by this operation. + }], + /*returnType=*/ "std::pair", + /*methodName=*/ "getGenerateesODSOperandIndexAndLength", + /*args=*/(ins) + >, + + InterfaceMethod< + /*description=*/[{ + Return the number of applyees of this loop transformation. + }], + /*returnType=*/ "unsigned", + /*methodName=*/ "getNumApplyees", + /*args=*/ (ins), + /*methodBody=*/ "", + /*defaultImpl=*/[{ + return $_op.getApplyeesODSOperandIndexAndLength().second; + }] + >, + + InterfaceMethod< + /*description=*/[{ + Return the number of generatees of this loop transformation. 
+ }], + /*returnType=*/ "unsigned", + /*methodName=*/ "getNumGeneratees", + /*args=*/ (ins), + /*methodBody=*/ "", + /*defaultImpl=*/[{ + return $_op.getGenerateesODSOperandIndexAndLength().second; + }] + >, + + ]; +} + #endif // OPENMP_OPS_INTERFACES diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h index 97ae14aa0d6af..7de9021694438 100644 --- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h +++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h @@ -15,6 +15,7 @@ #define MLIR_TARGET_LLVMIR_MODULETRANSLATION_H #include "mlir/Dialect/LLVMIR/LLVMInterfaces.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Operation.h" #include "mlir/IR/SymbolTable.h" #include "mlir/IR/Value.h" @@ -23,6 +24,7 @@ #include "mlir/Target/LLVMIR/TypeToLLVM.h" #include "llvm/ADT/SetVector.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/FPEnv.h" namespace llvm { @@ -107,6 +109,41 @@ class ModuleTranslation { return blockMapping.lookup(block); } + /// Find the LLVM-IR loop that represents an MLIR loop. + llvm::CanonicalLoopInfo *lookupOMPLoop(omp::NewCliOp mlir) const { + llvm::CanonicalLoopInfo *result = loopMapping.lookup(mlir); + assert(result && "attempt to get non-existing loop"); + return result; + } + + /// Find the LLVM-IR loop that represents an MLIR loop. + llvm::CanonicalLoopInfo *lookupOMPLoop(Value mlir) const { + return lookupOMPLoop(mlir.getDefiningOp()); + } + + /// Mark an OpenMP loop as having been consumed. + void invalidateOmpLoop(omp::NewCliOp mlir) { loopMapping.erase(mlir); } + + /// Mark an OpenMP loop as having been consumed. 
+ void invalidateOmpLoop(Value mlir) { + invalidateOmpLoop(mlir.getDefiningOp()); + } + + /// Map an MLIR OpenMP dialect CanonicalLoopInfo to its lowered LLVM-IR + /// OpenMPIRBuilder CanonicalLoopInfo + void mapOmpLoop(omp::NewCliOp mlir, llvm::CanonicalLoopInfo *llvm) { + assert(llvm && "argument must be non-null"); + llvm::CanonicalLoopInfo *&cur = loopMapping[mlir]; + assert(cur == nullptr && "attempting to map a loop that is already mapped"); + cur = llvm; + } + + /// Map an MLIR OpenMP dialect CanonicalLoopInfo to its lowered LLVM-IR + /// OpenMPIRBuilder CanonicalLoopInfo + void mapOmpLoop(Value mlir, llvm::CanonicalLoopInfo *llvm) { + mapOmpLoop(mlir.getDefiningOp(), llvm); + } + /// Stores the mapping between an MLIR operation with successors and a /// corresponding LLVM IR instruction. void mapBranch(Operation *mlir, llvm::Instruction *llvm) { @@ -431,6 +468,12 @@ class ModuleTranslation { DenseMap valueMapping; DenseMap blockMapping; + /// List of not yet consumed MLIR loop handles (represented by an omp.new_cli + /// operation which creates a value of type CanonicalLoopInfoType) and their + /// LLVM-IR representation as CanonicalLoopInfo which is managed by the + /// OpenMPIRBuilder. + DenseMap loopMapping; + /// A mapping between MLIR LLVM dialect terminators and LLVM IR terminators /// they are converted to. This allows for connecting PHI nodes to the source /// values after all operations are converted. 
diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp index 7a0a7f86bc1e9..e77c4a0b94de9 100644 --- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp +++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp @@ -42,6 +42,16 @@ template struct OpenMPOpConversion : public ConvertOpToLLVMPattern { using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + OpenMPOpConversion(LLVMTypeConverter &typeConverter, + PatternBenefit benefit = 1) + : ConvertOpToLLVMPattern(typeConverter, benefit) { + // Operations using CanonicalLoopInfoType are lowered only by + // mlir::translateModuleToLLVMIR() using the OpenMPIRBuilder. Until then, + // the type and operations using it must be preserved. + typeConverter.addConversion( + [&](::mlir::omp::CanonicalLoopInfoType type) { return type; }); + } + LogicalResult matchAndRewrite(T op, typename T::Adaptor adaptor, ConversionPatternRewriter &rewriter) const override { diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index e94d570b57122..64b6dd32b0c37 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/SmallString.h" @@ -3012,6 +3013,366 @@ void LoopNestOp::gatherWrappers( } } +//===----------------------------------------------------------------------===// +// OpenMP canonical loop handling +//===----------------------------------------------------------------------===// + +std::tuple +mlir::omp ::decodeCli(Value cli) { + + // Defining a CLI for a generated loop is optional; if there is none then + // there is no follow-up transformation + if (!cli) + return {{}, nullptr, nullptr}; + + MLIRContext *ctx = cli.getContext(); + assert(cli.getType() ==
CanonicalLoopInfoType::get(ctx) && + "Unexpected type of cli"); + + NewCliOp create = cast(cli.getDefiningOp()); + OpOperand *gen = nullptr; + OpOperand *cons = nullptr; + for (OpOperand &use : cli.getUses()) { + auto op = cast(use.getOwner()); + auto applyees = op.getApplyeesODSOperandIndexAndLength(); + auto generatees = op.getGenerateesODSOperandIndexAndLength(); + + unsigned opnum = use.getOperandNumber(); + if (generatees.first <= opnum && + opnum < generatees.first + generatees.second) { + assert(!gen && "Each CLI may have at most one def"); + gen = &use; + } else if (applyees.first <= opnum && + opnum < applyees.first + applyees.second) { + assert(!cons && "Each CLI may have at most one consumer"); + cons = &use; + } else { + llvm_unreachable("Unexpected operand for a CLI"); + } + } + + return {create, gen, cons}; +} + +void NewCliOp::build(::mlir::OpBuilder &odsBuilder, + ::mlir::OperationState &odsState) { + odsState.addTypes(CanonicalLoopInfoType::get(odsBuilder.getContext())); +} + +void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) { + Value result = getResult(); + auto [newCli, gen, cond] = decodeCli(result); + + // Derive the CLI variable name from its generator: + // * "canonloop" for omp.canonical_loop + // * custom name for loop transformation generatees + // * "cli" as fallback if no generator + // * "_sN" suffix for nested loops, where N is the sequential order + // at that level + // * "_rN" suffix for operations with multiple regions, where N is + // the index of that region + std::string cliName{"cli"}; + if (gen) { + cliName = + TypeSwitch(gen->getOwner()) + .Case([&](CanonicalLoopOp op) { + // Find the canonical loop nesting: For each ancestor add a + // "_sN" suffix, plus "_rN" for multi-region parents (in reverse order) + SmallVector components; + Operation *o = op.getOperation(); + while (o) { + if (o->hasTrait()) + break; + + Region *r = o->getParentRegion(); + if (!r) + break; + + Operation *parent = r->getParentOp(); + auto getSequentialIndex = [](Region *r,
Operation *o) { + llvm::ReversePostOrderTraversal traversal( + &r->getBlocks().front()); + size_t idx = 0; + for (Block *b : traversal) { + for (Operation &op : *b) { + if (&op == o) + return idx; + // Only consider operations that are containers as + // possible children + if (!op.getRegions().empty()) + idx += 1; + } + } + llvm_unreachable("Operation not part of the region"); + }; + size_t sequentialIdx = getSequentialIndex(r, o); + components.push_back(("s" + Twine(sequentialIdx)).str()); + + if (!parent) + break; + + // If the operation has more than one region, also count in + // which of the regions + if (parent->getRegions().size() > 1) { + auto getRegionIndex = [](Operation *o, Region *r) { + for (auto [idx, region] : + llvm::enumerate(o->getRegions())) { + if (&region == r) + return idx; + } + llvm_unreachable("Region not child its parent operation"); + }; + size_t regionIdx = getRegionIndex(parent, r); + components.push_back(("r" + Twine(regionIdx)).str()); + } + + // next parent + o = parent; + } + + SmallString<64> Name("canonloop"); + for (const std::string &s : reverse(components)) { + Name += '_'; + Name += s; + } + + return Name; + }) + .Case([&](UnrollHeuristicOp op) -> std::string { + llvm_unreachable("heuristic unrolling does not generate a loop"); + }) + .Default([&](Operation *op) { + assert(!"TODO: Custom name for this operation"); + return "transformed"; + }); + } + + setNameFn(result, cliName); +} + +LogicalResult NewCliOp::verify() { + Value cli = getResult(); + + MLIRContext *ctx = cli.getContext(); + assert(cli.getType() == CanonicalLoopInfoType::get(ctx) && + "Unexpected type of cli"); + + // Check that the CLI is used in at most one generator and one consumer + OpOperand *gen = nullptr; + OpOperand *cons = nullptr; + for (mlir::OpOperand &use : cli.getUses()) { + auto op = cast(use.getOwner()); + auto applyees = op.getApplyeesODSOperandIndexAndLength(); + auto generatees = op.getGenerateesODSOperandIndexAndLength(); + + unsigned opnum =
use.getOperandNumber(); + if (generatees.first <= opnum && + opnum < generatees.first + generatees.second) { + if (gen) { + InFlightDiagnostic error = + emitOpError("CLI must have at most one generator"); + error.attachNote(gen->getOwner()->getLoc()) + .append("first generator here:"); + error.attachNote(use.getOwner()->getLoc()) + .append("second generator here:"); + return error; + } + + gen = &use; + } else if (applyees.first <= opnum && + opnum < applyees.first + applyees.second) { + if (cons) { + InFlightDiagnostic error = + emitOpError("CLI must have at most one consumer"); + error.attachNote(cons->getOwner()->getLoc()) + .append("first consumer here:") + .appendOp(*cons->getOwner(), + OpPrintingFlags().printGenericOpForm()); + error.attachNote(use.getOwner()->getLoc()) + .append("second consumer here:") + .appendOp(*use.getOwner(), OpPrintingFlags().printGenericOpForm()); + return error; + } + + cons = &use; + } else { + llvm_unreachable("Unexpected operand for a CLI"); + } + } + + // If the CLI is the source of a transformation, it must have a generator + if (cons && !gen) { + InFlightDiagnostic error = emitOpError("CLI has no generator"); + error.attachNote(cons->getOwner()->getLoc()) + .append("see consumer here: ") + .appendOp(*cons->getOwner(), OpPrintingFlags().printGenericOpForm()); + return error; + } + + return success(); +} + +void CanonicalLoopOp::build(OpBuilder &odsBuilder, OperationState &odsState, + Value tripCount) { + odsState.addOperands(tripCount); + odsState.addOperands(Value()); + (void)odsState.addRegion(); +} + +void CanonicalLoopOp::build(OpBuilder &odsBuilder, OperationState &odsState, + Value tripCount, ::mlir::Value cli) { + odsState.addOperands(tripCount); + odsState.addOperands(cli); + (void)odsState.addRegion(); +} + +void CanonicalLoopOp::getAsmBlockNames(OpAsmSetBlockNameFn setNameFn) { + setNameFn(&getRegion().front(), "body_entry"); +} + +void CanonicalLoopOp::getAsmBlockArgumentNames(Region &region, + OpAsmSetValueNameFn
setNameFn) { + setNameFn(region.getArgument(0), "iv"); +} + +void CanonicalLoopOp::print(OpAsmPrinter &p) { + if (getCli()) + p << '(' << getCli() << ')'; + p << ' ' << getInductionVar() << " : " << getInductionVar().getType() + << " in range(" << getTripCount() << ") "; + + p.printRegion(getRegion(), /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/true); + + p.printOptionalAttrDict((*this)->getAttrs()); +} + +mlir::ParseResult CanonicalLoopOp::parse(::mlir::OpAsmParser &parser, + ::mlir::OperationState &result) { + CanonicalLoopInfoType cliType = + CanonicalLoopInfoType::get(parser.getContext()); + + // Parse (optional) omp.cli identifier + OpAsmParser::UnresolvedOperand cli; + SmallVector cliOperand; + if (!parser.parseOptionalLParen()) { + if (parser.parseOperand(cli) || + parser.resolveOperand(cli, cliType, cliOperand) || parser.parseRParen()) + return failure(); + } + + // We derive the type of tripCount from inductionVariable. MLIR requires the + // type of tripCount to be known when calling resolveOperand so we have parse + // the type before processing the inductionVariable. + OpAsmParser::Argument inductionVariable; + OpAsmParser::UnresolvedOperand tripcount; + if (parser.parseArgument(inductionVariable, /*allowType*/ true) || + parser.parseKeyword("in") || parser.parseKeyword("range") || + parser.parseLParen() || parser.parseOperand(tripcount) || + parser.parseRParen() || + parser.resolveOperand(tripcount, inductionVariable.type, result.operands)) + return failure(); + + // Parse the loop body. + Region *region = result.addRegion(); + if (parser.parseRegion(*region, {inductionVariable})) + return failure(); + + // We parsed the cli operand forst, but because it is optional, it must be + // last in the operand list. + result.operands.append(cliOperand); + + // Parse the optional attribute list. 
+ if (parser.parseOptionalAttrDict(result.attributes)) + return failure(); + + return mlir::success(); +} + +LogicalResult CanonicalLoopOp::verify() { + // The region's entry must accept the induction variable + // It can also be empty if just created + if (!getRegion().empty()) { + Region ®ion = getRegion(); + if (region.getNumArguments() != 1) + return emitOpError( + "Canonical loop region must have exactly one argument"); + + if (getInductionVar().getType() != getTripCount().getType()) + return emitOpError( + "Region argument must be the same type as the trip count"); + } + + return success(); +} + +Value CanonicalLoopOp::getInductionVar() { return getRegion().getArgument(0); } + +std::pair +CanonicalLoopOp::getApplyeesODSOperandIndexAndLength() { + // No applyees + return {0, 0}; +} + +std::pair +CanonicalLoopOp::getGenerateesODSOperandIndexAndLength() { + return getODSOperandIndexAndLength(odsIndex_cli); +} + +//===----------------------------------------------------------------------===// +// UnrollHeuristicOp +//===----------------------------------------------------------------------===// + +void UnrollHeuristicOp::build(::mlir::OpBuilder &odsBuilder, + ::mlir::OperationState &odsState, + ::mlir::Value cli) { + odsState.addOperands(cli); +} + +void UnrollHeuristicOp::print(OpAsmPrinter &p) { + p << '(' << getApplyee() << ')'; + + p.printOptionalAttrDict((*this)->getAttrs()); +} + +mlir::ParseResult UnrollHeuristicOp::parse(::mlir::OpAsmParser &parser, + ::mlir::OperationState &result) { + auto cliType = CanonicalLoopInfoType::get(parser.getContext()); + + if (parser.parseLParen()) + return failure(); + + OpAsmParser::UnresolvedOperand applyee; + if (parser.parseOperand(applyee) || + parser.resolveOperand(applyee, cliType, result.operands)) + return failure(); + + if (parser.parseRParen()) + return failure(); + + // Optional output loop (full unrolling has none) + if (!parser.parseOptionalArrow()) { + if (parser.parseLParen() || parser.parseRParen()) + 
return failure(); + } + + // Parse the optional attribute list. + if (parser.parseOptionalAttrDict(result.attributes)) + return failure(); + + return mlir::success(); +} + +std::pair +UnrollHeuristicOp ::getApplyeesODSOperandIndexAndLength() { + return getODSOperandIndexAndLength(odsIndex_applyee); +} + +std::pair +UnrollHeuristicOp::getGenerateesODSOperandIndexAndLength() { + return {0, 0}; +} + //===----------------------------------------------------------------------===// // Critical construct (2.17.1) //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 6bccc1d6f5d30..d3cfdf26a344b 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3014,6 +3014,67 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, return success(); } +/// Convert an omp.canonical_loop to LLVM-IR via OpenMPIRBuilder::createCanonicalLoop; if the op has a CLI, the resulting CanonicalLoopInfo is registered for it. +static LogicalResult +convertOmpCanonicalLoopOp(omp::CanonicalLoopOp op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + + llvm::OpenMPIRBuilder::LocationDescription loopLoc(builder); + Value loopIV = op.getInductionVar(); + Value loopTC = op.getTripCount(); + + llvm::Value *llvmTC = moduleTranslation.lookupValue(loopTC); + + llvm::Expected llvmOrError = + ompBuilder->createCanonicalLoop( + loopLoc, + [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *llvmIV) { + // Register the mapping of the MLIR induction variable to the LLVM-IR + // induction variable before translating the loop body + moduleTranslation.mapValue(loopIV, llvmIV); + + builder.restoreIP(ip); + llvm::Expected bodyGenStatus = + convertOmpOpRegions(op.getRegion(), "omp.loop.region", builder, + moduleTranslation); + + return bodyGenStatus.takeError(); + }, +
llvmTC, "omp.loop"); + if (!llvmOrError) + return op.emitError(llvm::toString(llvmOrError.takeError())); + + llvm::CanonicalLoopInfo *llvmCLI = *llvmOrError; + llvm::IRBuilderBase::InsertPoint afterIP = llvmCLI->getAfterIP(); + builder.restoreIP(afterIP); + + // Register the mapping of the MLIR loop to the LLVM-IR OpenMPIRBuilder loop (only if a CLI was given) + if (Value cli = op.getCli()) + moduleTranslation.mapOmpLoop(cli, llvmCLI); + + return success(); +} + +/// Apply a `#pragma omp unroll` / "!$omp unroll" transformation using the +/// OpenMPIRBuilder, then invalidate the applyee's loop mapping. +static LogicalResult +applyUnrollHeuristic(omp::UnrollHeuristicOp op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + + Value applyee = op.getApplyee(); + assert(applyee && "Loop to apply unrolling on required"); + + llvm::CanonicalLoopInfo *consBuilderCLI = + moduleTranslation.lookupOMPLoop(applyee); + llvm::OpenMPIRBuilder::LocationDescription loc(builder); + ompBuilder->unrollLoopHeuristic(loc.DL, consBuilderCLI); + + moduleTranslation.invalidateOmpLoop(applyee); + return success(); +} + /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering. static llvm::AtomicOrdering convertAtomicOrdering(std::optional ao) { @@ -5866,6 +5927,23 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, // etc. and then discarded return success(); }) + .Case([&](omp::NewCliOp op) { + // Meta-operation: Doesn't do anything by itself, but is used to + // identify a loop. + return success(); + }) + .Case([&](omp::CanonicalLoopOp op) { + return convertOmpCanonicalLoopOp(op, builder, moduleTranslation); + }) + .Case([&](omp::UnrollHeuristicOp op) { + // FIXME: Handling omp.unroll_heuristic as an executable requires + // that the generator (e.g. omp.canonical_loop) has been seen first. + // For constructs that require all codegen to occur inside a callback + // (e.g.
OpenMPIRBuilder::createParallel), all codegen of that + // contained region including their transformations must occur at + // the omp.canonical_loop. + return applyUnrollHeuristic(op, builder, moduleTranslation); + }) .Default([&](Operation *inst) { return inst->emitError() << "not yet implemented: " << inst->getName(); diff --git a/mlir/test/Dialect/OpenMP/cli-canonical_loop-invalid.mlir b/mlir/test/Dialect/OpenMP/cli-canonical_loop-invalid.mlir new file mode 100644 index 0000000000000..345c53d2890b8 --- /dev/null +++ b/mlir/test/Dialect/OpenMP/cli-canonical_loop-invalid.mlir @@ -0,0 +1,50 @@ +// RUN: mlir-opt %s -verify-diagnostics + + +func.func @omp_canonloop_block_arg(%tc : i32) -> () { + // expected-error@below {{Canonical loop region must have exactly one argument}} + "omp.canonical_loop" (%tc) ({ + ^bb0(%iv: i32, %somearg: i32): + omp.terminator + }) : (i32) -> () + return +} + + +func.func @omp_canonloop_multiple_generators(%tc : i32) -> () { + // expected-error@below {{'omp.new_cli' op CLI must have at most one generator}} + %cli = omp.new_cli + // expected-note@below {{second generator here}} + omp.canonical_loop(%cli) %iv1 : i32 in range(%tc) { + omp.terminator + } + // expected-note@below {{first generator here}} + omp.canonical_loop(%cli) %iv2 : i32 in range(%tc) { + omp.terminator + } + return +} + + +func.func @omp_canonloop_multiple_consumers() -> () { + // expected-error@below {{'omp.new_cli' op CLI must have at most one consumer}} + %cli = omp.new_cli + %tc = llvm.mlir.constant(4 : i32) : i32 + omp.canonical_loop(%cli) %iv1 : i32 in range(%tc) { + omp.terminator + } + // expected-note@below {{second consumer here}} + omp.unroll_heuristic(%cli) + // expected-note@below {{first consumer here}} + omp.unroll_heuristic(%cli) + return +} + + +func.func @omp_canonloop_no_generator() -> () { + // expected-error@below {{'omp.new_cli' op CLI has no generator}} + %cli = omp.new_cli + // expected-note@below {{see consumer here}} + omp.unroll_heuristic(%cli)
+ return +} diff --git a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir new file mode 100644 index 0000000000000..adadb8bbac49d --- /dev/null +++ b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir @@ -0,0 +1,157 @@ +// RUN: mlir-opt %s | FileCheck %s +// RUN: mlir-opt %s | mlir-opt | FileCheck %s + + +// CHECK-LABEL: @omp_canonloop_raw( +// CHECK-SAME: %[[tc:.+]]: i32) +func.func @omp_canonloop_raw(%tc : i32) -> () { + // CHECK: omp.canonical_loop %iv : i32 in range(%[[tc]]) { + "omp.canonical_loop" (%tc) ({ + ^bb0(%iv: i32): + // CHECK-NEXT: = llvm.add %iv, %iv : i32 + %newval = llvm.add %iv, %iv : i32 + // CHECK-NEXT: omp.terminator + omp.terminator + // CHECK-NEXT: } + }) : (i32) -> () + // CHECK-NEXT: return + return +} + + +// CHECK-LABEL: @omp_canonloop_sequential_raw( +// CHECK-SAME: %[[tc:.+]]: i32) +func.func @omp_canonloop_sequential_raw(%tc : i32) -> () { + // CHECK-NEXT: %canonloop_s0 = omp.new_cli + %canonloop_s0 = "omp.new_cli" () : () -> (!omp.cli) + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { + "omp.canonical_loop" (%tc, %canonloop_s0) ({ + ^bb_first(%iv_first: i32): + // CHECK-NEXT: = llvm.add %iv, %iv : i32 + %newval = llvm.add %iv_first, %iv_first : i32 + // CHECK-NEXT: omp.terminator + omp.terminator + // CHECK-NEXT: } + }) : (i32, !omp.cli) -> () + + // CHECK-NEXT: %canonloop_s1 = omp.new_cli + %canonloop_s1 = "omp.new_cli" () : () -> (!omp.cli) + // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) { + "omp.canonical_loop" (%tc, %canonloop_s1) ({ + ^bb_second(%iv_second: i32): + // CHECK: omp.terminator + omp.terminator + // CHECK-NEXT: } + }) : (i32, !omp.cli) -> () + + // CHECK-NEXT: return + return +} + + +// CHECK-LABEL: @omp_nested_canonloop_raw( +// CHECK-SAME: %[[tc_outer:.+]]: i32, %[[tc_inner:.+]]: i32) +func.func @omp_nested_canonloop_raw(%tc_outer : i32, %tc_inner : i32) -> () { + // CHECK-NEXT: %canonloop_s0 = omp.new_cli 
+ %outer = "omp.new_cli" () : () -> (!omp.cli) + // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli + %inner = "omp.new_cli" () : () -> (!omp.cli) + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc_outer]]) { + "omp.canonical_loop" (%tc_outer, %outer) ({ + ^bb_outer(%iv_outer: i32): + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc_inner]]) { + "omp.canonical_loop" (%tc_inner, %inner) ({ + ^bb_inner(%iv_inner: i32): + // CHECK-NEXT: = llvm.add %iv, %iv_0 : i32 + %newval = llvm.add %iv_outer, %iv_inner: i32 + // CHECK-NEXT: omp.terminator + omp.terminator + }) : (i32, !omp.cli) -> () + // CHECK: omp.terminator + omp.terminator + }) : (i32, !omp.cli) -> () + return +} + + +// CHECK-LABEL: @omp_canonloop_pretty( +// CHECK-SAME: %[[tc:.+]]: i32) +func.func @omp_canonloop_pretty(%tc : i32) -> () { + // CHECK-NEXT: omp.canonical_loop %iv : i32 in range(%[[tc]]) { + omp.canonical_loop %iv : i32 in range(%tc) { + // CHECK-NEXT: llvm.add %iv, %iv : i32 + %newval = llvm.add %iv, %iv: i32 + // CHECK-NEXT: omp.terminator + omp.terminator + } + return +} + + +// CHECK-LABEL: @omp_canonloop_constant_pretty() +func.func @omp_canonloop_constant_pretty() -> () { + // CHECK-NEXT: %[[tc:.+]] = llvm.mlir.constant(4 : i32) : i32 + %tc = llvm.mlir.constant(4 : i32) : i32 + // CHECK-NEXT: omp.canonical_loop %iv : i32 in range(%[[tc]]) { + omp.canonical_loop %iv : i32 in range(%tc) { + // CHECK-NEXT: llvm.add %iv, %iv : i32 + %newval = llvm.add %iv, %iv: i32 + // CHECK-NEXT: omp.terminator + omp.terminator + } + return +} + + +// CHECK-LABEL: @omp_canonloop_sequential_pretty( +// CHECK-SAME: %[[tc:.+]]: i32) +func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () { + // CHECK-NEXT: %canonloop_s0 = omp.new_cli + %canonloop_s0 = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) { + // CHECK-NEXT: omp.terminator + omp.terminator + } + 
+ // CHECK: %canonloop_s1 = omp.new_cli + %canonloop_s1 = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s1) %iv_0 : i32 in range(%tc) { + // CHECK-NEXT: omp.terminator + omp.terminator + } + + return +} + + +// CHECK-LABEL: @omp_canonloop_nested_pretty( +// CHECK-SAME: %[[tc:.+]]: i32) +func.func @omp_canonloop_nested_pretty(%tc : i32) -> () { + // CHECK-NEXT: %canonloop_s0 = omp.new_cli + %canonloop_s0 = omp.new_cli + // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli + %canonloop_s0_s0 = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%tc) { + // CHECK: omp.terminator + omp.terminator + } + // CHECK: omp.terminator + omp.terminator + } + return +} + + +// CHECK-LABEL: @omp_newcli_unused( +// CHECK-SAME: ) +func.func @omp_newcli_unused() -> () { + // CHECK-NEXT: %cli = omp.new_cli + %cli = omp.new_cli + // CHECK-NEXT: return + return +} diff --git a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir new file mode 100644 index 0000000000000..cda7d0b500166 --- /dev/null +++ b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir @@ -0,0 +1,59 @@ +// RUN: mlir-opt %s | FileCheck %s +// RUN: mlir-opt %s | mlir-opt | FileCheck %s + + +// CHECK-LABEL: @omp_unroll_heuristic_raw( +// CHECK-SAME: %[[tc:.+]]: i32) { +func.func @omp_unroll_heuristic_raw(%tc : i32) -> () { + // CHECK-NEXT: %canonloop_s0 = omp.new_cli + %canonloop = "omp.new_cli" () : () -> (!omp.cli) + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { + "omp.canonical_loop" (%tc, %canonloop) ({ + ^bb0(%iv: i32): + omp.terminator + }) : (i32, !omp.cli) -> () + // CHECK: omp.unroll_heuristic(%canonloop_s0) + 
"omp.unroll_heuristic" (%canonloop) : (!omp.cli) -> () + return +} + + +// CHECK-LABEL: @omp_unroll_heuristic_pretty( +// CHECK-SAME: %[[tc:.+]]: i32) { +func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () { + // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli + %canonloop = "omp.new_cli" () : () -> (!omp.cli) + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) { + omp.terminator + } + // CHECK: omp.unroll_heuristic(%canonloop_s0) + omp.unroll_heuristic(%canonloop) + return +} + + +// CHECK-LABEL: @omp_unroll_heuristic_nested_pretty( +// CHECK-SAME: %[[tc:.+]]: i32) { +func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () { + // CHECK-NEXT: %canonloop_s0 = omp.new_cli + %cli_outer = omp.new_cli + // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli + %cli_inner = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { + omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) { + omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc) { + // CHECK: omp.terminator + omp.terminator + } + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.unroll_heuristic(%canonloop_s0) + omp.unroll_heuristic(%cli_outer) + // CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0_s0) + omp.unroll_heuristic(%cli_inner) + return +} diff --git a/mlir/test/Target/LLVMIR/openmp-cli-canonical_loop.mlir b/mlir/test/Target/LLVMIR/openmp-cli-canonical_loop.mlir new file mode 100644 index 0000000000000..9abef003d6183 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-cli-canonical_loop.mlir @@ -0,0 +1,175 @@ +// Test lowering of standalone omp.canonical_loop +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// CHECK-LABEL: define void @anon_loop( +// CHECK-SAME: ptr %[[ptr:.+]], +// CHECK-SAME: i32 %[[tc:.+]]) { +// CHECK-NEXT: br label %omp_omp.loop.preheader 
+// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.preheader: +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.header: +// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ] +// CHECK-NEXT: br label %omp_omp.loop.cond +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.cond: +// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[tc]] +// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body: +// CHECK-NEXT: br label %omp.loop.region +// CHECK-EMPTY: +// CHECK-NEXT: omp.loop.region: +// CHECK-NEXT: store float 4.200000e+01, ptr %[[ptr]], align 4 +// CHECK-NEXT: br label %omp.region.cont +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont: +// CHECK-NEXT: br label %omp_omp.loop.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.inc: +// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1 +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.exit: +// CHECK-NEXT: br label %omp_omp.loop.after +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.after: +// CHECK-NEXT: ret void +// CHECK-NEXT: } +llvm.func @anon_loop(%ptr: !llvm.ptr, %tc : i32) -> () { + omp.canonical_loop %iv : i32 in range(%tc) { + %val = llvm.mlir.constant(42.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + llvm.return +} + + + +// CHECK-LABEL: define void @trivial_loop( +// CHECK-SAME: ptr %[[ptr:.+]], +// CHECK-SAME: i32 %[[tc:.+]]) { +// CHECK-NEXT: br label %omp_omp.loop.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.preheader: +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.header: +// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ] +// CHECK-NEXT: br label %omp_omp.loop.cond +// CHECK-EMPTY: +// CHECK-NEXT: 
omp_omp.loop.cond: +// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[tc]] +// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body: +// CHECK-NEXT: br label %omp.loop.region +// CHECK-EMPTY: +// CHECK-NEXT: omp.loop.region: +// CHECK-NEXT: store float 4.200000e+01, ptr %[[ptr]], align 4 +// CHECK-NEXT: br label %omp.region.cont +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont: +// CHECK-NEXT: br label %omp_omp.loop.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.inc: +// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1 +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.exit: +// CHECK-NEXT: br label %omp_omp.loop.after +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.after: +// CHECK-NEXT: ret void +// CHECK-NEXT: } +llvm.func @trivial_loop(%ptr: !llvm.ptr, %tc : i32) -> () { + %cli = omp.new_cli + omp.canonical_loop(%cli) %iv : i32 in range(%tc) { + %val = llvm.mlir.constant(42.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + llvm.return +} + + +// CHECK-LABEL: define void @nested_loop( +// CHECK-SAME: ptr %[[ptr:.+]], i32 %[[outer_tc:.+]], i32 %[[inner_tc:.+]]) { +// CHECK-NEXT: br label %omp_omp.loop.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.preheader: +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.header: +// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ] +// CHECK-NEXT: br label %omp_omp.loop.cond +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.cond: +// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[outer_tc]] +// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body: +// CHECK-NEXT: br label %omp.loop.region +// CHECK-EMPTY: +// CHECK-NEXT: 
omp.loop.region: +// CHECK-NEXT: br label %omp_omp.loop.preheader1 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.preheader1: +// CHECK-NEXT: br label %omp_omp.loop.header2 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.header2: +// CHECK-NEXT: %omp_omp.loop.iv8 = phi i32 [ 0, %omp_omp.loop.preheader1 ], [ %omp_omp.loop.next10, %omp_omp.loop.inc5 ] +// CHECK-NEXT: br label %omp_omp.loop.cond3 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.cond3: +// CHECK-NEXT: %omp_omp.loop.cmp9 = icmp ult i32 %omp_omp.loop.iv8, %[[inner_tc]] +// CHECK-NEXT: br i1 %omp_omp.loop.cmp9, label %omp_omp.loop.body4, label %omp_omp.loop.exit6 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body4: +// CHECK-NEXT: br label %omp.loop.region12 +// CHECK-EMPTY: +// CHECK-NEXT: omp.loop.region12: +// CHECK-NEXT: store float 4.200000e+01, ptr %[[ptr]], align 4 +// CHECK-NEXT: br label %omp.region.cont11 +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont11: +// CHECK-NEXT: br label %omp_omp.loop.inc5 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.inc5: +// CHECK-NEXT: %omp_omp.loop.next10 = add nuw i32 %omp_omp.loop.iv8, 1 +// CHECK-NEXT: br label %omp_omp.loop.header2 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.exit6: +// CHECK-NEXT: br label %omp_omp.loop.after7 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.after7: +// CHECK-NEXT: br label %omp.region.cont +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont: +// CHECK-NEXT: br label %omp_omp.loop.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.inc: +// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1 +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.exit: +// CHECK-NEXT: br label %omp_omp.loop.after +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.after: +// CHECK-NEXT: ret void +// CHECK-NEXT: } +llvm.func @nested_loop(%ptr: !llvm.ptr, %outer_tc : i32, %inner_tc : i32) -> () { + %outer_cli = omp.new_cli + %inner_cli = omp.new_cli + omp.canonical_loop(%outer_cli) %outer_iv : i32 in 
range(%outer_tc) { + omp.canonical_loop(%inner_cli) %inner_iv : i32 in range(%inner_tc) { + %val = llvm.mlir.constant(42.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + omp.terminator + } + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic01.mlir b/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic01.mlir new file mode 100644 index 0000000000000..0f0448e15f983 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic01.mlir @@ -0,0 +1,56 @@ +// Test lowering of the omp.unroll_heuristic +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + + +// CHECK-LABEL: define void @unroll_heuristic_trivial_loop( +// CHECK-SAME: ptr %[[ptr:.+]], i32 %[[tc:.+]]) { +// CHECK-NEXT: br label %omp_omp.loop.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.preheader: +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.header: +// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ] +// CHECK-NEXT: br label %omp_omp.loop.cond +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.cond: +// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[tc]] +// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body: +// CHECK-NEXT: br label %omp.loop.region +// CHECK-EMPTY: +// CHECK-NEXT: omp.loop.region: +// CHECK-NEXT: store float 4.200000e+01, ptr %[[ptr]], align 4 +// CHECK-NEXT: br label %omp.region.cont +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont: +// CHECK-NEXT: br label %omp_omp.loop.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.inc: +// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1 +// CHECK-NEXT: br label %omp_omp.loop.header, !llvm.loop ![[$MD1:[0-9]+]] +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.exit: +// CHECK-NEXT: br label %omp_omp.loop.after +// CHECK-EMPTY: +// CHECK-NEXT: 
omp_omp.loop.after: +// CHECK-NEXT: ret void +// CHECK-NEXT: } +llvm.func @unroll_heuristic_trivial_loop(%ptr: !llvm.ptr, %tc: i32) -> () { + %literal_cli = omp.new_cli + omp.canonical_loop(%literal_cli) %iv : i32 in range(%tc) { + %val = llvm.mlir.constant(42.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + omp.unroll_heuristic(%literal_cli) + llvm.return +} + + +// Start of metadata +// CHECK-LABEL: !llvm.module.flags + +// CHECK: ![[$MD1]] = distinct !{![[$MD1]], ![[$MD2:[0-9]+]]} +// CHECK: ![[$MD2]] = !{!"llvm.loop.unroll.enable"} diff --git a/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic02.mlir b/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic02.mlir new file mode 100644 index 0000000000000..f82b4990e378e --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic02.mlir @@ -0,0 +1,93 @@ +// Test lowering of the omp.unroll_heuristic +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + + +// CHECK-LABEL: define void @unroll_heuristic_nested_loop( +// CHECK-SAME: ptr %[[ptr:.+]], i32 %[[outer_tc:.+]], i32 %[[inner_tc:.+]]) { +// CHECK-NEXT: br label %omp_omp.loop.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.preheader: +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.header: +// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ] +// CHECK-NEXT: br label %omp_omp.loop.cond +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.cond: +// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[outer_tc]] +// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body: +// CHECK-NEXT: br label %omp.loop.region +// CHECK-EMPTY: +// CHECK-NEXT: omp.loop.region: +// CHECK-NEXT: br label %omp_omp.loop.preheader1 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.preheader1: +// CHECK-NEXT: br label %omp_omp.loop.header2 +// 
CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.header2: +// CHECK-NEXT: %omp_omp.loop.iv8 = phi i32 [ 0, %omp_omp.loop.preheader1 ], [ %omp_omp.loop.next10, %omp_omp.loop.inc5 ] +// CHECK-NEXT: br label %omp_omp.loop.cond3 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.cond3: +// CHECK-NEXT: %omp_omp.loop.cmp9 = icmp ult i32 %omp_omp.loop.iv8, %[[inner_tc]] +// CHECK-NEXT: br i1 %omp_omp.loop.cmp9, label %omp_omp.loop.body4, label %omp_omp.loop.exit6 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body4: +// CHECK-NEXT: br label %omp.loop.region12 +// CHECK-EMPTY: +// CHECK-NEXT: omp.loop.region12: +// CHECK-NEXT: store float 4.200000e+01, ptr %[[ptr]], align 4 +// CHECK-NEXT: br label %omp.region.cont11 +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont11: +// CHECK-NEXT: br label %omp_omp.loop.inc5 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.inc5: +// CHECK-NEXT: %omp_omp.loop.next10 = add nuw i32 %omp_omp.loop.iv8, 1 +// CHECK-NEXT: br label %omp_omp.loop.header2, !llvm.loop ![[$MD1:[0-9]+]] +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.exit6: +// CHECK-NEXT: br label %omp_omp.loop.after7 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.after7: +// CHECK-NEXT: br label %omp.region.cont +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont: +// CHECK-NEXT: br label %omp_omp.loop.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.inc: +// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1 +// CHECK-NEXT: br label %omp_omp.loop.header, !llvm.loop ![[$MD3:[0-9]+]] +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.exit: +// CHECK-NEXT: br label %omp_omp.loop.after +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.after: +// CHECK-NEXT: ret void +// CHECK-NEXT: } +llvm.func @unroll_heuristic_nested_loop(%ptr: !llvm.ptr, %outer_tc: i32, %inner_tc: i32) -> () { + %outer_cli = omp.new_cli + %inner_cli = omp.new_cli + omp.canonical_loop(%outer_cli) %outer_iv : i32 in range(%outer_tc) { + omp.canonical_loop(%inner_cli) %inner_iv : i32 in range(%inner_tc) { + %val = 
llvm.mlir.constant(42.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + omp.terminator + } + omp.unroll_heuristic(%outer_cli) + omp.unroll_heuristic(%inner_cli) + llvm.return +} + + +// Start of metadata +// CHECK-LABEL: !llvm.module.flags + +// CHECK: ![[$MD1]] = distinct !{![[$MD1]], ![[$MD2:[0-9]+]]} +// CHECK: ![[$MD2]] = !{!"llvm.loop.unroll.enable"} +// CHECK: ![[$MD3]] = distinct !{![[$MD3]], ![[$MD2]]} + diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 6008ed4673d1b..cbb4030f3adb4 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -2223,6 +2223,17 @@ generateNamedOperandGetters(const Operator &op, Class &opClass, "'SameVariadicOperandSize' traits"); } + // Print the ods names so they don't need to be hardcoded in the source. + for (int i = 0; i != numOperands; ++i) { + const auto &operand = op.getOperand(i); + if (operand.name.empty()) + continue; + + opClass.declare("static constexpr int", Twine("odsIndex_") + + operand.name + " = " + + Twine(i)); + } + // First emit a few "sink" getter methods upon which we layer all nicer named // getter methods. // If generating for an adaptor, the method is put into the non-templated