diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 99ac5cfb7b9e9..e7b01b0570567 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -405,6 +405,170 @@ def SingleOp : OpenMP_Op<"single", [AttrSizedOperandSegments]> {
   let hasVerifier = 1;
 }
 
+//===---------------------------------------------------------------------===//
+// OpenMP Canonical Loop Info Type
+//===---------------------------------------------------------------------===//
+
+def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> {
+  let summary = "Type for representing a reference to a canonical loop";
+  let description = [{
+    A variable of type CanonicalLoopInfo refers to an OpenMP-compatible
+    canonical loop in the same function. Values of this type are not
+    available at runtime and therefore cannot be used by the program itself,
+    i.e. an opaque type. It is similar to the transform dialect's
+    `!transform.interface` type, but instead of implementing an interface
+    for each transformation, the OpenMP dialect itself defines possible
+    operations on this type.
+
+    A CanonicalLoopInfo value can be
+
+    1. passed to omp.canonical_loop to associate the loop to that value
+    2. passed to omp operations that take a CanonicalLoopInfo argument,
+       such as `omp.unroll`.
+
+    A CanonicalLoopInfo value can not
+
+    1. be returned from a function,
+    2. passed to operations that are not specifically designed to take a
+       CanonicalLoopInfo, including AnyType.
+
+    A CanonicalLoopInfo value directly corresponds to an object of
+    OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR.
+  }];
+}
+
+//===---------------------------------------------------------------------===//
+// OpenMP Canonical Loop Info Operation
+//===---------------------------------------------------------------------===//
+
+def NewCliOp : OpenMP_Op<"new_cli"> {
+  let summary = "Create a new Canonical Loop Info value.";
+  let description = [{
+    Create a new CLI that can be passed as an argument to a CanonicalLoopOp
+    and to loop transformation operations to handle dependencies between
+    loop transformation operations.
+  }];
+  let results = (outs CanonicalLoopInfoType:$result);
+  let assemblyFormat = [{
+    attr-dict `:` type($result)
+  }];
+}
+
+
+//===---------------------------------------------------------------------===//
+// OpenMP Canonical Loop Operation
+//===---------------------------------------------------------------------===//
+def CanonicalLoopOp : OpenMP_Op<"canonical_loop", []> {
+  let summary = "OpenMP Canonical Loop Operation";
+  let description = [{
+    All loops that conform to OpenMP's definition of a canonical loop can be
+    simplified to a CanonicalLoopOp. In particular, there are no loop-carried
+    variables and the number of iterations it will execute is known before the
+    operation. This allows e.g. to determine the number of threads and chunks
+    the iteration space is split into before executing any iteration. More
+    restrictions may apply in cases such as (collapsed) loop nests, doacross
+    loops, etc.
+
+    The induction variable is always of the same type as the tripcount argument.
+    Since it can never be negative, tripcount is always interpreted as an
+    unsigned integer. It is the caller's responsibility to ensure the tripcount
+    is not negative when its interpretation is signed, i.e.
+    `%tripcount = max(0,%tripcount)`.
+
+    In contrast to other loop operations such as `scf.for`, the number of
+    iterations is determined by only a single variable, the trip-count. The
+    induction variable value is the logical iteration number of that iteration,
+    which OpenMP defines to be between 0 and the trip-count (exclusive).
+    Loop representations having lower-bound, upper-bound, and step-size operands
+    require passes to do more work than necessary, including handling special
+    cases such as upper-bound smaller than lower-bound, upper-bound equal to
+    the integer type's maximal value, negative step size, etc. This complexity
+    is better handled only once by the front-end, which can apply its semantics
+    for such cases while still being able to represent any kind of loop, which
+    is kind of the point of a mid-end intermediate representation. User-defined
+    types such as random-access iterators in C++ could not directly be
+    represented anyway.
+
+    An optional argument to an omp.canonical_loop that can be passed in
+    is a CanonicalLoopInfo value that can be used to refer to the canonical
+    loop to apply transformations -- such as tiling, unrolling, or
+    work-sharing -- to the loop, similar to the transform dialect but
+    with OpenMP-specific semantics.
+
+    A CanonicalLoopOp can be lowered to LLVM-IR using OpenMPIRBuilder's
+    createCanonicalLoop method.
+
+    #### Examples
+
+    Translation from lower-bound, upper-bound, step-size to trip-count.
+    ```c
+    for (int i = 3; i < 42; i+=2) {
+      B[i] = A[i];
+    }
+    ```
+
+    ```mlir
+    %lb = arith.constant 3 : i32
+    %ub = arith.constant 42 : i32
+    %step = arith.constant 2 : i32
+    %range = arith.subi %ub, %lb : i32
+    %tc = arith.ceildivui %range, %step : i32
+    omp.canonical_loop %iv : i32 in [0, %tc) {
+      %offset = arith.muli %iv, %step : i32
+      %i = arith.addi %offset, %lb : i32
+      %a = load %arrA[%i] : memref<?xf32>
+      store %a, %arrB[%i] : memref<?xf32>
+    }
+    ```
+
+    Nested canonical loop with transformation.
+    ```mlir
+    %outer = omp.new_cli : !omp.cli
+    %inner = omp.new_cli : !omp.cli
+    omp.canonical_loop %iv1 : i32 in [0, %tripcount), %outer : !omp.cli {
+      omp.canonical_loop %iv2 : i32 in [0, %tc), %inner : !omp.cli {
+        %a = load %arrA[%iv1, %iv2] : memref<?x?xf32>
+        store %a, %arrB[%iv1, %iv2] : memref<?x?xf32>
+      }
+    }
+    omp.tile(%outer, %inner : !omp.cli, !omp.cli)
+    ```
+
+    Nested canonical loop with other constructs. The `omp.distribute`
+    operation has not been added yet, so this is suggested use with other
+    constructs.
+    ```mlir
+    omp.target {
+      omp.teams {
+        omp.distribute {
+          %outer = omp.new_cli : !omp.cli
+          %inner = omp.new_cli : !omp.cli
+          omp.canonical_loop %iv1 : i32 in [0, %tripcount), %outer : !omp.cli {
+            omp.canonical_loop %iv2 : i32 in [0, %tc), %inner : !omp.cli {
+              %a = load %arrA[%iv1, %iv2] : memref<?x?xf32>
+              store %a, %arrB[%iv1, %iv2] : memref<?x?xf32>
+            }
+          }
+          omp.collapse(%outer, %inner)
+        }
+      }
+    }
+    ```
+
+  }];
+  let hasCustomAssemblyFormat = 1;
+  let hasVerifier = 1;
+
+  let arguments = (ins IntLikeType:$tripCount,
+                       Optional<CanonicalLoopInfoType>:$cli);
+
+  let regions = (region AnyRegion:$region);
+
+  let extraClassDeclaration = [{
+    ::mlir::Value getInductionVar();
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // 2.9.2 Workshare Loop Construct
 //===----------------------------------------------------------------------===//
@@ -619,7 +783,7 @@ def SimdLoopOp : OpenMP_Op<"simdloop", [AttrSizedOperandSegments,
 def YieldOp : OpenMP_Op<"yield",
     [Pure, ReturnLike, Terminator,
      ParentOneOf<["WsLoopOp", "ReductionDeclareOp",
-     "AtomicUpdateOp", "SimdLoopOp"]>]> {
+     "AtomicUpdateOp", "SimdLoopOp", "CanonicalLoopOp"]>]> {
   let summary = "loop yield and termination operation";
   let description = [{
     "omp.yield" yields SSA values from the OpenMP dialect op region and
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 480af0e1307c1..980b13cf373bf 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1551,6 +1551,87 @@ LogicalResult DataBoundsOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// CanonicaLoopOp +//===----------------------------------------------------------------------===// + +Value mlir::omp::CanonicalLoopOp::getInductionVar() { + return getRegion().getArgument(0); +} + +void mlir::omp::CanonicalLoopOp::print(OpAsmPrinter &p) { + p << " " << getInductionVar() << " : " << getInductionVar().getType() + << " in [0, " << getTripCount() << ")"; + if (getCli()) { + p << ", " << getCli() << " : " << getCli().getType(); + } + p << " "; + + // omp.yield is implicit if no arguments passed to it. + p.printRegion(getRegion(), /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/true); + + p.printOptionalAttrDict((*this)->getAttrs()); +} + +mlir::ParseResult +mlir::omp::CanonicalLoopOp::parse(::mlir::OpAsmParser &parser, + ::mlir::OperationState &result) { + Builder &builder = parser.getBuilder(); + + // We derive the type of tripCount from inductionVariable. Unfortunately we + // cannot do the other way around because MLIR requires the type of tripCount + // to be known when calling resolveOperand. 
+ OpAsmParser::Argument inductionVariable; + if (parser.parseArgument(inductionVariable, /*allowType*/ true) || + parser.parseKeyword("in") || parser.parseLSquare()) + return failure(); + + int zero = -1; + SMLoc zeroLoc = parser.getCurrentLocation(); + if (parser.parseInteger(zero)) + return failure(); + if (zero != 0) { + parser.emitError(zeroLoc, "Logical iteration space starts with zero"); + return failure(); + } + + OpAsmParser::UnresolvedOperand tripcount; + if (parser.parseComma() || parser.parseOperand(tripcount) || + parser.parseRParen() || + parser.resolveOperand(tripcount, inductionVariable.type, result.operands)) + return failure(); + + OpAsmParser::UnresolvedOperand cli; + Type type; + if (succeeded(parser.parseOptionalComma())) + if (parser.parseOperand(cli) || parser.parseColonType(type) || + parser.resolveOperand(cli, type, result.operands)) + return failure(); + + // Parse the loop body. + Region *region = result.addRegion(); + if (parser.parseRegion(*region, {inductionVariable})) + return failure(); + + // Parse the optional attribute list. 
+ if (parser.parseOptionalAttrDict(result.attributes)) + return failure(); + + return mlir::success(); +} + +LogicalResult CanonicalLoopOp::verify() { + Value indVar = getInductionVar(); + Value tripCount = getTripCount(); + + if (indVar.getType() != tripCount.getType()) + return emitOpError( + "Region argument must be the same type as the trip count"); + + return success(); +} + #define GET_ATTRDEF_CLASSES #include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.cpp.inc" diff --git a/mlir/test/Dialect/OpenMP/cli.mlir b/mlir/test/Dialect/OpenMP/cli.mlir new file mode 100644 index 0000000000000..48ab6756c2aea --- /dev/null +++ b/mlir/test/Dialect/OpenMP/cli.mlir @@ -0,0 +1,60 @@ +// RUN: mlir-opt %s | mlir-opt | FileCheck %s + +// CHECK-LABEL: @omp_canonloop_raw +// CHECK-SAME: (%[[tc:.*]]: i32) +func.func @omp_canonloop_raw(%tc : i32) -> () { + // CHECK: omp.canonical_loop %{{.*}} : i32 in [0, %[[tc]]) { + "omp.canonical_loop" (%tc) ({ + ^bb0(%iv: i32): + omp.yield + }) : (i32) -> () + return +} + +// CHECK-LABEL: @omp_nested_canonloop_raw +// CHECK-SAME: (%[[tc_outer:.*]]: i32, %[[tc_inner:.*]]: i32) +func.func @omp_nested_canonloop_raw(%tc_outer : i32, %tc_inner : i32) -> () { + // CHECK: %[[outer_cli:.*]] = omp.new_cli : !omp.cli + %outer = "omp.new_cli" () : () -> (!omp.cli) + // CHECK: %[[inner_cli:.*]] = omp.new_cli : !omp.cli + %inner = "omp.new_cli" () : () -> (!omp.cli) + // CHECK: omp.canonical_loop %{{.*}} : i32 in [0, %[[tc_outer]]), %[[outer_cli]] : !omp.cli { + "omp.canonical_loop" (%tc_outer, %outer) ({ + ^bb_outer(%iv_outer: i32): + // CHECK: omp.canonical_loop %{{.*}} : i32 in [0, %[[tc_inner]]), %[[inner_cli]] : !omp.cli { + "omp.canonical_loop" (%tc_inner, %inner) ({ + ^bb_inner(%iv_inner: i32): + omp.yield + }) : (i32, !omp.cli) -> () + omp.yield + }) : (i32, !omp.cli) -> () + return +} + +// CHECK-LABEL: @omp_canonloop_pretty +// CHECK-SAME: (%[[tc:.*]]: i32) +func.func @omp_canonloop_pretty(%tc : i32) -> () { + // CHECK: omp.canonical_loop %[[iv:.*]] 
: i32 in [0, %[[tc]]) { + omp.canonical_loop %iv : i32 in [0, %tc) { + // CHECK-NEXT: %{{.*}} = llvm.add %[[iv]], %[[iv]] : i32 + %newval = llvm.add %iv, %iv: i32 + omp.yield + } + return +} + +// CHECK-LABEL: @omp_canonloop_nested_pretty +func.func @omp_canonloop_nested_pretty(%tc : i32) -> () { + // CHECK: %[[cli:.*]] = omp.new_cli : !omp.cli + %cli = omp.new_cli : !omp.cli + // CHECK: omp.canonical_loop %{{.*}} : i32 in [0, %{{.*}}), %[[cli]] : !omp.cli { + omp.canonical_loop %iv1 : i32 in [0, %tc), %cli : !omp.cli { + // CHECK: omp.canonical_loop %{{.*}} : i32 in [0, %{{.*}}) { + omp.canonical_loop %iv2 : i32 in [0, %tc) { + omp.yield + } + omp.yield + } + return +} +