diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td index 0b063aa772bab..0e23257456223 100644 --- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td @@ -140,6 +140,7 @@ def ForOp : SCF_Op<"for", "getSingleUpperBound", "getYieldedValuesMutable", "promoteIfSingleIteration", "replaceWithAdditionalYields", "yieldTiledValuesAndReplace"]>, + LoopLikeWithInductionVarsOpInterface, AllTypesMatch<["lowerBound", "upperBound", "step"]>, ConditionallySpeculatable, DeclareOpInterfaceMethodsgetArguments().drop_front(getNumInductionVars())[index]; } + /// Return the induction variables. + ::mlir::ValueRange getInductionVars() { + return getBody()->getArguments().take_front(getNumInductionVars()); + } + + /// Get lower bounds as `OpFoldResult`. + SmallVector getMixedLowerBound() { + return {getAsOpFoldResult(getLowerBound())}; + } + + /// Get upper bounds as `OpFoldResult`. + SmallVector getMixedUpperBound() { + return {getAsOpFoldResult(getUpperBound())}; + } + + /// Get steps as `OpFoldResult`. + SmallVector getMixedStep() { + return {getAsOpFoldResult(getStep())}; + } + + /// Get lower bounds as values. + SmallVector getLowerBound(OpBuilder &b) { + return ValueRange{getLowerBound()}; + } + + /// Get upper bounds as values. + SmallVector getUpperBound(OpBuilder &b) { + return ValueRange{getUpperBound()}; + } + + /// Get steps as values. + SmallVector getStep(OpBuilder &b) { + return ValueRange{getStep()}; + } + + /// Set the lower bounds from `OpFoldResult`. + void setMixedLowerBounds(OpBuilder &b, ArrayRef lbs) { + setLowerBound(getValueOrCreateConstantIndexOp(b, getLoc(), lbs[0])); + } + + /// Set the upper bounds from `OpFoldResult`. + void setMixedUpperBounds(OpBuilder &b, ArrayRef ubs) { + setUpperBound(getValueOrCreateConstantIndexOp(b, getLoc(), ubs[0])); + } + + /// Set the steps from `OpFoldResult`. 
+ void setMixedSteps(OpBuilder &b, ArrayRef steps) { + setStep(getValueOrCreateConstantIndexOp(b, getLoc(), steps[0])); + } + + /// Set the lower bounds from values. + void setLowerBounds(ArrayRef lbs) { + assert(lbs.size() == 1 && "expected a single lower bound"); + setLowerBound(lbs[0]); + } + + /// Set the upper bounds from values. + void setUpperBounds(ArrayRef ubs) { + assert(ubs.size() == 1 && "expected a single upper bound"); + setUpperBound(ubs[0]); + } + + /// Set the steps from values. + void setSteps(ArrayRef steps) { + assert(steps.size() == 1 && "expected a single step"); + setStep(steps[0]); + } + void setLowerBound(Value bound) { getOperation()->setOperand(0, bound); } void setUpperBound(Value bound) { getOperation()->setOperand(1, bound); } void setStep(Value step) { getOperation()->setOperand(2, step); } @@ -304,6 +373,7 @@ def ForallOp : SCF_Op<"forall", [ ["getInitsMutable", "getRegionIterArgs", "getSingleInductionVar", "getSingleLowerBound", "getSingleUpperBound", "getSingleStep", "promoteIfSingleIteration", "yieldTiledValuesAndReplace"]>, + LoopLikeWithInductionVarsOpInterface, RecursiveMemoryEffects, SingleBlockImplicitTerminator<"scf::InParallelOp">, DeclareOpInterfaceMethods, @@ -543,6 +613,33 @@ def ForallOp : SCF_Op<"forall", [ return getValueOrCreateConstantIndexOp(b, getLoc(), getMixedStep()); } + /// Set the lower bounds from `OpFoldResult`. + void setMixedLowerBounds(OpBuilder &b, ArrayRef lbs); + + /// Set the upper bounds from `OpFoldResult`. + void setMixedUpperBounds(OpBuilder &b, ArrayRef ubs); + + /// Set the steps from `OpFoldResult`. + void setMixedSteps(OpBuilder &b, ArrayRef steps); + + /// Set the lower bounds from values. + void setLowerBounds(ArrayRef lbs) { + OpBuilder b(getOperation()->getContext()); + return setMixedLowerBounds(b, getAsOpFoldResult(lbs)); + } + + /// Set the upper bounds from values. 
+ void setUpperBounds(ArrayRef ubs) { + OpBuilder b(getOperation()->getContext()); + return setMixedUpperBounds(b, getAsOpFoldResult(ubs)); + } + + /// Set the steps from values. + void setSteps(ArrayRef steps) { + OpBuilder b(getOperation()->getContext()); + return setMixedSteps(b, getAsOpFoldResult(steps)); + } + int64_t getRank() { return getStaticLowerBound().size(); } /// Number of operands controlling the loop: lbs, ubs, steps diff --git a/mlir/include/mlir/Dialect/Utils/LoopUtils.h b/mlir/include/mlir/Dialect/Utils/LoopUtils.h new file mode 100644 index 0000000000000..15e901dc0e45e --- /dev/null +++ b/mlir/include/mlir/Dialect/Utils/LoopUtils.h @@ -0,0 +1,30 @@ +//===- LoopUtils.h - Helpers related to loop operations ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header file defines utilities for loop operations. +// +//===----------------------------------------------------------------------===// + +#include "mlir/IR/PatternMatch.h" + +namespace mlir { + +// This structure is to pass and return sets of loop parameters without +// confusing the order. +struct LoopParams { + Value lowerBound; + Value upperBound; + Value step; +}; + +/// Calculate the normalized loop upper bounds with lower bound equal to zero +/// and step equal to one. 
+LoopParams emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, + Value lb, Value ub, Value step); + +} // namespace mlir diff --git a/mlir/include/mlir/Interfaces/LoopLikeInterface.h b/mlir/include/mlir/Interfaces/LoopLikeInterface.h index 42609e824c86a..fab5ffa26e574 100644 --- a/mlir/include/mlir/Interfaces/LoopLikeInterface.h +++ b/mlir/include/mlir/Interfaces/LoopLikeInterface.h @@ -13,6 +13,7 @@ #ifndef MLIR_INTERFACES_LOOPLIKEINTERFACE_H_ #define MLIR_INTERFACES_LOOPLIKEINTERFACE_H_ +#include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/IR/OpDefinition.h" namespace mlir { @@ -28,6 +29,9 @@ using NewYieldValuesFn = std::function( namespace detail { /// Verify invariants of the LoopLikeOpInterface. LogicalResult verifyLoopLikeOpInterface(Operation *op); + +/// Verify invariants of the LoopLikeWithInductionVarsOpInterface. +LogicalResult verifyLoopLikeWithInductionVarsOpInterface(Operation *op); } // namespace detail //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Interfaces/LoopLikeInterface.td b/mlir/include/mlir/Interfaces/LoopLikeInterface.td index f0dc6e60eba58..d7580616efaa4 100644 --- a/mlir/include/mlir/Interfaces/LoopLikeInterface.td +++ b/mlir/include/mlir/Interfaces/LoopLikeInterface.td @@ -375,6 +375,132 @@ def LoopLikeOpInterface : OpInterface<"LoopLikeOpInterface"> { }]; } +def LoopLikeWithInductionVarsOpInterface + : OpInterface<"LoopLikeWithInductionVarsOpInterface", [LoopLikeOpInterface]> { + let description = [{ + Interface for loop-like operations with one or more induction variables. + This interface contains helper functions for retrieving and updating the + lower bound, upper bound and step size for each induction variable and + provides a utility function to check whether the loop is normalized, i.e. + all lower bounds are equal to zero and steps are equal to one. 
+ }]; + let cppNamespace = "::mlir"; + + let methods = [ + InterfaceMethod<[{ + Return the induction variables if they exist, otherwise return an + empty range (the return type is a `ValueRange`, not an optional). + }], + /*retTy=*/"::mlir::ValueRange", + /*methodName=*/"getInductionVars" + >, + InterfaceMethod<[{ + Return the lower bound values or attributes as OpFoldResult. + }], + /*retTy=*/"SmallVector<::mlir::OpFoldResult>", + /*methodName=*/"getMixedLowerBound" + >, + InterfaceMethod<[{ + Return the step values or attributes if they exist as OpFoldResult. + }], + /*retTy=*/"SmallVector<::mlir::OpFoldResult>", + /*methodName=*/"getMixedStep" + >, + InterfaceMethod<[{ + Return the upper bound values or attributes as OpFoldResult. + }], + /*retTy=*/"SmallVector<::mlir::OpFoldResult>", + /*methodName=*/"getMixedUpperBound" + >, + InterfaceMethod<[{ + Return the lower bounds as values. + }], + /*retTy=*/"SmallVector", + /*methodName=*/"getLowerBound", + /*args=*/(ins "OpBuilder &":$b) + >, + InterfaceMethod<[{ + Return the steps as values. + }], + /*retTy=*/"SmallVector", + /*methodName=*/"getStep", + /*args=*/(ins "OpBuilder &":$b) + >, + InterfaceMethod<[{ + Return the upper bounds as values. + }], + /*retTy=*/"SmallVector", + /*methodName=*/"getUpperBound", + /*args=*/(ins "OpBuilder &":$b) + >, + InterfaceMethod<[{ + Set the lower bounds from an array of `OpFoldResult`. + }], + /*retTy=*/"void", + /*methodName=*/"setMixedLowerBounds", + /*args=*/(ins "OpBuilder &":$b, "ArrayRef":$lbs) + >, + InterfaceMethod<[{ + Set the steps from an array of `OpFoldResult`. + }], + /*retTy=*/"void", + /*methodName=*/"setMixedSteps", + /*args=*/(ins "OpBuilder &":$b, "ArrayRef":$lbs) + >, + InterfaceMethod<[{ + Set the upper bounds from an array of `OpFoldResult`. + }], + /*retTy=*/"void", + /*methodName=*/"setMixedUpperBounds", + /*args=*/(ins "OpBuilder &":$b, "ArrayRef":$lbs) + >, + InterfaceMethod<[{ + Set the lower bounds from an array of values. 
+ }], + /*retTy=*/"void", + /*methodName=*/"setLowerBounds", + /*args=*/(ins "ArrayRef":$lbs) + >, + InterfaceMethod<[{ + Set the steps from an array of values. + }], + /*retTy=*/"void", + /*methodName=*/"setSteps", + /*args=*/(ins "ArrayRef":$lbs) + >, + InterfaceMethod<[{ + Set the upper bounds from an array of values. + }], + /*retTy=*/"void", + /*methodName=*/"setUpperBounds", + /*args=*/(ins "ArrayRef":$lbs) + >, + InterfaceMethod<[{ + Checks if the lower bounds are zeros and steps are ones. + }], + /*retTy=*/"bool", + /*methodName=*/"isNormalized", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + auto allEqual = [](ArrayRef results, int64_t val) { + return llvm::all_of(results, [&](OpFoldResult ofr) { + auto intValue = getConstantIntValue(ofr); + return intValue.has_value() && intValue == val; + }); + }; + SmallVector<::mlir::OpFoldResult> lbs = $_op.getMixedLowerBound(); + SmallVector<::mlir::OpFoldResult> steps = $_op.getMixedStep(); + return allEqual(lbs, 0) && allEqual(steps, 1); + }] + > + ]; + + let verify = [{ + return detail::verifyLoopLikeWithInductionVarsOpInterface($_op); + }]; +} + //===----------------------------------------------------------------------===// // Traits //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h index 58bd61b2ae8b8..755ec7ecdfbad 100644 --- a/mlir/include/mlir/Transforms/Passes.h +++ b/mlir/include/mlir/Transforms/Passes.h @@ -82,6 +82,10 @@ std::unique_ptr createLoopInvariantCodeMotionPass(); /// Creates a pass that hoists loop-invariant subset ops. std::unique_ptr createLoopInvariantSubsetHoistingPass(); +/// Create a pass that normalizes the loop bounds of loop-like operations with +/// induction variables. +std::unique_ptr createNormalizeLoopBoundsPass(); + /// Creates a pass to strip debug information from a function. 
std::unique_ptr createStripDebugInfoPass(); diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td index 1b40a87c63f27..5d1256e502a12 100644 --- a/mlir/include/mlir/Transforms/Passes.td +++ b/mlir/include/mlir/Transforms/Passes.td @@ -377,6 +377,12 @@ def Mem2Reg : Pass<"mem2reg"> { ]; } +def NormalizeLoopBounds : Pass<"normalize-loop-bounds"> { + let summary = "Normalize the loop bounds of loop-like operations with " + "induction variables."; + let constructor = "mlir::createNormalizeLoopBoundsPass()"; +} + def PrintOpStats : Pass<"print-op-stats"> { let summary = "Print statistics of operations"; let constructor = "mlir::createPrintOpStatsPass()"; diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index 107fd0690f193..3e7becb094b6b 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -1387,6 +1387,66 @@ void ForallOp::build( build(b, result, lbs, ubs, steps, outputs, mapping, bodyBuilderFn); } +/// Set the lower bounds from `OpFoldResult`. +void ForallOp::setMixedLowerBounds(OpBuilder &b, ArrayRef lbs) { + SmallVector staticLbs; + SmallVector dynamicLbs; + dispatchIndexOpFoldResults(lbs, dynamicLbs, staticLbs); + getOperation()->setOperands(0, getDynamicLowerBound().size(), dynamicLbs); + (*this)->setAttr(getStaticLowerBoundAttrName(), + b.getDenseI64ArrayAttr(staticLbs)); + ArrayRef segmentSizes = + (*this) + ->getAttrOfType("operandSegmentSizes") + .asArrayRef(); + SmallVector newSegmentSizes(segmentSizes.begin(), + segmentSizes.end()); + newSegmentSizes[0] = dynamicLbs.size(); + (*this)->setAttr("operandSegmentSizes", + b.getDenseI32ArrayAttr(newSegmentSizes)); +} + +/// Set the upper bounds from `OpFoldResult`. 
+void ForallOp::setMixedUpperBounds(OpBuilder &b, ArrayRef ubs) { + SmallVector staticUbs; + SmallVector dynamicUbs; + dispatchIndexOpFoldResults(ubs, dynamicUbs, staticUbs); + size_t offset = getDynamicLowerBound().size(); + getOperation()->setOperands(offset, getDynamicUpperBound().size(), + dynamicUbs); + (*this)->setAttr(getStaticUpperBoundAttrName(), + b.getDenseI64ArrayAttr(staticUbs)); + ArrayRef segmentSizes = + (*this) + ->getAttrOfType("operandSegmentSizes") + .asArrayRef(); + SmallVector newSegmentSizes(segmentSizes.begin(), + segmentSizes.end()); + newSegmentSizes[1] = dynamicUbs.size(); + (*this)->setAttr("operandSegmentSizes", + b.getDenseI32ArrayAttr(newSegmentSizes)); +} + +/// Set the steps from `OpFoldResult`. +void ForallOp::setMixedSteps(OpBuilder &b, ArrayRef steps) { + SmallVector staticSteps; + SmallVector dynamicSteps; + dispatchIndexOpFoldResults(steps, dynamicSteps, staticSteps); + size_t offset = getDynamicLowerBound().size() + getDynamicUpperBound().size(); + getOperation()->setOperands(offset, getDynamicStep().size(), dynamicSteps); + (*this)->setAttr(getStaticStepAttrName(), + b.getDenseI64ArrayAttr(staticSteps)); + ArrayRef segmentSizes = + (*this) + ->getAttrOfType("operandSegmentSizes") + .asArrayRef(); + SmallVector newSegmentSizes(segmentSizes.begin(), + segmentSizes.end()); + newSegmentSizes[2] = dynamicSteps.size(); + (*this)->setAttr("operandSegmentSizes", + b.getDenseI32ArrayAttr(newSegmentSizes)); +} + // Checks if the lbs are zeros and steps are ones. 
bool ForallOp::isNormalized() { auto allEqual = [](ArrayRef results, int64_t val) { diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp index 6658cca03eba7..41f52cb84f4ed 100644 --- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp +++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp @@ -16,6 +16,7 @@ #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/Dialect/Utils/LoopUtils.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/IRMapping.h" #include "mlir/IR/PatternMatch.h" @@ -29,16 +30,6 @@ using namespace mlir; -namespace { -// This structure is to pass and return sets of loop parameters without -// confusing the order. -struct LoopParams { - Value lowerBound; - Value upperBound; - Value step; -}; -} // namespace - SmallVector mlir::replaceLoopNestWithNewYields( RewriterBase &rewriter, MutableArrayRef loopNest, ValueRange newIterOperands, const NewYieldValuesFn &newYieldValuesFn, @@ -473,50 +464,6 @@ LogicalResult mlir::loopUnrollByFactor( return success(); } -/// Transform a loop with a strictly positive step -/// for %i = %lb to %ub step %s -/// into a 0-based loop with step 1 -/// for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 { -/// %i = %ii * %s + %lb -/// Insert the induction variable remapping in the body of `inner`, which is -/// expected to be either `loop` or another loop perfectly nested under `loop`. -/// Insert the definition of new bounds immediate before `outer`, which is -/// expected to be either `loop` or its parent in the loop nest. -static LoopParams emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, - Value lb, Value ub, Value step) { - // For non-index types, generate `arith` instructions - // Check if the loop is already known to have a constant zero lower bound or - // a constant one step. 
- bool isZeroBased = false; - if (auto lbCst = getConstantIntValue(lb)) - isZeroBased = lbCst.value() == 0; - - bool isStepOne = false; - if (auto stepCst = getConstantIntValue(step)) - isStepOne = stepCst.value() == 1; - - // Compute the number of iterations the loop executes: ceildiv(ub - lb, step) - // assuming the step is strictly positive. Update the bounds and the step - // of the loop to go from 0 to the number of iterations, if necessary. - if (isZeroBased && isStepOne) - return {lb, ub, step}; - - Value diff = isZeroBased ? ub : rewriter.create(loc, ub, lb); - Value newUpperBound = - isStepOne ? diff : rewriter.create(loc, diff, step); - - Value newLowerBound = isZeroBased - ? lb - : rewriter.create( - loc, rewriter.getZeroAttr(lb.getType())); - Value newStep = isStepOne - ? step - : rewriter.create( - loc, rewriter.getIntegerAttr(step.getType(), 1)); - - return {newLowerBound, newUpperBound, newStep}; -} - /// Get back the original induction variable values after loop normalization static void denormalizeInductionVariable(RewriterBase &rewriter, Location loc, Value normalizedIv, Value origLb, diff --git a/mlir/lib/Dialect/Utils/CMakeLists.txt b/mlir/lib/Dialect/Utils/CMakeLists.txt index a0096e5f299d5..41b2fe287beb3 100644 --- a/mlir/lib/Dialect/Utils/CMakeLists.txt +++ b/mlir/lib/Dialect/Utils/CMakeLists.txt @@ -1,5 +1,6 @@ add_mlir_library(MLIRDialectUtils IndexingUtils.cpp + LoopUtils.cpp ReshapeOpsUtils.cpp StructuredOpsUtils.cpp StaticValueUtils.cpp diff --git a/mlir/lib/Dialect/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Utils/LoopUtils.cpp new file mode 100644 index 0000000000000..3d8aa5ef7dfc1 --- /dev/null +++ b/mlir/lib/Dialect/Utils/LoopUtils.cpp @@ -0,0 +1,52 @@ +//===- LoopUtils.cpp - Helpers related to loop operations -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Utils/LoopUtils.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Utils/StaticValueUtils.h" + +using namespace mlir; + +/// Calculate the normalized loop upper bounds with lower bound equal to zero +/// and step equal to one. +LoopParams mlir::emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, + Value lb, Value ub, Value step) { + // For non-index types, generate `arith` instructions + // Check if the loop is already known to have a constant zero lower bound or + // a constant one step. + bool isZeroBased = false; + if (auto lbCst = getConstantIntValue(lb)) + isZeroBased = lbCst.value() == 0; + + bool isStepOne = false; + if (auto stepCst = getConstantIntValue(step)) + isStepOne = stepCst.value() == 1; + + // Compute the number of iterations the loop executes: ceildiv(ub - lb, step) + // assuming the step is strictly positive. Update the bounds and the step + // of the loop to go from 0 to the number of iterations, if necessary. + if (isZeroBased && isStepOne) + return {lb, ub, step}; + + Value diff = + isZeroBased ? ub : rewriter.createOrFold(loc, ub, lb); + Value newUpperBound = + isStepOne ? diff + : rewriter.createOrFold(loc, diff, step); + + Value newLowerBound = isZeroBased + ? lb + : rewriter.create( + loc, rewriter.getZeroAttr(lb.getType())); + Value newStep = isStepOne + ? step + : rewriter.create( + loc, rewriter.getIntegerAttr(step.getType(), 1)); + + return {newLowerBound, newUpperBound, newStep}; +} diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp index b51357198b1ca..5454411bc535b 100644 --- a/mlir/lib/IR/Operation.cpp +++ b/mlir/lib/IR/Operation.cpp @@ -245,8 +245,6 @@ void Operation::setOperands(ValueRange operands) { /// than the range pointed to by 'start'+'length'. 
void Operation::setOperands(unsigned start, unsigned length, ValueRange operands) { - assert((start + length) <= getNumOperands() && - "invalid operand range specified"); if (LLVM_LIKELY(hasOperandStorage)) return getOperandStorage().setOperands(this, start, length, operands); assert(operands.empty() && "setting operands without an operand storage"); diff --git a/mlir/lib/Interfaces/LoopLikeInterface.cpp b/mlir/lib/Interfaces/LoopLikeInterface.cpp index 1e0e87b64e811..3f478b9bc0b96 100644 --- a/mlir/lib/Interfaces/LoopLikeInterface.cpp +++ b/mlir/lib/Interfaces/LoopLikeInterface.cpp @@ -113,3 +113,28 @@ LogicalResult detail::verifyLoopLikeOpInterface(Operation *op) { return success(); } + +LogicalResult +detail::verifyLoopLikeWithInductionVarsOpInterface(Operation *op) { + auto loopLikeOp = cast(op); + + // Verify number of induction variables, lower bounds, upper bounds and steps. + if (loopLikeOp.getInductionVars().size() != + loopLikeOp.getMixedLowerBound().size()) + return op->emitOpError( + "different number of induction variables and lower bounds: ") + << loopLikeOp.getInductionVars().size() + << " != " << loopLikeOp.getMixedLowerBound().size(); + if (loopLikeOp.getInductionVars().size() != loopLikeOp.getMixedStep().size()) + return op->emitOpError( + "different number of induction variables and steps: ") + << loopLikeOp.getInductionVars().size() + << " != " << loopLikeOp.getMixedStep().size(); + if (loopLikeOp.getInductionVars().size() != + loopLikeOp.getMixedUpperBound().size()) + return op->emitOpError( + "different number of induction variables and upper bounds: ") + << loopLikeOp.getInductionVars().size() + << " != " << loopLikeOp.getMixedUpperBound().size(); + return success(); +} diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt index 90c0298fb5e46..dc24da367ca48 100644 --- a/mlir/lib/Transforms/CMakeLists.txt +++ b/mlir/lib/Transforms/CMakeLists.txt @@ -10,6 +10,7 @@ add_mlir_library(MLIRTransforms 
LocationSnapshot.cpp LoopInvariantCodeMotion.cpp Mem2Reg.cpp + NormalizeLoopBounds.cpp OpStats.cpp PrintIR.cpp RemoveDeadValues.cpp @@ -30,6 +31,7 @@ add_mlir_library(MLIRTransforms LINK_LIBS PUBLIC MLIRAnalysis MLIRCopyOpInterface + MLIRDialectUtils MLIRFunctionInterfaces MLIRLoopLikeInterface MLIRMemorySlotInterfaces diff --git a/mlir/lib/Transforms/NormalizeLoopBounds.cpp b/mlir/lib/Transforms/NormalizeLoopBounds.cpp new file mode 100644 index 0000000000000..ffb51b05ce1ca --- /dev/null +++ b/mlir/lib/Transforms/NormalizeLoopBounds.cpp @@ -0,0 +1,118 @@ +// Copyright 2024 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "mlir/Transforms/Passes.h" + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Utils/LoopUtils.h" +#include "mlir/Interfaces/LoopLikeInterface.h" + +namespace mlir { +#define GEN_PASS_DEF_NORMALIZELOOPBOUNDS +#include "mlir/Transforms/Passes.h.inc" +} // namespace mlir + +using namespace mlir; + +/// Normalize a loop-like operation with induction variables, i.e. calculate +/// new normalized upper bounds for lower bounds equal to zero and step sizes +/// equal to one. Then, insert new `affine.apply` operations to calculate the +/// denormalized index values and update all usage from the original induction +/// variables to the results of the `affine.apply` operations. 
+/// +/// Example: +/// Transform a `scf.forall` loop with strictly positive steps +/// forall (%i, %j) = (%lb0, %lb1) to (%ub0, %ub1) step (%s0, %s1) +/// into a 0-based loop with step 1 +/// forall (%i, %j) in (ceildiv(%ub0 - %lb0, %s0), ceildiv(%ub1 - %lb1, %s1)) +LogicalResult +normalizeLoopBounds(RewriterBase &rewriter, + LoopLikeWithInductionVarsOpInterface loopLikeOp) { + OpBuilder::InsertionGuard g(rewriter); + if (loopLikeOp.isNormalized()) + return success(); + + SmallVector newLbs; + SmallVector newUbs; + SmallVector newSteps; + rewriter.setInsertionPoint(loopLikeOp); + for (auto &&[iv, lb, ub, step] : llvm::zip( + loopLikeOp.getInductionVars(), loopLikeOp.getLowerBound(rewriter), + loopLikeOp.getUpperBound(rewriter), loopLikeOp.getStep(rewriter))) { + std::optional lbInt = getConstantIntValue(lb); + std::optional stepInt = getConstantIntValue(step); + + rewriter.setInsertionPoint(loopLikeOp); + auto newLoopParams = + emitNormalizedLoopBounds(rewriter, loopLikeOp.getLoc(), lb, ub, step); + + newLbs.push_back(newLoopParams.lowerBound); + newUbs.push_back(newLoopParams.upperBound); + newSteps.push_back(newLoopParams.step); + + Region &region = loopLikeOp.getOperation()->getRegion(0); + rewriter.setInsertionPointToStart(&region.front()); + SmallVector operands = {iv}; + AffineExpr idxExpr, stepExpr, offsetExpr, res; + if (!lbInt && !stepInt) { + bindDims(loopLikeOp.getContext(), idxExpr, stepExpr, offsetExpr); + res = idxExpr * stepExpr + offsetExpr; + operands.push_back(step); + operands.push_back(lb); + } else if (!lbInt) { + bindDims(loopLikeOp.getContext(), idxExpr, offsetExpr); + res = idxExpr * stepInt.value() + offsetExpr; + operands.push_back(lb); + } else if (!stepInt) { + bindDims(loopLikeOp.getContext(), idxExpr, stepExpr); + res = idxExpr * stepExpr + lbInt.value(); + operands.push_back(step); + } else { + bindDims(loopLikeOp.getContext(), idxExpr); + res = idxExpr * stepInt.value() + lbInt.value(); + } + + auto affineApply = rewriter.create( + 
loopLikeOp.getLoc(), res, operands); + SmallPtrSet preserve( + {iv.getDefiningOp(), affineApply.getOperation()}); + rewriter.replaceAllUsesExcept(iv, affineApply.getResult(), preserve); + } + + rewriter.setInsertionPoint(loopLikeOp); + rewriter.modifyOpInPlace(loopLikeOp, [&]() { + loopLikeOp.setLowerBounds(newLbs); + loopLikeOp.setUpperBounds(newUbs); + loopLikeOp.setSteps(newSteps); + }); + return success(); +} + +namespace { + +/// Pass which normalizes the loop bounds of operations implementing +/// `LoopLikeWithInductionVarsOpInterface`. +struct NormalizeLoopBounds + : public impl::NormalizeLoopBoundsBase { + void getDependentDialects(DialectRegistry &registry) const override { + registry.insert(); + } + + void runOnOperation() override { + Operation *parentOp = getOperation(); + IRRewriter rewriter(parentOp->getContext()); + + parentOp->walk([&](LoopLikeWithInductionVarsOpInterface loopLikeOp) { + (void)normalizeLoopBounds(rewriter, loopLikeOp); + }); + } +}; + +} // namespace + +std::unique_ptr mlir::createNormalizeLoopBoundsPass() { + return std::make_unique(); +} diff --git a/mlir/test/Dialect/Affine/loop-coalescing.mlir b/mlir/test/Dialect/Affine/loop-coalescing.mlir index ae0adf5a0a02d..0a96e01162d48 100644 --- a/mlir/test/Dialect/Affine/loop-coalescing.mlir +++ b/mlir/test/Dialect/Affine/loop-coalescing.mlir @@ -72,19 +72,16 @@ func.func @multi_use() { return } -func.func @unnormalized_loops() { +// CHECK: %[[orig_ub_i:.*]]: index, %[[orig_ub_j:.*]]: index +func.func @unnormalized_loops(%ubi: index, %ubj: index) { // CHECK: %[[orig_step_i:.*]] = arith.constant 2 // CHECK: %[[orig_step_j:.*]] = arith.constant 3 // CHECK: %[[orig_lb_i:.*]] = arith.constant 5 // CHECK: %[[orig_lb_j:.*]] = arith.constant 7 - // CHECK: %[[orig_ub_i:.*]] = arith.constant 10 - // CHECK: %[[orig_ub_j:.*]] = arith.constant 17 %c2 = arith.constant 2 : index %c3 = arith.constant 3 : index %c5 = arith.constant 5 : index %c7 = arith.constant 7 : index - %c10 = arith.constant 10 : index 
- %c17 = arith.constant 17 : index // Number of iterations in the outer scf. // CHECK: %[[diff_i:.*]] = arith.subi %[[orig_ub_i]], %[[orig_lb_i]] @@ -101,10 +98,10 @@ func.func @unnormalized_loops() { // New bounds of the outer scf. // CHECK: %[[range:.*]] = arith.muli %[[numiter_i]], %[[numiter_j]] // CHECK: scf.for %[[i:.*]] = %[[lb_i]] to %[[range]] step %[[step_i]] - scf.for %i = %c5 to %c10 step %c2 { + scf.for %i = %c5 to %ubi step %c2 { // The inner loop has been removed. // CHECK-NOT: scf.for - scf.for %j = %c7 to %c17 step %c3 { + scf.for %j = %c7 to %ubj step %c3 { // The IVs are rewritten. // CHECK: %[[normalized_j:.*]] = arith.remsi %[[i]], %[[numiter_j]] // CHECK: %[[normalized_i:.*]] = arith.divsi %[[i]], %[[numiter_j]] diff --git a/mlir/test/Transforms/normalize-loop-bounds.mlir b/mlir/test/Transforms/normalize-loop-bounds.mlir new file mode 100644 index 0000000000000..5130f4282b36b --- /dev/null +++ b/mlir/test/Transforms/normalize-loop-bounds.mlir @@ -0,0 +1,266 @@ +// RUN: mlir-opt %s -split-input-file -normalize-loop-bounds -verify-diagnostics | FileCheck %s + +// CHECK: #[[$MAP:.+]] = affine_map<(d0) -> (d0 + 2)> +// CHECK-LABEL: func.func @for_lowerbound_static +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index +// CHECK-DAG: %[[C6:.+]] = arith.constant 6 : index +// CHECK: scf.for %[[ARG:.+]] = %[[C0]] to %[[C6]] step %[[C1]] +// CHECK-NEXT: affine.apply #[[$MAP]](%[[ARG]]) +module { + func.func @for_lowerbound_static() { + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + %c8 = arith.constant 8 : index + scf.for %arg0 = %c2 to %c8 step %c1 { + } + return + } +} + +// ----- + +// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1) -> (d0 + d1)> +// CHECK-LABEL: func.func @for_lowerbound_dynamic +// CHECK-SAME: %[[ARG0:.+]]: index +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index +// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : 
index
+// CHECK-DAG: %[[UB:.+]] = arith.subi %[[C8]], %[[ARG0]] : index
+// CHECK: scf.for %[[ARG:.+]] = %[[C0]] to %[[UB]] step %[[C1]]
+// CHECK-NEXT: affine.apply #[[$MAP]](%[[ARG]], %[[ARG0]])
+module {
+  func.func @for_lowerbound_dynamic(%lb: index) {
+    %c1 = arith.constant 1 : index
+    %c8 = arith.constant 8 : index
+    scf.for %arg0 = %lb to %c8 step %c1 {
+    }
+    return
+  }
+}
+
+// -----
+
+// scf.for, static non-unit step: the input loop `0 to 8 step 2` is expected
+// to come out as `0 to 4 step 1`, with an affine.apply of `d0 * 2` on the new
+// induction variable immediately after the loop header.
+// CHECK: #[[$MAP:.+]] = affine_map<(d0) -> (d0 * 2)>
+// CHECK-LABEL: func.func @for_step_static
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
+// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
+// CHECK: scf.for %[[ARG:.+]] = %[[C0]] to %[[C4]] step %[[C1]]
+// CHECK-NEXT: affine.apply #[[$MAP]](%[[ARG]])
+module {
+  func.func @for_step_static() {
+    %c0 = arith.constant 0 : index
+    %c2 = arith.constant 2 : index
+    %c8 = arith.constant 8 : index
+    scf.for %arg0 = %c0 to %c8 step %c2 {
+    }
+    return
+  }
+}
+
+// -----
+
+// scf.for, dynamic step: the new upper bound is expected to be
+// `arith.ceildivsi 8, %step`, the loop to run from 0 with unit step, and the
+// body to start with an affine.apply of `d0 * d1` on (induction var, step).
+// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1) -> (d0 * d1)>
+// CHECK-LABEL: func.func @for_step_dynamic
+// CHECK-SAME: %[[ARG0:.+]]: index
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
+// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
+// CHECK-DAG: %[[UB:.+]] = arith.ceildivsi %[[C8]], %[[ARG0]] : index
+// CHECK: scf.for %[[ARG:.+]] = %[[C0]] to %[[UB]] step %[[C1]]
+// CHECK-NEXT: affine.apply #[[$MAP]](%[[ARG]], %[[ARG0]])
+module {
+  func.func @for_step_dynamic(%step: index) {
+    %c0 = arith.constant 0 : index
+    %c8 = arith.constant 8 : index
+    scf.for %arg0 = %c0 to %c8 step %step {
+    }
+    return
+  }
+}
+
+// -----
+
+// scf.for, static lower bound and step: the input loop `1 to 13 step 4` is
+// expected to come out as `0 to 3 step 1`, with an affine.apply of
+// `d0 * 4 + 1` on the new induction variable right after the loop header.
+// CHECK: #[[$MAP:.+]] = affine_map<(d0) -> (d0 * 4 + 1)>
+// CHECK-LABEL: func.func @for_lowerbound_and_step_static
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
+// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index
+// CHECK: scf.for %[[ARG:.+]] = %[[C0]] to %[[C3]] step %[[C1]]
+// CHECK-NEXT: affine.apply #[[$MAP]](%[[ARG]])
+module {
+  func.func @for_lowerbound_and_step_static() {
+    %c1 = arith.constant 1 : index
+    %c4 = arith.constant 4 : index
+    %c13 = arith.constant 13 : index
+    scf.for %arg0 = %c1 to %c13 step %c4 {
+    }
+    return
+  }
+}
+
+// -----
+
+// scf.for, dynamic lower bound and step: the new upper bound is expected to be
+// `ceildivsi(13 - %lb, %step)`, and the body to start with an affine.apply of
+// `d0 * d1 + d2` on (induction var, step, lower bound).
+// NOTE(review): `%c1` and `%c4` below are not referenced by the loop, which
+// uses `%lb`, `%c13` and `%step` only.
+// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2) -> (d0 * d1 + d2)>
+// CHECK-LABEL: func.func @for_lowerbound_and_step_dynamic
+// CHECK-SAME: %[[LB:.+]]: index, %[[STEP:.+]]: index
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
+// CHECK-DAG: %[[C13:.+]] = arith.constant 13 : index
+// CHECK-DAG: %[[SUB:.+]] = arith.subi %[[C13]], %[[LB]] : index
+// CHECK-DAG: %[[UB:.+]] = arith.ceildivsi %[[SUB]], %[[STEP]] : index
+// CHECK: scf.for %[[ARG:.+]] = %[[C0]] to %[[UB]] step %[[C1]]
+// CHECK-NEXT: affine.apply #[[$MAP]](%[[ARG]], %[[STEP]], %[[LB]])
+module {
+  func.func @for_lowerbound_and_step_dynamic(%lb: index, %step: index) {
+    %c1 = arith.constant 1 : index
+    %c4 = arith.constant 4 : index
+    %c13 = arith.constant 13 : index
+    scf.for %arg0 = %lb to %c13 step %step {
+    }
+    return
+  }
+}
+
+// -----
+
+// scf.forall, static lower bounds: `(2, 4) to (8, 16) step (1, 1)` is expected
+// to come out in the `in (6, 12)` form, with `d0 + 2` applied to the first
+// induction variable and `d0 + 4` applied to the second.
+// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0) -> (d0 + 4)>
+// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0) -> (d0 + 2)>
+// CHECK-LABEL: func.func @forall_lowerbound_static
+// CHECK: scf.forall (%[[ARG0:.+]], %[[ARG1:.+]]) in (6, 12)
+// CHECK-DAG: affine.apply #[[$MAP1]](%[[ARG0]])
+// CHECK-DAG: affine.apply #[[$MAP0]](%[[ARG1]])
+module {
+  func.func @forall_lowerbound_static() {
+    scf.forall (%arg2, %arg3) = (2, 4) to (8, 16) step (1, 1) {
+    }
+    return
+  }
+}
+
+// -----
+
+// scf.forall, dynamic lower bounds: the new upper bounds are expected to be
+// `8 - %lb0` and `16 - %lb1`, and each induction variable to be fed, together
+// with its lower bound, into an affine.apply of `d0 + d1`.
+// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1) -> (d0 + d1)>
+// CHECK-LABEL: func.func @forall_lowerbound_dynamic
+// CHECK-SAME: %[[LB0:.+]]: index, %[[LB1:.+]]: index
+// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
+// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
+// CHECK-DAG: %[[UB0:.+]] = arith.subi %[[C8]], %[[LB0]] : index
+// CHECK-DAG: %[[UB1:.+]] = arith.subi %[[C16]], %[[LB1]] : index
+// CHECK: scf.forall (%[[ARG0:.+]], %[[ARG1:.+]]) in (%[[UB0]], %[[UB1]])
+// CHECK-DAG: affine.apply #[[$MAP0]](%[[ARG0]], %[[LB0]])
+// CHECK-DAG: affine.apply #[[$MAP0]](%[[ARG1]], %[[LB1]])
+module {
+  func.func @forall_lowerbound_dynamic(%lb0: index, %lb1: index) {
+    scf.forall (%arg2, %arg3) = (%lb0, %lb1) to (8, 16) step (1, 1) {
+    }
+    return
+  }
+}
+
+// -----
+
+// scf.forall, static steps: `(0, 0) to (8, 16) step (8, 8)` is expected to
+// come out in the `in (1, 2)` form, with `d0 * 8` applied to each induction
+// variable.
+// CHECK: #[[$MAP:.+]] = affine_map<(d0) -> (d0 * 8)>
+// CHECK-LABEL: func.func @forall_step_static
+// CHECK: scf.forall (%[[ARG0:.+]], %[[ARG1:.+]]) in (1, 2)
+// CHECK-DAG: affine.apply #[[$MAP]](%[[ARG0]])
+// CHECK-DAG: affine.apply #[[$MAP]](%[[ARG1]])
+module {
+  func.func @forall_step_static() {
+    scf.forall (%arg2, %arg3) = (0, 0) to (8, 16) step (8, 8) {
+    }
+    return
+  }
+}
+
+// -----
+
+// scf.forall, dynamic steps: the new upper bounds are expected to be
+// `ceildivsi(8, %step0)` and `ceildivsi(16, %step1)`, and each induction
+// variable to be fed, together with its step, into an affine.apply of
+// `d0 * d1`.
+// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1) -> (d0 * d1)>
+// CHECK-LABEL: func.func @forall_step_dynamic
+// CHECK-SAME: %[[STEP0:.+]]: index, %[[STEP1:.+]]: index
+// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
+// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
+// CHECK-DAG: %[[UB0:.+]] = arith.ceildivsi %[[C8]], %[[STEP0]] : index
+// CHECK-DAG: %[[UB1:.+]] = arith.ceildivsi %[[C16]], %[[STEP1]] : index
+// CHECK: scf.forall (%[[ARG0:.+]], %[[ARG1:.+]]) in (%[[UB0]], %[[UB1]])
+// CHECK-DAG: affine.apply #[[$MAP]](%[[ARG0]], %[[STEP0]])
+// CHECK-DAG: affine.apply #[[$MAP]](%[[ARG1]], %[[STEP1]])
+module {
+  func.func @forall_step_dynamic(%step0: index, %step1: index) {
+    scf.forall (%arg2, %arg3) = (0, 0) to (8, 16) step (%step0, %step1) {
+    }
+    return
+  }
+}
+
+// -----
+
+// scf.forall, static lower bounds and steps: `(2, 4) to (8, 16) step (2, 4)`
+// is expected to come out in the `in (3, 3)` form, with `d0 * 2 + 2` applied
+// to the first induction variable and `d0 * 4 + 4` applied to the second.
+// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0) -> (d0 * 4 + 4)>
+// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0) -> (d0 * 2 + 2)>
+// CHECK-LABEL: func.func @forall_lowerbound_and_step_static
+// CHECK: scf.forall (%[[ARG0:.+]], %[[ARG1:.+]]) in (3, 3)
+// CHECK-DAG: affine.apply #[[$MAP1]](%[[ARG0]])
+// CHECK-DAG: affine.apply #[[$MAP0]](%[[ARG1]])
+module {
+  func.func @forall_lowerbound_and_step_static() {
+    scf.forall (%arg2, %arg3) = (2, 4) to (8, 16) step (2, 4) {
+    }
+    return
+  }
+}
+
+// -----
+
+// scf.forall, dynamic lower bounds and steps: the new upper bounds are
+// expected to be `ceildivsi(8 - %lb0, %step0)` and
+// `ceildivsi(16 - %lb1, %step1)`, and each induction variable to be fed,
+// together with its step and lower bound, into an affine.apply of
+// `d0 * d1 + d2`.
+// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2) -> (d0 * d1 + d2)>
+// CHECK-LABEL: func.func @forall_lowerbound_and_step_dynamic
+// CHECK-SAME: %[[LB0:.+]]: index, %[[LB1:.+]]: index, %[[STEP0:.+]]: index, %[[STEP1:.+]]: index
+// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
+// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
+// CHECK-DAG: %[[SUB0:.+]] = arith.subi %[[C8]], %[[LB0]] : index
+// CHECK-DAG: %[[SUB1:.+]] = arith.subi %[[C16]], %[[LB1]] : index
+// CHECK-DAG: %[[UB0:.+]] = arith.ceildivsi %[[SUB0]], %[[STEP0]] : index
+// CHECK-DAG: %[[UB1:.+]] = arith.ceildivsi %[[SUB1]], %[[STEP1]] : index
+// CHECK: scf.forall (%[[ARG0:.+]], %[[ARG1:.+]]) in (%[[UB0]], %[[UB1]])
+// CHECK-DAG: affine.apply #[[$MAP]](%[[ARG0]], %[[STEP0]], %[[LB0]])
+// CHECK-DAG: affine.apply #[[$MAP]](%[[ARG1]], %[[STEP1]], %[[LB1]])
+module {
+  func.func @forall_lowerbound_and_step_dynamic(%lb0: index, %lb1: index, %step0: index, %step1: index) {
+    scf.forall (%arg2, %arg3) = (%lb0, %lb1) to (8, 16) step (%step0, %step1) {
+    }
+    return
+  }
+}
+
+// -----
+
+// scf.forall with shared_outs, static bounds: `(2) to (8) step (4)` is
+// expected to come out in the `in (2)` form with the shared output still
+// initialized from the function's tensor argument, and `d0 * 4 + 2` applied
+// to the induction variable.
+// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0) -> (d0 * 4 + 2)>
+// CHECK-LABEL: func.func @forall_with_shared_outs_static
+// CHECK-SAME: %[[OUT:.+]]: tensor<200x100xf32>
+// CHECK: scf.forall (%[[ARG0:.+]]) in (2) shared_outs(%{{.+}} = %[[OUT]])
+// CHECK-DAG: affine.apply #[[$MAP]](%[[ARG0]])
+module {
+  func.func @forall_with_shared_outs_static(%out: tensor<200x100xf32>) {
+    scf.forall (%arg0) = (2) to (8) step (4) shared_outs (%o = %out) -> tensor<200x100xf32> {
+    }
+    return
+  }
+}
+
+// -----
+
+// scf.forall with shared_outs, dynamic lower bound and step: the new upper
+// bound is expected to be `ceildivsi(8 - %lb, %step)`, the shared output to
+// stay initialized from the function's tensor argument, and the induction
+// variable to be fed, with step and lower bound, into an affine.apply of
+// `d0 * d1 + d2`.
+// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2) -> (d0 * d1 + d2)>
+// CHECK-LABEL: func.func @forall_with_shared_outs_dynamic
+// CHECK-SAME: %[[LB:.+]]: index, %[[STEP:.+]]: index, %[[OUT:.+]]: tensor<200x100xf32>
+// CHECK-DAG: %[[C8:.+]] = arith.constant 8 : index
+// CHECK-DAG: %[[SUB:.+]] = arith.subi %[[C8]], %[[LB]] : index
+// CHECK-DAG: %[[UB:.+]] = arith.ceildivsi %[[SUB]], %[[STEP]] : index
+// CHECK: scf.forall (%[[ARG:.+]]) in (%[[UB]]) shared_outs(%{{.+}} = %[[OUT]])
+// CHECK-DAG: affine.apply #[[$MAP]](%[[ARG]], %[[STEP]], %[[LB]])
+module {
+  func.func @forall_with_shared_outs_dynamic(%lb: index, %step: index, %out: tensor<200x100xf32>) {
+    scf.forall (%arg0) = (%lb) to (8) step (%step) shared_outs (%o = %out) -> tensor<200x100xf32> {
+    }
+    return
+  }
+}