diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp index 2a9d3397e87b0..9a6566bef50f1 100644 --- a/flang/lib/Optimizer/Transforms/StackArrays.cpp +++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp @@ -198,7 +198,9 @@ class AllocMemConversion : public mlir::OpRewritePattern { /// Determine where to insert the alloca operation. The returned value should /// be checked to see if it is inside a loop - static InsertionPoint findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc); + static InsertionPoint + findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc, + const llvm::SmallVector &freeOps); private: /// Handle to the DFA (already run) @@ -206,7 +208,9 @@ class AllocMemConversion : public mlir::OpRewritePattern { /// If we failed to find an insertion point not inside a loop, see if it would /// be safe to use an llvm.stacksave/llvm.stackrestore inside the loop - static InsertionPoint findAllocaLoopInsertionPoint(fir::AllocMemOp &oldAlloc); + static InsertionPoint findAllocaLoopInsertionPoint( + fir::AllocMemOp &oldAlloc, + const llvm::SmallVector &freeOps); /// Returns the alloca if it was successfully inserted, otherwise {} std::optional @@ -484,6 +488,22 @@ StackArraysAnalysisWrapper::analyseFunction(mlir::Operation *func) { llvm::DenseSet freedValues; point.appendFreedValues(freedValues); + // Find all fir.freemem operations corresponding to fir.allocmem + // in freedValues. It is best to find the association going back + // from fir.freemem to fir.allocmem through the def-use chains, + // so that we can use lookThroughDeclaresAndConverts same way + // the AllocationAnalysis is handling them. + llvm::DenseMap> + allocToFreeMemMap; + func->walk([&](fir::FreeMemOp freeOp) { + mlir::Value memref = lookThroughDeclaresAndConverts(freeOp.getHeapref()); + if (!freedValues.count(memref)) + return; + + auto allocMem = memref.getDefiningOp(); + allocToFreeMemMap[allocMem].push_back(freeOp); + }); + // We only replace allocations which are definately freed on all routes // through the function because otherwise the allocation may have an intende // lifetime longer than the current stack frame (e.g. a heap allocation which @@ -491,7 +511,8 @@ StackArraysAnalysisWrapper::analyseFunction(mlir::Operation *func) { for (mlir::Value freedValue : freedValues) { fir::AllocMemOp allocmem = freedValue.getDefiningOp(); InsertionPoint insertionPoint = - AllocMemConversion::findAllocaInsertionPoint(allocmem); + AllocMemConversion::findAllocaInsertionPoint( + allocmem, allocToFreeMemMap[allocmem]); if (insertionPoint) candidateOps.insert({allocmem, insertionPoint}); } @@ -578,8 +599,9 @@ static bool isInLoop(mlir::Operation *op) { op->getParentOfType(); } -InsertionPoint -AllocMemConversion::findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) { +InsertionPoint AllocMemConversion::findAllocaInsertionPoint( + fir::AllocMemOp &oldAlloc, + const llvm::SmallVector &freeOps) { // Ideally the alloca should be inserted at the end of the function entry // block so that we do not allocate stack space in a loop. However, // the operands to the alloca may not be available that early, so insert it @@ -596,7 +618,7 @@ AllocMemConversion::findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) { if (isInLoop(oldAllocOp)) { // where we want to put it is in a loop, and even the old location is in // a loop. Give up. - return findAllocaLoopInsertionPoint(oldAlloc); + return findAllocaLoopInsertionPoint(oldAlloc, freeOps); } return {oldAllocOp}; } @@ -657,28 +679,14 @@ AllocMemConversion::findAllocaInsertionPoint(fir::AllocMemOp &oldAlloc) { return checkReturn(&entryBlock); } -InsertionPoint -AllocMemConversion::findAllocaLoopInsertionPoint(fir::AllocMemOp &oldAlloc) { +InsertionPoint AllocMemConversion::findAllocaLoopInsertionPoint( + fir::AllocMemOp &oldAlloc, + const llvm::SmallVector &freeOps) { mlir::Operation *oldAllocOp = oldAlloc; // This is only called as a last resort. We should try to insert at the // location of the old allocation, which is inside of a loop, using // llvm.stacksave/llvm.stackrestore - // find freemem ops - llvm::SmallVector freeOps; - - for (mlir::Operation *user : oldAllocOp->getUsers()) { - if (auto declareOp = mlir::dyn_cast_if_present(user)) { - for (mlir::Operation *user : declareOp->getUsers()) { - if (mlir::isa(user)) - freeOps.push_back(user); - } - } - - if (mlir::isa(user)) - freeOps.push_back(user); - } - assert(freeOps.size() && "DFA should only return freed memory"); // Don't attempt to reason about a stacksave/stackrestore between different diff --git a/flang/test/Transforms/stack-arrays.fir b/flang/test/Transforms/stack-arrays.fir index 444136d53e034..a784cea9bc3a4 100644 --- a/flang/test/Transforms/stack-arrays.fir +++ b/flang/test/Transforms/stack-arrays.fir @@ -418,3 +418,45 @@ func.func @lookthrough() { // CHECK: func.func @lookthrough() { // CHECK: fir.alloca !fir.array<42xi32> // CHECK-NOT: fir.freemem + +// StackArrays is better to find fir.freemem ops corresponding to fir.allocmem +// using the same look through mechanism as during the allocation analysis, +// looking through fir.convert and fir.declare. +func.func @finding_freemem_in_block() { + %c0 = arith.constant 0 : index + %c10_i32 = arith.constant 10 : i32 + %c1_i32 = arith.constant 1 : i32 + %0 = fir.alloca i32 {bindc_name = "k", uniq_name = "k"} + %1 = fir.declare %0 {uniq_name = "k"} : (!fir.ref) -> !fir.ref + fir.store %c1_i32 to %1 : !fir.ref + cf.br ^bb1 +^bb1: // 2 preds: ^bb0, ^bb2 + %2 = fir.load %1 : !fir.ref + %3 = arith.cmpi sle, %2, %c10_i32 : i32 + cf.cond_br %3, ^bb2, ^bb3 +^bb2: // pred: ^bb1 + %4 = fir.declare %1 {fortran_attrs = #fir.var_attrs, uniq_name = "x"} : (!fir.ref) -> !fir.ref + %5 = fir.load %4 : !fir.ref + %6 = fir.convert %5 : (i32) -> index + %7 = arith.cmpi sgt, %6, %c0 : index + %8 = arith.select %7, %6, %c0 : index + %9 = fir.shape %8 : (index) -> !fir.shape<1> + %10 = fir.allocmem !fir.array, %8 {bindc_name = ".tmp.expr_result", uniq_name = ""} + %11 = fir.convert %10 : (!fir.heap>) -> !fir.ref> + %12 = fir.declare %11(%9) {uniq_name = ".tmp.expr_result"} : (!fir.ref>, !fir.shape<1>) -> !fir.ref> + %13 = fir.embox %12(%9) : (!fir.ref>, !fir.shape<1>) -> !fir.box> + %14 = fir.call @_QPfunc(%1) fastmath : (!fir.ref) -> !fir.array + fir.save_result %14 to %12(%9) : !fir.array, !fir.ref>, !fir.shape<1> + fir.call @_QPsub(%13) fastmath : (!fir.box>) -> () + %15 = fir.convert %12 : (!fir.ref>) -> !fir.heap> + fir.freemem %15 : !fir.heap> + %16 = fir.load %1 : !fir.ref + %17 = arith.addi %16, %c1_i32 : i32 + fir.store %17 to %1 : !fir.ref + cf.br ^bb1 +^bb3: // pred: ^bb1 + return +} +// CHECK: func.func @finding_freemem_in_block() { +// CHECK: fir.alloca !fir.array +// CHECK-NOT: fir.freemem