Skip to content

[Utils][mlir] Fix interaction between CodeExtractor and OpenMPIRBuilder #145051

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ class OpenMPIRBuilder {
/// not have an effect on \p M (see initialize)
OpenMPIRBuilder(Module &M)
: M(M), Builder(M.getContext()), OffloadInfoManager(this),
T(M.getTargetTriple()) {}
T(M.getTargetTriple()), IsFinalized(false) {}
LLVM_ABI ~OpenMPIRBuilder();

class AtomicInfo : public llvm::AtomicInfo {
Expand Down Expand Up @@ -521,6 +521,10 @@ class OpenMPIRBuilder {
/// all functions are finalized.
LLVM_ABI void finalize(Function *Fn = nullptr);

/// Check whether the finalize function has already run
/// \return true if the finalize function has already run
LLVM_ABI bool isFinalized();

/// Add attributes known for \p FnID to \p Fn.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn);

Expand Down Expand Up @@ -3286,6 +3290,8 @@ class OpenMPIRBuilder {
Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
AtomicRMWInst::BinOp RMWOp);

bool IsFinalized;

public:
/// a struct to pack relevant information while generating atomic Ops
struct AtomicOpValue {
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -824,8 +824,12 @@ void OpenMPIRBuilder::finalize(Function *Fn) {
M.getGlobalVariable("__openmp_nvptx_data_transfer_temporary_storage")};
emitUsed("llvm.compiler.used", LLVMCompilerUsed);
}

IsFinalized = true;
}

/// Report whether finalize() has already run on this builder.
/// \return the current value of the IsFinalized flag, which finalize() sets
/// to true as its last statement.
bool OpenMPIRBuilder::isFinalized() { return IsFinalized; }

/// Destructor. Performs no cleanup of its own here; it only asserts that
/// OutlineInfos is empty, i.e. that no outlining is still outstanding when
/// the builder is destroyed.
OpenMPIRBuilder::~OpenMPIRBuilder() {
assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
}
Expand Down
13 changes: 7 additions & 6 deletions llvm/lib/Transforms/Utils/CodeExtractor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1528,12 +1528,10 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall, inputs,
NewValues);

LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
newFunction->dump();
report_fatal_error("verification of newFunction failed!");
});
LLVM_DEBUG(if (verifyFunction(*oldFunction))
report_fatal_error("verification of oldFunction failed!"));
LLVM_DEBUG(llvm::dbgs() << "After extractCodeRegion - newFunction:\n");
LLVM_DEBUG(newFunction->dump());
LLVM_DEBUG(llvm::dbgs() << "After extractCodeRegion - oldFunction:\n");
LLVM_DEBUG(oldFunction->dump());
LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC))
report_fatal_error("Stale Asumption cache for old Function!"));
return newFunction;
Expand Down Expand Up @@ -1833,6 +1831,9 @@ CallInst *CodeExtractor::emitReplacerCall(
// This takes place of the original loop
BasicBlock *codeReplacer =
BasicBlock::Create(Context, "codeRepl", oldFunction, ReplIP);
if (AllocationBlock)
assert(AllocationBlock->getParent() == oldFunction &&
"AllocationBlock is not in the same function");
BasicBlock *AllocaBlock =
AllocationBlock ? AllocationBlock : &oldFunction->getEntryBlock();

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: opt -S -passes='function(instsimplify),hotcoldsplit' -hotcoldsplit-threshold=-1 -debug < %s 2>&1 | FileCheck %s
; RUN: opt -S -passes='function(instsimplify),hotcoldsplit' -hotcoldsplit-threshold=-1 < %s 2>&1 | FileCheck %s
; RUN: opt -passes='function(instcombine),hotcoldsplit,function(instsimplify)' %s -o /dev/null

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,13 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder,
allocaInsertPoint = frame.allocaInsertPoint;
return WalkResult::interrupt();
});
if (walkResult.wasInterrupted())
// In cases with multiple levels of outlining, the tree walk might find an
// alloca insertion point that is inside the original function while the
// builder insertion point is inside the outlined function. We need to make
// sure that we do not use it in those cases.
if (walkResult.wasInterrupted() &&
allocaInsertPoint.getBlock()->getParent() ==
builder.GetInsertBlock()->getParent())
return allocaInsertPoint;

// Otherwise, insert to the entry block of the surrounding function.
Expand Down
6 changes: 5 additions & 1 deletion mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,7 @@ ModuleTranslation::ModuleTranslation(Operation *module,
}

/// Destructor. Finalizes the OpenMP IR builder if one exists and it has not
/// been finalized yet; translateModuleToLLVMIR may already have called
/// finalize() before verifying the module, in which case this is a no-op.
ModuleTranslation::~ModuleTranslation() {
if (ompBuilder)
if (ompBuilder && !ompBuilder->isFinalized())
ompBuilder->finalize();
}

Expand Down Expand Up @@ -2331,6 +2331,10 @@ mlir::translateModuleToLLVMIR(Operation *module, llvm::LLVMContext &llvmContext,
// beforehand.
translator.debugTranslation->addModuleFlagsIfNotPresent();

// Call the OpenMP IR Builder callbacks prior to verifying the module
if (auto *ompBuilder = translator.getOpenMPBuilder())
ompBuilder->finalize();

if (!disableVerification &&
llvm::verifyModule(*translator.llvmModule, &llvm::errs()))
return nullptr;
Comment on lines +2335 to 2340
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can move module verification to ~ModuleTranslation instead. This way we both make sure that verification happens after finalization and we do not need to add the isFinalized bool.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could do this, but then we'd have to change the signature of mlir::translateModuleToLLVMIR to remove the disableVerification option as it would no longer be possible to run the translation without verification, because verification would always happen in the destructor. Is this desirable?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think leaving it here makes more sense indeed. Thanks for looking into it.

Expand Down
62 changes: 62 additions & 0 deletions mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
// This tests the fix for https://github.com/llvm/llvm-project/issues/138102
// We are only interested in ensuring that the -mlir-to-llvmir pass doesn't crash

// CHECK-LABEL: define internal void @_QQmain..omp_par

// Privatizer of type `private` for an i32; referenced by the omp.wsloop
// inside @_QQmain.
omp.private {type = private} @_QFEi_private_i32 : i32
// Privatizer of type `firstprivate` for an i32; referenced by the omp.task
// inside @_QQmain. The copy region loads the i32 at %arg0, stores it to
// %arg1, and yields %arg1.
omp.private {type = firstprivate} @_QFEc_firstprivate_i32 : i32 copy {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
%0 = llvm.load %arg0 : !llvm.ptr -> i32
llvm.store %0, %arg1 : i32, !llvm.ptr
omp.yield(%arg1 : !llvm.ptr)
}
// Function under test. Its body nests omp.task > omp.target > omp.parallel >
// omp.wsloop, which is the multi-level outlining case the CHECK-LABEL line
// for @_QQmain..omp_par covers.
llvm.func @_QQmain() {
%0 = llvm.mlir.constant(1 : i64) : i64
%1 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
%2 = llvm.mlir.constant(1 : i64) : i64
%3 = llvm.alloca %2 x i32 {bindc_name = "c"} : (i64) -> !llvm.ptr
%4 = llvm.mlir.constant(10 : index) : i64
%5 = llvm.mlir.constant(0 : index) : i64
%6 = llvm.mlir.constant(10000 : index) : i64
%7 = llvm.mlir.constant(1 : index) : i64
%8 = llvm.mlir.constant(1 : i64) : i64
%9 = llvm.mlir.addressof @_QFECchunksz : !llvm.ptr
%10 = llvm.mlir.constant(1 : i64) : i64
%11 = llvm.trunc %7 : i64 to i32
llvm.br ^bb1(%11, %4 : i32, i64)
// NOTE(review): the trailing comment below names ^bb2 as a predecessor, but
// no ^bb2 exists in this function; ^bb1 is only reached from the entry block
// and ends in llvm.return. %14 (like %6, %8 and %10 above) has no visible use.
^bb1(%12: i32, %13: i64): // 2 preds: ^bb0, ^bb2
%14 = llvm.icmp "sgt" %13, %5 : i64
llvm.store %12, %3 : i32, !llvm.ptr
// Task region: %3 ("c") is privatized through @_QFEc_firstprivate_i32 and is
// visible inside the region as %arg0.
omp.task private(@_QFEc_firstprivate_i32 %3 -> %arg0 : !llvm.ptr) {
// Implicit by-copy map entries for "i" (%1), "c" (the task-private %arg0) and
// "chunksz" (%9), consumed by the omp.target below.
%19 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"}
%20 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "c"}
%21 = omp.map.info var_ptr(%9 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "chunksz"}
// Target region nested in the task; the three map entries are bound to
// %arg1, %arg2 and %arg3 respectively.
omp.target map_entries(%19 -> %arg1, %20 -> %arg2, %21 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
%22 = llvm.mlir.constant(9999 : i32) : i32
%23 = llvm.mlir.constant(1 : i32) : i32
// Parallel region nested in the target region.
omp.parallel {
%24 = llvm.load %arg2 : !llvm.ptr -> i32
%25 = llvm.add %24, %22 : i32
// Worksharing loop over [%24, %25] inclusive with step %23; %arg1 is
// privatized through @_QFEi_private_i32 as %arg4, and each iteration stores
// the induction variable %arg5 into that private copy.
omp.wsloop private(@_QFEi_private_i32 %arg1 -> %arg4 : !llvm.ptr) {
omp.loop_nest (%arg5) : i32 = (%24) to (%25) inclusive step (%23) {
llvm.store %arg5, %arg4 : i32, !llvm.ptr
omp.yield
}
}
omp.terminator
}
omp.terminator
}
omp.terminator
}
llvm.return
}
// Internal constant i32 global initialized to 10000; its address is taken in
// @_QQmain and mapped into the target region under the name "chunksz".
llvm.mlir.global internal constant @_QFECchunksz() {addr_space = 0 : i32} : i32 {
%0 = llvm.mlir.constant(10000 : i32) : i32
llvm.return %0 : i32
}
// Internal constant i32 global initialized to 100000.
// NOTE(review): nothing else in this test file references @_QFECn; it looks
// like a leftover from the original reproducer — confirm before removing.
llvm.mlir.global internal constant @_QFECn() {addr_space = 0 : i32} : i32 {
%0 = llvm.mlir.constant(100000 : i32) : i32
llvm.return %0 : i32
}
Loading