From 44690bec4c01f17b4b267c5cc4443a04aab16372 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 1 Oct 2021 19:45:59 -0400 Subject: [PATCH 01/15] Revert "[MC] Always emit relocations for same-section function references" This reverts commit 5a5ac65768d124d98a10e8520363a0a4be3f4e38. (cherry picked from commit ae2638d84b63af89ece7e30f39d435013ce42ee2) (cherry picked from commit 05848b6d4d8ccc212f3ba9d9f58af42f26983e2c) --- llvm/lib/MC/WinCOFFObjectWriter.cpp | 12 +++++------- llvm/test/MC/COFF/diff.s | 25 ++++++++----------------- 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp index c0b5e8bdc5039..18c002cb165d0 100644 --- a/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -679,14 +679,12 @@ void WinCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB, bool InSet, bool IsPCRel) const { - // Don't drop relocations between functions, even if they are in the same text - // section. Multiple Visual C++ linker features depend on having the - // relocations present. The /INCREMENTAL flag will cause these relocations to - // point to thunks, and the /GUARD:CF flag assumes that it can use relocations - // to approximate the set of all address taken functions. LLD's implementation - // of /GUARD:CF also relies on the existance of these relocations. + // MS LINK expects to be able to replace all references to a function with a + // thunk to implement their /INCREMENTAL feature. Make sure we don't optimize + // away any relocations to functions. 
uint16_t Type = cast(SymA).getType(); - if ((Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION) + if (Asm.isIncrementalLinkerCompatible() && + (Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION) return false; return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB, InSet, IsPCRel); diff --git a/llvm/test/MC/COFF/diff.s b/llvm/test/MC/COFF/diff.s index 90466b59d0252..640bf8189e039 100644 --- a/llvm/test/MC/COFF/diff.s +++ b/llvm/test/MC/COFF/diff.s @@ -1,14 +1,19 @@ // RUN: llvm-mc -filetype=obj -triple i686-pc-mingw32 %s | llvm-readobj -S --sr --sd - | FileCheck %s -// COFF resolves differences between labels in the same section, unless that -// label is declared with function type. - .section baz, "xr" + .def X + .scl 2; + .type 32; + .endef .globl X X: mov Y-X+42, %eax retl + .def Y + .scl 2; + .type 32; + .endef .globl Y Y: retl @@ -25,11 +30,6 @@ _foobar: # @foobar # %bb.0: ret - .globl _baz -_baz: - calll _foobar - retl - .data .globl _rust_crate # @rust_crate .align 4 @@ -39,15 +39,6 @@ _rust_crate: .long _foobar-_rust_crate .long _foobar-_rust_crate -// Even though _baz and _foobar are in the same .text section, we keep the -// relocation for compatibility with the VC linker's /guard:cf and /incremental -// flags, even on mingw. - -// CHECK: Name: .text -// CHECK: Relocations [ -// CHECK-NEXT: 0x12 IMAGE_REL_I386_REL32 _foobar -// CHECK-NEXT: ] - // CHECK: Name: .data // CHECK: Relocations [ // CHECK-NEXT: 0x4 IMAGE_REL_I386_DIR32 _foobar From 7863b41e25bcd69007e5a3e059dbfa873f2e65e6 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sat, 19 May 2018 11:56:55 -0400 Subject: [PATCH 02/15] Allow for custom address spaces Julia uses addressspaces for GC and we want these to be sanitized as well. 
(cherry picked from commit 3f53397f402b67341afe2bcb3a3316606b47d15c) (cherry picked from commit 58df73b7d510d59462d56092595cf9c91404c601) --- llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index a127e81ce6433..3316611e0af22 100644 --- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -375,7 +375,9 @@ static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) { // with them. if (Addr) { Type *PtrTy = cast(Addr->getType()->getScalarType()); - if (PtrTy->getPointerAddressSpace() != 0) + auto AS = PtrTy->getPointerAddressSpace(); + // Allow for custom addresspaces + if (AS != 0 && AS < 10) return false; } From 8e7a3ed7886256fe8898de2d109deccbb48728a1 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Wed, 29 Sep 2021 15:17:47 -0400 Subject: [PATCH 03/15] [clang/CMake] Respect LLVM_TOOLS_INSTALL_DIR Otherwise clang installs all of its tools into `bin/` while LLVM installs its tools into (LLVM_TOOLS_INSTALL_DIR). I could swear this used to work (and in fact the julia build system assumes it), but I can't pin down a specific commit that would have broken this, and julia has been relying on pre-compiled binaries for a while now (that don't use this setting), so it may have been broken for quite a while. 
Differential Revision: https://reviews.llvm.org/D88630 (cherry picked from commit 6104e14b830c31dffb1b6bce1c6f9a0760993ff1) (cherry picked from commit f252e1795b885bf83f76ff4b029b0484aefebb31) (cherry picked from commit 9039ce8ab323e8e0bea24323a7231e4c53070def) --- clang/cmake/modules/AddClang.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/cmake/modules/AddClang.cmake b/clang/cmake/modules/AddClang.cmake index 75b0080f67156..96ac1dc9a86f9 100644 --- a/clang/cmake/modules/AddClang.cmake +++ b/clang/cmake/modules/AddClang.cmake @@ -169,7 +169,7 @@ macro(add_clang_tool name) get_target_export_arg(${name} Clang export_to_clangtargets) install(TARGETS ${name} ${export_to_clangtargets} - RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + RUNTIME DESTINATION "${LLVM_TOOLS_INSTALL_DIR}" COMPONENT ${name}) if(NOT LLVM_ENABLE_IDE) From 39c935b2d35550f892736e100264f9c2e344d7ed Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sat, 16 Jan 2021 17:36:09 -0500 Subject: [PATCH 04/15] Don't merge icmps derived from pointers with addressspaces IIUC we can't emit `memcmp` between pointers in addressspaces, doing so will trigger an assertion since the signature of the memcmp will not match its arguments (https://bugs.llvm.org/show_bug.cgi?id=48661). This PR disables the attempt to merge icmps, when the pointer is in an addressspace. 
Differential Revision: https://reviews.llvm.org/D94813 (cherry picked from commit 458b259600f7efd82387eb7c4e09bdcee328106b) (cherry picked from commit aaf2d2763f878f73770ccfdaf40f77a565b24a73) --- .../Transforms/MergeICmps/addressspaces.ll | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 llvm/test/Transforms/MergeICmps/addressspaces.ll diff --git a/llvm/test/Transforms/MergeICmps/addressspaces.ll b/llvm/test/Transforms/MergeICmps/addressspaces.ll new file mode 100644 index 0000000000000..9a74b4a5b2ca4 --- /dev/null +++ b/llvm/test/Transforms/MergeICmps/addressspaces.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -mergeicmps -S | FileCheck %s + +source_filename = "==" +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +define void @juliaAS([2 x [5 x i64]] addrspace(11)* nocapture nonnull readonly align 8 dereferenceable(80) %0, [2 x [5 x i64]] addrspace(11)* nocapture nonnull readonly align 8 dereferenceable(80) %1) { +; CHECK-LABEL: @juliaAS( +; CHECK-NEXT: top: +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* [[TMP0:%.*]], i64 0, i64 1, i64 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* [[TMP0]], i64 0, i64 1, i64 3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* [[TMP0]], i64 0, i64 1, i64 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* [[TMP1:%.*]], i64 0, i64 1, i64 2 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* [[TMP1]], i64 0, i64 1, i64 3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* [[TMP1]], i64 0, i64 1, i64 4 +; CHECK-NEXT: [[TMP8:%.*]] = load i64, i64 addrspace(11)* [[TMP2]], align 8 +; CHECK-NEXT: 
[[TMP9:%.*]] = load i64, i64 addrspace(11)* [[TMP5]], align 8 +; CHECK-NEXT: [[DOTNOT17:%.*]] = icmp eq i64 [[TMP8]], [[TMP9]] +; CHECK-NEXT: br i1 [[DOTNOT17]], label [[L70:%.*]], label [[L90:%.*]] +; CHECK: L70: +; CHECK-NEXT: [[TMP10:%.*]] = load i64, i64 addrspace(11)* [[TMP3]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = load i64, i64 addrspace(11)* [[TMP6]], align 8 +; CHECK-NEXT: [[DOTNOT18:%.*]] = icmp eq i64 [[TMP10]], [[TMP11]] +; CHECK-NEXT: br i1 [[DOTNOT18]], label [[L74:%.*]], label [[L90]] +; CHECK: L74: +; CHECK-NEXT: [[TMP12:%.*]] = load i64, i64 addrspace(11)* [[TMP4]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = load i64, i64 addrspace(11)* [[TMP7]], align 8 +; CHECK-NEXT: [[DOTNOT19:%.*]] = icmp eq i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: br label [[L90]] +; CHECK: L90: +; CHECK-NEXT: [[VALUE_PHI2_OFF0:%.*]] = phi i1 [ false, [[TOP:%.*]] ], [ [[DOTNOT19]], [[L74]] ], [ false, [[L70]] ] +; CHECK-NEXT: ret void +; +top: + %2 = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* %0, i64 0, i64 1, i64 2 + %3 = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* %0, i64 0, i64 1, i64 3 + %4 = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* %0, i64 0, i64 1, i64 4 + %5 = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* %1, i64 0, i64 1, i64 2 + %6 = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* %1, i64 0, i64 1, i64 3 + %7 = getelementptr inbounds [2 x [5 x i64]], [2 x [5 x i64]] addrspace(11)* %1, i64 0, i64 1, i64 4 + %8 = load i64, i64 addrspace(11)* %2, align 8 + %9 = load i64, i64 addrspace(11)* %5, align 8 + %.not17 = icmp eq i64 %8, %9 + br i1 %.not17, label %L70, label %L90 + +L70: ; preds = %top + %10 = load i64, i64 addrspace(11)* %3, align 8 + %11 = load i64, i64 addrspace(11)* %6, align 8 + %.not18 = icmp eq i64 %10, %11 + br i1 %.not18, label %L74, label %L90 + +L74: ; preds = %L70 + %12 = load i64, i64 addrspace(11)* %4, align 8 + %13 = load 
i64, i64 addrspace(11)* %7, align 8 + %.not19 = icmp eq i64 %12, %13 + br label %L90 + +L90: ; preds = %L74, %L70, %top + %value_phi2.off0 = phi i1 [ false, %top ], [ %.not19, %L74 ], [ false, %L70 ] + ret void +} + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"Debug Info Version", i32 3} + From 378001cffcb278dcb351b17343f31744d70b1b66 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sat, 30 Apr 2022 19:00:11 +0100 Subject: [PATCH 05/15] Add support for unwinding during prologue/epilogue (cherry picked from commit 5393efbd8a4c7555b9f9fdf185c486c6b05f0c19) --- libunwind/src/CompactUnwinder.hpp | 156 ++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) diff --git a/libunwind/src/CompactUnwinder.hpp b/libunwind/src/CompactUnwinder.hpp index a7a8a153d86a4..ac8837aa37ec5 100644 --- a/libunwind/src/CompactUnwinder.hpp +++ b/libunwind/src/CompactUnwinder.hpp @@ -311,6 +311,50 @@ int CompactUnwinder_x86_64::stepWithCompactEncodingRBPFrame( uint32_t savedRegistersLocations = EXTRACT_BITS(compactEncoding, UNWIND_X86_64_RBP_FRAME_REGISTERS); + // If we have not stored EBP yet + if (functionStart == registers.getIP()) { + uint64_t rsp = registers.getSP(); + // old esp is ebp less return address + registers.setSP(rsp+8); + // pop return address into eip + registers.setIP(addressSpace.get64(rsp)); + + return UNW_STEP_SUCCESS; + } else if (functionStart + 1 == registers.getIP()) { + uint64_t rsp = registers.getSP(); + // old esp is ebp less return address + registers.setSP(rsp + 16); + // pop return address into eip + registers.setIP(addressSpace.get64(rsp + 8)); + + return UNW_STEP_SUCCESS; + } + + // If we're about to return, we've already popped the base pointer + uint8_t b = addressSpace.get8(registers.getIP()); + + // This is a hack to detect VZEROUPPER but in between popq rbp and ret + // It's not pretty but it works + if (b == 0xC5) { + if ((b = addressSpace.get8(registers.getIP() + 1)) == 0xF8 && + (b = addressSpace.get8(registers.getIP() + 2)) == 0x77) + b 
= addressSpace.get8(registers.getIP() + 3); + else + goto skip_ret; + } + + if (b == 0xC3 || b == 0xCB || b == 0xC2 || b == 0xCA) { + uint64_t rbp = registers.getSP(); + // old esp is ebp less return address + registers.setSP(rbp + 16); + // pop return address into eip + registers.setIP(addressSpace.get64(rbp + 8)); + + return UNW_STEP_SUCCESS; + } + + skip_ret: + uint64_t savedRegisters = registers.getRBP() - 8 * savedRegistersOffset; for (int i = 0; i < 5; ++i) { switch (savedRegistersLocations & 0x7) { @@ -431,6 +475,118 @@ int CompactUnwinder_x86_64::stepWithCompactEncodingFrameless( } } } + + // Note that the order of these registers is so that + // registersSaved[0] is the one that will be pushed onto the stack last. + // Thus, if we want to walk this from the top, we need to go in reverse. + assert(regCount <= 6); + + // check whether we are still in the prologue + uint64_t curAddr = functionStart; + if (regCount > 0) { + for (int8_t i = (int8_t)(regCount) - 1; i >= 0; --i) { + if (registers.getIP() == curAddr) { + // None of the registers have been modified yet, so we don't need to reload them + framelessUnwind(addressSpace, registers.getSP() + 8 * (regCount - (uint64_t)(i + 1)), registers); + return UNW_STEP_SUCCESS; + } else { + assert(curAddr < registers.getIP()); + } + + + // pushq %rbp and pushq %rbx is 1 byte. 
Everything else 2 + if ((UNWIND_X86_64_REG_RBP == registersSaved[i]) || + (UNWIND_X86_64_REG_RBX == registersSaved[i])) + curAddr += 1; + else + curAddr += 2; + } + } + if (registers.getIP() == curAddr) { + // None of the registers have been modified yet, so we don't need to reload them + framelessUnwind(addressSpace, registers.getSP() + 8*regCount, registers); + return UNW_STEP_SUCCESS; + } else { + assert(curAddr < registers.getIP()); + } + + + // And now for the epilogue + { + uint8_t i = 0; + uint64_t p = registers.getIP(); + uint8_t b = 0; + + while (true) { + b = addressSpace.get8(p++); + // This is a hack to detect VZEROUPPER but in between the popq's and ret + // It's not pretty but it works + if (b == 0xC5) { + if ((b = addressSpace.get8(p++)) == 0xF8 && (b = addressSpace.get8(p++)) == 0x77) + b = addressSpace.get8(p++); + else + break; + } + // popq %rbx popq %rbp + if (b == 0x5B || b == 0x5D) { + i++; + } else if (b == 0x41) { + b = addressSpace.get8(p++); + if (b == 0x5C || b == 0x5D || b == 0x5E || b == 0x5F) + i++; + else + break; + } else if (b == 0xC3 || b == 0xCB || b == 0xC2 || b == 0xCA) { + // i pop's haven't happened yet + uint64_t savedRegisters = registers.getSP() + 8 * i; + if (regCount > 0) { + for (int8_t j = (int8_t)(regCount) - 1; j >= (int8_t)(regCount) - i; --j) { + uint64_t addr = savedRegisters - 8 * (regCount - (uint64_t)(j)); + switch (registersSaved[j]) { + case UNWIND_X86_64_REG_RBX: + registers.setRBX(addressSpace.get64(addr)); + break; + case UNWIND_X86_64_REG_R12: + registers.setR12(addressSpace.get64(addr)); + break; + case UNWIND_X86_64_REG_R13: + registers.setR13(addressSpace.get64(addr)); + break; + case UNWIND_X86_64_REG_R14: + registers.setR14(addressSpace.get64(addr)); + break; + case UNWIND_X86_64_REG_R15: + registers.setR15(addressSpace.get64(addr)); + break; + case UNWIND_X86_64_REG_RBP: + registers.setRBP(addressSpace.get64(addr)); + break; + default: + _LIBUNWIND_DEBUG_LOG("bad register for frameless, encoding=%08X 
for " + "function starting at 0x%llX", + encoding, functionStart); + _LIBUNWIND_ABORT("invalid compact unwind encoding"); + } + } + } + framelessUnwind(addressSpace, savedRegisters, registers); + return UNW_STEP_SUCCESS; + } else { + break; + } + } + } + + /* + 0x10fe2733a: 5b popq %rbx + 0x10fe2733b: 41 5c popq %r12 + 0x10fe2733d: 41 5d popq %r13 + 0x10fe2733f: 41 5e popq %r14 + 0x10fe27341: 41 5f popq %r15 + 0x10fe27343: 5d popq %rbp + */ + + uint64_t savedRegisters = registers.getSP() + stackSize - 8 - 8 * regCount; for (uint32_t i = 0; i < regCount; ++i) { switch (registersSaved[i]) { From 16bd21ce16198f6e1778ba19b7ed4468e1e52713 Mon Sep 17 00:00:00 2001 From: Julian P Samaroo Date: Tue, 18 Jan 2022 13:32:28 -0600 Subject: [PATCH 06/15] [LLD] Respect LLVM_TOOLS_INSTALL_DIR Co-authored-by: Valentin Churavy Co-authored-by: Julian P Samaroo (cherry picked from commit a0defe021cee2076dc161eceeaab70297b386b91) --- lld/cmake/modules/AddLLD.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lld/cmake/modules/AddLLD.cmake b/lld/cmake/modules/AddLLD.cmake index d3924f7243d40..01b4fe65a45ac 100644 --- a/lld/cmake/modules/AddLLD.cmake +++ b/lld/cmake/modules/AddLLD.cmake @@ -20,7 +20,7 @@ macro(add_lld_library name) ${export_to_lldtargets} LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} - RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") + RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR}) if (${ARG_SHARED} AND NOT CMAKE_CONFIGURATION_TYPES) add_llvm_install_targets(install-${name} @@ -47,7 +47,7 @@ macro(add_lld_tool name) get_target_export_arg(${name} LLD export_to_lldtargets) install(TARGETS ${name} ${export_to_lldtargets} - RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR} COMPONENT ${name}) if(NOT CMAKE_CONFIGURATION_TYPES) From 9f895fec2a15a666f706954ab772da9b9b7e20cb Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 2 May 2022 10:04:47 -0400 Subject: 
[PATCH 07/15] [Sanitizers] Guard FP_XSTATE_MAGIC1 usage by GLIBC version Follow-up on https://reviews.llvm.org/D118970 FP_XSTATE_MAGIC1 is only available on glibc 2.27 and upwards Differential Revision: https://reviews.llvm.org/D124770 --- .../lib/sanitizer_common/sanitizer_platform_limits_posix.cpp | 2 +- compiler-rt/test/msan/Linux/signal_mcontext.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index fc01498aa2285..cc09a22b93b50 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -223,7 +223,7 @@ namespace __sanitizer { unsigned struct_sockaddr_sz = sizeof(struct sockaddr); unsigned ucontext_t_sz(void *ctx) { -# if SANITIZER_GLIBC && SANITIZER_X64 +# if SANITIZER_GLIBC && SANITIZER_X64 && __GLIBC_PREREQ (2, 27) // Added in Linux kernel 3.4.0, merged to glibc in 2.16 # ifndef FP_XSTATE_MAGIC1 # define FP_XSTATE_MAGIC1 0x46505853U diff --git a/compiler-rt/test/msan/Linux/signal_mcontext.cpp b/compiler-rt/test/msan/Linux/signal_mcontext.cpp index b49451fbb730b..11ef74e7462bb 100644 --- a/compiler-rt/test/msan/Linux/signal_mcontext.cpp +++ b/compiler-rt/test/msan/Linux/signal_mcontext.cpp @@ -10,7 +10,7 @@ void handler(int sig, siginfo_t *info, void *uctx) { __msan_check_mem_is_initialized(uctx, sizeof(ucontext_t)); -#if defined(__GLIBC__) && defined(__x86_64__) +#if defined(__GLIBC__) && defined(__x86_64__) && __GLIBC_PREREQ(2, 27) auto *mctx = &static_cast(uctx)->uc_mcontext; if (auto *fpregs = mctx->fpregs) { // The member names differ across header versions, but the actual layout From 5d87216a23f5224b6868f62e21c2f7f6b7063ab2 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Mon, 24 May 2021 16:36:06 -0700 Subject: [PATCH 08/15] Force `.eh_frame` emission on AArch64 We need to force the emission 
of the EH Frame section (currently done via SupportsCompactUnwindWithoutEHFrame in the MCObjectFileInfo for the target), since libunwind doesn't yet support dynamically registering compact unwind information at run-time. (cherry picked from commit 60e041894288848e37870c42749a1aabcc2c2274) (cherry picked from commit 6275013da5e8cd5e552bd5bb7d85c7b0524ca69d) --- llvm/lib/MC/MCObjectFileInfo.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index 7af00b71677c8..a3f0ca5978785 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -61,9 +61,10 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { MachO::S_ATTR_STRIP_STATIC_SYMS | MachO::S_ATTR_LIVE_SUPPORT, SectionKind::getReadOnly()); - if (T.isOSDarwin() && - (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32)) - SupportsCompactUnwindWithoutEHFrame = true; + // Disabled for now, since we need to emit EH Frames for stack unwinding in the JIT + // if (T.isOSDarwin() && + // (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32)) + // SupportsCompactUnwindWithoutEHFrame = true; switch (Ctx->emitDwarfUnwindInfo()) { case EmitDwarfUnwindType::Always: From f8af70bdfffc983703c7cd60b4061965ca6946ae Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Mon, 22 Aug 2022 13:17:12 -0300 Subject: [PATCH 09/15] Add patches for msan --- .../Instrumentation/MemorySanitizer/alloca.ll | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/llvm/test/Instrumentation/MemorySanitizer/alloca.ll b/llvm/test/Instrumentation/MemorySanitizer/alloca.ll index 25a44ecd9d241..738680e573462 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/alloca.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/alloca.ll @@ -72,6 +72,20 @@ entry: ; KMSAN: call void @__msan_poison_alloca(ptr {{.*}}, i64 20, ; CHECK: ret void +define void @array32() sanitize_memory { 
+entry: + %x = alloca i32, i32 5, align 4 + ret void +} + +; CHECK-LABEL: define void @array32( +; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 20, i1 false) +; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 20) +; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 20, +; ORIGIN-LEAN: call void @__msan_set_alloca_origin_no_descr(i8* {{.*}}, i64 20, +; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 20, +; CHECK: ret void + define void @array_non_const(i64 %cnt) sanitize_memory { entry: %x = alloca i32, i64 %cnt, align 4 @@ -103,6 +117,22 @@ entry: ; KMSAN: call void @__msan_poison_alloca(ptr {{.*}}, i64 %[[A]], ; CHECK: ret void +define void @array_non_const32(i32 %cnt) sanitize_memory { +entry: + %x = alloca i32, i32 %cnt, align 4 + ret void +} + +; CHECK-LABEL: define void @array_non_const32( +; CHECK: %[[Z:.*]] = zext i32 %cnt to i64 +; CHECK: %[[A:.*]] = mul i64 4, %[[Z]] +; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 %[[A]], i1 false) +; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 %[[A]]) +; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 %[[A]], +; ORIGIN-LEAN: call void @__msan_set_alloca_origin_no_descr(i8* {{.*}}, i64 %[[A]], +; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 %[[A]], +; CHECK: ret void + ; Check that the local is unpoisoned in the absence of sanitize_memory define void @unpoison_local() { entry: From c007a4d247d8ab60bc5a000c6399d26597b881e2 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Mon, 22 Aug 2022 13:18:19 -0300 Subject: [PATCH 10/15] Try keno's tentative TLS fix --- compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp index b13e2dc9e3327..e4bbba74cd7fc 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp 
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp @@ -16,6 +16,8 @@ #include "sanitizer_flags.h" #include "sanitizer_platform_interceptors.h" +#include + namespace __sanitizer { #if SANITIZER_INTERCEPT_TLS_GET_ADDR @@ -139,6 +141,8 @@ DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res, tls_beg = header->start; VReport(2, "__tls_get_addr: glibc >=2.19 suspected; tls={0x%zx 0x%zx}\n", tls_beg, tls_size); + } else if (uptr size = malloc_usable_size((void *)tls_beg)) { + tls_size = size; } else { VReport(2, "__tls_get_addr: Can't guess glibc version\n"); // This may happen inside the DTOR of main thread, so just ignore it. From a74d1a27d458deb0e0e70141fbddec0872bf8ee6 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Wed, 31 Aug 2022 20:06:40 -0300 Subject: [PATCH 11/15] Make include conditional to macos --- compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp index e4bbba74cd7fc..45c52763cebfe 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp @@ -16,7 +16,9 @@ #include "sanitizer_flags.h" #include "sanitizer_platform_interceptors.h" +#if !defined(__APPLE__) #include +#endif namespace __sanitizer { #if SANITIZER_INTERCEPT_TLS_GET_ADDR From d587cd4a645409b2daeb25f4b151f59278921d0e Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Fri, 14 Apr 2023 17:35:07 -0700 Subject: [PATCH 12/15] [NewPM] Use PassID instead of pass name PrintIRInstrumentation::shouldPrintAfterPass accepts a pass ID instead of a pass name Reviewed By: aeubanks Differential Revision: https://reviews.llvm.org/D147394 (cherry picked from commit d4de7c2e1e7954ea03545f1551fda9f6bb9387cf) --- llvm/lib/Passes/StandardInstrumentations.cpp | 3 +-- .../loop-print-after-pass-invalidated.ll | 21 +++++++++++++++++++ 2 
files changed, 22 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Other/loop-print-after-pass-invalidated.ll diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index ad2504eca2fbb..ce7308d882a0b 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -726,8 +726,7 @@ void PrintIRInstrumentation::printAfterPass(StringRef PassID, Any IR) { } void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) { - StringRef PassName = PIC->getPassNameForClassName(PassID); - if (!shouldPrintAfterPass(PassName)) + if (!shouldPrintAfterPass(PassID)) return; if (isIgnored(PassID)) diff --git a/llvm/test/Other/loop-print-after-pass-invalidated.ll b/llvm/test/Other/loop-print-after-pass-invalidated.ll new file mode 100644 index 0000000000000..63106f62ae132 --- /dev/null +++ b/llvm/test/Other/loop-print-after-pass-invalidated.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s 2>&1 -disable-output \ +; RUN: -passes='simple-loop-unswitch' \ +; RUN: -print-after=simple-loop-unswitch \ +; RUN: | FileCheck %s + +; CHECK: *** IR Dump After SimpleLoopUnswitchPass on for.cond *** +; CHECK: *** IR Dump After SimpleLoopUnswitchPass on for.cond.us *** + +define void @loop(i1 %w) { +entry: + br label %for.cond +; Loop: +for.cond: ; preds = %for.inc, %entry + br i1 %w, label %for.inc, label %if.then + +if.then: ; preds = %for.cond + br label %for.inc + +for.inc: ; preds = %if.then, %for.cond + br label %for.cond +} From 757a1b47d03631dcc84529af9687f51170691e9b Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Mon, 27 Mar 2023 17:43:54 -0700 Subject: [PATCH 13/15] Disable pathologically expensive `SimplifySelectOps` optimization `SimplifySelectOps` is a late optimization in LLVM that attempts to translate `select(C, load(A), load(B))` into `load(select(C, A, B))`. 
However, in order for it to do this optimization, it needs to check that `C` does not depend on the result of `load(A)` or `load(B)`. Unfortunately (unlike Julia and LLVM at the IR level), LLVM does not have a topological order of statements computed at this stage of the compiler, so LLVM needs to iterate through all statements in the function in order to perform this legality check. For large functions, this is extremely expensive, accounting for the majority of all compilation time for such functions. On the other hand, the optimization itself is minor, allowing at most the elision of one additional load (and doesn't fire particularly often, because the middle end can perform similar optimizations). Until there is a proper solution in LLVM, simply disable this optimization, making LLVM several orders of magnitude faster on real world benchmarks. X-ref: https://github.com/llvm/llvm-project/issues/60132 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d9cde609e5992..e10ba1d3a66b9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -25219,6 +25219,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(), LLD->getBasePtr().getValueType())) return false; + return false; // The loads must not depend on one another. if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD)) From 7a3b2cd54240ca24e60639e6d087e9a0777d829c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 10 Feb 2023 10:53:22 -0500 Subject: [PATCH 14/15] [VectorCombine] fix insertion point of shuffles As shown in issue #60649, the new shuffles were being inserted before a phi, and that is invalid. 
It seems like most test coverage for this fold (foldSelectShuffle) lives in the AArch64 dir, but this doesn't repro there for a base target. --- .../Transforms/Vectorize/VectorCombine.cpp | 8 ++-- .../VectorCombine/X86/select-shuffle.ll | 38 +++++++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 2e489757ebc18..6fc8a024be36a 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1660,16 +1660,16 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { return SSV->getOperand(Op); return SV->getOperand(Op); }; - Builder.SetInsertPoint(SVI0A->getNextNode()); + Builder.SetInsertPoint(SVI0A->getInsertionPointAfterDef()); Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0), GetShuffleOperand(SVI0A, 1), V1A); - Builder.SetInsertPoint(SVI0B->getNextNode()); + Builder.SetInsertPoint(SVI0B->getInsertionPointAfterDef()); Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0), GetShuffleOperand(SVI0B, 1), V1B); - Builder.SetInsertPoint(SVI1A->getNextNode()); + Builder.SetInsertPoint(SVI1A->getInsertionPointAfterDef()); Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0), GetShuffleOperand(SVI1A, 1), V2A); - Builder.SetInsertPoint(SVI1B->getNextNode()); + Builder.SetInsertPoint(SVI1B->getInsertionPointAfterDef()); Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0), GetShuffleOperand(SVI1B, 1), V2B); Builder.SetInsertPoint(Op0); diff --git a/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll new file mode 100644 index 0000000000000..d51ac6a33911d --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll @@ -0,0 +1,38 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- | FileCheck %s + +target datalayout = "e-p:64:64-i64:64-f80:128-n8:16:32:64-S128" + +; This would insert before a phi instruction which is invalid IR. + +define <4 x double> @PR60649() { +; CHECK-LABEL: @PR60649( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: unreachable: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[T0:%.*]] = phi <4 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[UNREACHABLE:%.*]] ] +; CHECK-NEXT: [[T1:%.*]] = phi <4 x double> [ zeroinitializer, [[ENTRY]] ], [ zeroinitializer, [[UNREACHABLE]] ] +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[T0]], <4 x double> [[T0]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[T0]], <4 x double> [[T0]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x double> [[TMP0]], +; CHECK-NEXT: [[T5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <4 x i32> +; CHECK-NEXT: ret <4 x double> [[T5]] +; +entry: + br label %end + +unreachable: + br label %end + +end: + %t0 = phi <4 x double> [ zeroinitializer, %entry ], [ zeroinitializer, %unreachable ] + %t1 = phi <4 x double> [ zeroinitializer, %entry ], [ zeroinitializer, %unreachable ] + %t2 = shufflevector <4 x double> zeroinitializer, <4 x double> zeroinitializer, <4 x i32> + %t3 = fdiv <4 x double> %t0, %t2 + %t4 = fmul <4 x double> %t0, %t2 + %t5 = shufflevector <4 x double> %t3, <4 x double> %t4, <4 x i32> + ret <4 x double> %t5 +} From 4011f2912893f9e76b78990d5db7619777b6cf47 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Wed, 16 Aug 2023 17:58:55 -0300 Subject: [PATCH 15/15] Initial test for a mapper that doesn't make too many bindings --- .../JITLink/JITLinkMemoryManager.h | 11 ++++- .../JITLink/JITLinkMemoryManager.cpp | 25 +++++++++++ 
.../Orc/MapperJITLinkMemoryManager.cpp | 41 +++++++++++++------ 3 files changed, 64 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h index 6ef4a0bd0c982..d3dcb48105d0a 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h @@ -241,10 +241,16 @@ class BasicLayout { struct ContiguousPageBasedLayoutSizes { uint64_t StandardSegs = 0; uint64_t FinalizeSegs = 0; - uint64_t total() const { return StandardSegs + FinalizeSegs; } }; + struct SplitPageBasedLayoutSizes { + uint64_t TextSegs = 0; + uint64_t DataSegs = 0; + uint64_t FinalizeSegs = 0; + uint64_t total() const { return TextSegs + DataSegs + FinalizeSegs; } + }; + private: using SegmentMap = orc::AllocGroupSmallMap; @@ -266,6 +272,9 @@ class BasicLayout { Expected getContiguousPageBasedLayoutSizes(uint64_t PageSize); + // Same as above but returns the segments split into text, data, and finalize + Expected + getSplitPageBasedLayoutSizes(uint64_t PageSize); /// Returns an iterator over the segments of the layout. 
iterator_range segments() { return {Segments.begin(), Segments.end()}; diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp index bd44b86f30819..708d34346a0d5 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp @@ -98,6 +98,31 @@ BasicLayout::getContiguousPageBasedLayoutSizes(uint64_t PageSize) { return SegsSizes; } +Expected +BasicLayout::getSplitPageBasedLayoutSizes(uint64_t PageSize) { + SplitPageBasedLayoutSizes SegsSizes; + + for (auto &KV : segments()) { + auto &AG = KV.first; + auto &Seg = KV.second; + + if (Seg.Alignment > PageSize) + return make_error("Segment alignment greater than page size", + inconvertibleErrorCode()); + + uint64_t SegSize = alignTo(Seg.ContentSize + Seg.ZeroFillSize, PageSize); + if (AG.getMemDeallocPolicy() == orc::MemDeallocPolicy::Standard) + if ((AG.getMemProt() & orc::MemProt::Exec) != orc::MemProt::None) // Text + SegsSizes.TextSegs += SegSize; + else // Data + SegsSizes.DataSegs += SegSize; + else + SegsSizes.FinalizeSegs += SegSize; + } + + return SegsSizes; +} + Error BasicLayout::apply() { for (auto &KV : Segments) { auto &Seg = KV.second; diff --git a/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp index d099a251232e7..73baf15efd0a2 100644 --- a/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp @@ -64,7 +64,8 @@ void MapperJITLinkMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G, BasicLayout BL(G); // find required address space - auto SegsSizes = BL.getContiguousPageBasedLayoutSizes(Mapper->getPageSize()); + auto SegsSizes = BL.getSplitPageBasedLayoutSizes(Mapper->getPageSize()); + if (!SegsSizes) { OnAllocated(SegsSizes.takeError()); return; @@ -72,7 +73,7 @@ void MapperJITLinkMemoryManager::allocate(const 
JITLinkDylib *JD, LinkGraph &G, auto TotalSize = SegsSizes->total(); - auto CompleteAllocation = [this, &G, BL = std::move(BL), + auto CompleteAllocation = [this, Sizes = *SegsSizes, &G, BL = std::move(BL), /* sizes copied by value so the completion callback never depends on allocate()'s frame still being live */ OnAllocated = std::move(OnAllocated)]( Expected Result) mutable { if (!Result) { @@ -80,20 +81,34 @@ void MapperJITLinkMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G, return OnAllocated(Result.takeError()); } - auto NextSegAddr = Result->Start; + auto DataSegAddr = Result->Start; /* data grows upward from the start of the slab */ + ExecutorAddr TextSegAddr(alignDown(Result->End.getValue() - Sizes.TextSegs, Mapper->getPageSize())); /* text is placed at the page-aligned top of the slab */ + auto FinalizeSegAddr = Result->Start + alignTo(Sizes.DataSegs,Mapper->getPageSize()); /* finalize segments follow the data region */ + auto FinalizeSegAddrInit = FinalizeSegAddr; + auto TextSegAddrInit = TextSegAddr; + assert((FinalizeSegAddr + Sizes.FinalizeSegs) <= TextSegAddr && "Not enough memory in the slab"); /* the finalize region must end at or before the first text page; an exact fit is valid */ std::vector SegInfos; for (auto &KV : BL.segments()) { auto &AG = KV.first; auto &Seg = KV.second; auto TotalSize = Seg.ContentSize + Seg.ZeroFillSize; - Seg.Addr = NextSegAddr; - Seg.WorkingMem = Mapper->prepare(NextSegAddr, TotalSize); + ExecutorAddr *CurrAddr; + if (AG.getMemDeallocPolicy() == orc::MemDeallocPolicy::Standard) { + if ((AG.getMemProt() & orc::MemProt::Exec) != orc::MemProt::None) { + CurrAddr = &TextSegAddr; + } else { + CurrAddr = &DataSegAddr; + } + } else { + CurrAddr = &FinalizeSegAddr; + } - NextSegAddr += alignTo(TotalSize, Mapper->getPageSize()); + Seg.Addr = *CurrAddr; + Seg.WorkingMem = Mapper->prepare(*CurrAddr, TotalSize); + *CurrAddr += alignTo(TotalSize, Mapper->getPageSize()); MemoryMapper::AllocInfo::SegInfo SI; SI.Offset = Seg.Addr - Result->Start; @@ -101,15 +116,17 @@ void MapperJITLinkMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G, SI.ZeroFillSize = Seg.ZeroFillSize; SI.AG = AG; SI.WorkingMem = Seg.WorkingMem; - SegInfos.push_back(SI); } + assert(DataSegAddr <= FinalizeSegAddrInit && "Data overwrote the finalize segment"); /* equality is the normal outcome: the data cursor stops exactly where the finalize region begins */ 
+ assert(FinalizeSegAddr <= TextSegAddrInit && "Finalize overwrote the text segment"); /* an exact fit leaves no gap; the guarded insert below already treats that as a valid case */ + assert(TextSegAddr <= Result->End && "Text overwrote the end of the slab"); /* text may end exactly at the end of the slab */ - UsedMemory.insert({Result->Start, NextSegAddr - Result->Start}); - - if (NextSegAddr < Result->End) { + UsedMemory.insert({Result->Start, FinalizeSegAddr - Result->Start}); + UsedMemory.insert({TextSegAddrInit, Result->End - TextSegAddrInit}); + if (FinalizeSegAddr < TextSegAddrInit) { // Save the remaining memory for reuse in next allocation(s) - AvailableMemory.insert(NextSegAddr, Result->End - 1, true); + AvailableMemory.insert(FinalizeSegAddr, TextSegAddrInit - 1, true); } Mutex.unlock();