diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index a35b04606e595..a7f2a1d510c6e 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -798,18 +798,40 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop( : Is64Bit ? X86::R11D : X86::EAX; - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) - .addReg(StackPtr) - .setMIFlag(MachineInstr::FrameSetup); - // save loop bound { - const unsigned BoundOffset = alignDown(Offset, StackProbeSize); - const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr); - BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed) - .addReg(FinalStackProbed) - .addImm(BoundOffset) - .setMIFlag(MachineInstr::FrameSetup); + const uint64_t BoundOffset = alignDown(Offset, StackProbeSize); + + // Can we calculate the loop bound using SUB with a 32-bit immediate? + // Note that the immediate gets sign-extended when used with a 64-bit + // register, so in that case we only have 31 bits to work with. + bool canUseSub = + Uses64BitFramePtr ? isUInt<31>(BoundOffset) : isUInt<32>(BoundOffset); + + if (canUseSub) { + const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr); + + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) + .addReg(StackPtr) + .setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed) + .addReg(FinalStackProbed) + .addImm(BoundOffset) + .setMIFlag(MachineInstr::FrameSetup); + } else if (Uses64BitFramePtr) { + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), FinalStackProbed) + .addImm(-BoundOffset) + .setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), FinalStackProbed) + .addReg(FinalStackProbed) + .addReg(StackPtr) + .setMIFlag(MachineInstr::FrameSetup); + } else { + // We're being asked to probe a stack frame that's 4 GiB or larger, + // but our stack pointer is only 32 bits. This might be unreachable + // code, so don't complain now; just trap if it's reached at runtime. + BuildMI(MBB, MBBI, DL, TII.get(X86::TRAP)); + } // while in the loop, use loop-invariant reg for CFI, // instead of the stack pointer, which changes during the loop diff --git a/llvm/test/CodeGen/X86/stack-clash-extra-huge.ll b/llvm/test/CodeGen/X86/stack-clash-extra-huge.ll new file mode 100644 index 0000000000000..59cbcd0689fbf --- /dev/null +++ b/llvm/test/CodeGen/X86/stack-clash-extra-huge.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp +; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X64 %s +; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86 %s +; RUN: llc -mtriple=x86_64-linux-gnux32 < %s | FileCheck -check-prefix=CHECK-X32 %s + +define i32 @foo() local_unnamed_addr #0 { +; CHECK-X64-LABEL: foo: +; CHECK-X64: # %bb.0: +; CHECK-X64-NEXT: movabsq $-4799995904, %r11 # imm = 0xFFFFFFFEE1E5E000 +; CHECK-X64-NEXT: addq %rsp, %r11 +; CHECK-X64-NEXT: .cfi_def_cfa_register %r11 +; CHECK-X64-NEXT: .cfi_adjust_cfa_offset 4799995904 +; CHECK-X64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-X64-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-X64-NEXT: movq $0, (%rsp) +; CHECK-X64-NEXT: cmpq %r11, %rsp +; CHECK-X64-NEXT: jne .LBB0_1 +; CHECK-X64-NEXT: # %bb.2: +; CHECK-X64-NEXT: subq $3976, %rsp # imm = 0xF88 +; CHECK-X64-NEXT: .cfi_def_cfa_register %rsp +; CHECK-X64-NEXT: .cfi_def_cfa_offset 4799999888 +; CHECK-X64-NEXT: movl $1, 264(%rsp) +; CHECK-X64-NEXT: movl $1, 28664(%rsp) +; CHECK-X64-NEXT: movl -128(%rsp), %eax +; CHECK-X64-NEXT: movabsq $4799999880, %rcx # imm = 0x11E1A2F88 +; CHECK-X64-NEXT: addq %rcx, %rsp +; CHECK-X64-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X64-NEXT: retq +; +; CHECK-X86-LABEL: foo: +; CHECK-X86: # %bb.0: +; CHECK-X86-NEXT: ud2 +; CHECK-X86-NEXT: .cfi_def_cfa_register %eax +; CHECK-X86-NEXT: .cfi_adjust_cfa_offset 4800000000 +; CHECK-X86-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-X86-NEXT: subl $4096, %esp # imm = 0x1000 +; CHECK-X86-NEXT: movl $0, (%esp) +; CHECK-X86-NEXT: cmpl %eax, %esp +; CHECK-X86-NEXT: jne .LBB0_1 +; CHECK-X86-NEXT: # %bb.2: +; CHECK-X86-NEXT: subl $12, %esp +; CHECK-X86-NEXT: .cfi_def_cfa_register %esp +; CHECK-X86-NEXT: .cfi_def_cfa_offset 4800000016 +; CHECK-X86-NEXT: movl $1, 392(%esp) +; CHECK-X86-NEXT: movl $1, 28792(%esp) +; CHECK-X86-NEXT: movl (%esp), %eax +; CHECK-X86-NEXT: movl $4800000012, %ecx # imm = 0x11E1A300C +; CHECK-X86-NEXT: addl %ecx, %esp +; CHECK-X86-NEXT: .cfi_def_cfa_offset 4 +; CHECK-X86-NEXT: retl +; +; CHECK-X32-LABEL: foo: +; CHECK-X32: # %bb.0: +; CHECK-X32-NEXT: ud2 +; CHECK-X32-NEXT: .cfi_def_cfa_register %r11 +; CHECK-X32-NEXT: .cfi_adjust_cfa_offset 4799995904 +; CHECK-X32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-X32-NEXT: subl $4096, %esp # imm = 0x1000 +; CHECK-X32-NEXT: movq $0, (%esp) +; CHECK-X32-NEXT: cmpl %r11d, %esp +; CHECK-X32-NEXT: jne .LBB0_1 +; CHECK-X32-NEXT: # %bb.2: +; CHECK-X32-NEXT: subl $3976, %esp # imm = 0xF88 +; CHECK-X32-NEXT: .cfi_def_cfa_register %rsp +; CHECK-X32-NEXT: .cfi_def_cfa_offset 4799999888 +; CHECK-X32-NEXT: movl $1, 264(%esp) +; CHECK-X32-NEXT: movl $1, 28664(%esp) +; CHECK-X32-NEXT: movl -128(%esp), %eax +; CHECK-X32-NEXT: movabsq $4799999880, %rcx # imm = 0x11E1A2F88 +; CHECK-X32-NEXT: addq %rcx, %esp +; CHECK-X32-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X32-NEXT: retq + %a = alloca i32, i64 1200000000, align 16 + %b0 = getelementptr inbounds i32, ptr %a, i64 98 + %b1 = getelementptr inbounds i32, ptr %a, i64 7198 + store volatile i32 1, ptr %b0 + store volatile i32 1, ptr %b1 + %c = load volatile i32, ptr %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"} diff --git a/llvm/test/CodeGen/X86/stack-clash-huge.ll b/llvm/test/CodeGen/X86/stack-clash-huge.ll new file mode 100644 index 0000000000000..03f028dfc2506 --- /dev/null +++ b/llvm/test/CodeGen/X86/stack-clash-huge.ll @@ -0,0 +1,84 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp +; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X64 %s +; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86 %s +; RUN: llc -mtriple=x86_64-linux-gnux32 < %s | FileCheck -check-prefix=CHECK-X32 %s + +define i32 @foo() local_unnamed_addr #0 { +; CHECK-X64-LABEL: foo: +; CHECK-X64: # %bb.0: +; CHECK-X64-NEXT: movabsq $-2399997952, %r11 # imm = 0xFFFFFFFF70F2F000 +; CHECK-X64-NEXT: addq %rsp, %r11 +; CHECK-X64-NEXT: .cfi_def_cfa_register %r11 +; CHECK-X64-NEXT: .cfi_adjust_cfa_offset 2399997952 +; CHECK-X64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-X64-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-X64-NEXT: movq $0, (%rsp) +; CHECK-X64-NEXT: cmpq %r11, %rsp +; CHECK-X64-NEXT: jne .LBB0_1 +; CHECK-X64-NEXT: # %bb.2: +; CHECK-X64-NEXT: subq $1928, %rsp # imm = 0x788 +; CHECK-X64-NEXT: .cfi_def_cfa_register %rsp +; CHECK-X64-NEXT: .cfi_def_cfa_offset 2399999888 +; CHECK-X64-NEXT: movl $1, 264(%rsp) +; CHECK-X64-NEXT: movl $1, 28664(%rsp) +; CHECK-X64-NEXT: movl -128(%rsp), %eax +; CHECK-X64-NEXT: movl $2399999880, %ecx # imm = 0x8F0D1788 +; CHECK-X64-NEXT: addq %rcx, %rsp +; CHECK-X64-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X64-NEXT: retq +; +; CHECK-X86-LABEL: foo: +; CHECK-X86: # %bb.0: +; CHECK-X86-NEXT: movl %esp, %eax +; CHECK-X86-NEXT: subl $2399997952, %eax # imm = 0x8F0D1000 +; CHECK-X86-NEXT: .cfi_def_cfa_register %eax +; CHECK-X86-NEXT: .cfi_adjust_cfa_offset 2399997952 +; CHECK-X86-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-X86-NEXT: subl $4096, %esp # imm = 0x1000 +; CHECK-X86-NEXT: movl $0, (%esp) +; CHECK-X86-NEXT: cmpl %eax, %esp +; CHECK-X86-NEXT: jne .LBB0_1 +; CHECK-X86-NEXT: # %bb.2: +; CHECK-X86-NEXT: subl $2060, %esp # imm = 0x80C +; CHECK-X86-NEXT: .cfi_def_cfa_register %esp +; CHECK-X86-NEXT: .cfi_def_cfa_offset 2400000016 +; CHECK-X86-NEXT: movl $1, 392(%esp) +; CHECK-X86-NEXT: movl $1, 28792(%esp) +; CHECK-X86-NEXT: movl (%esp), %eax +; CHECK-X86-NEXT: movl $2400000012, %ecx # imm = 0x8F0D180C +; CHECK-X86-NEXT: addl %ecx, %esp +; CHECK-X86-NEXT: .cfi_def_cfa_offset 4 +; CHECK-X86-NEXT: retl +; +; CHECK-X32-LABEL: foo: +; CHECK-X32: # %bb.0: +; CHECK-X32-NEXT: movl %esp, %r11d +; CHECK-X32-NEXT: subl $2399997952, %r11d # imm = 0x8F0D1000 +; CHECK-X32-NEXT: .cfi_def_cfa_register %r11 +; CHECK-X32-NEXT: .cfi_adjust_cfa_offset 2399997952 +; CHECK-X32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-X32-NEXT: subl $4096, %esp # imm = 0x1000 +; CHECK-X32-NEXT: movq $0, (%esp) +; CHECK-X32-NEXT: cmpl %r11d, %esp +; CHECK-X32-NEXT: jne .LBB0_1 +; CHECK-X32-NEXT: # %bb.2: +; CHECK-X32-NEXT: subl $1928, %esp # imm = 0x788 +; CHECK-X32-NEXT: .cfi_def_cfa_register %rsp +; CHECK-X32-NEXT: .cfi_def_cfa_offset 2399999888 +; CHECK-X32-NEXT: movl $1, 264(%esp) +; CHECK-X32-NEXT: movl $1, 28664(%esp) +; CHECK-X32-NEXT: movl -128(%esp), %eax +; CHECK-X32-NEXT: movl $2399999880, %ecx # imm = 0x8F0D1788 +; CHECK-X32-NEXT: addq %rcx, %esp +; CHECK-X32-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X32-NEXT: retq + %a = alloca i32, i64 600000000, align 16 + %b0 = getelementptr inbounds i32, ptr %a, i64 98 + %b1 = getelementptr inbounds i32, ptr %a, i64 7198 + store volatile i32 1, ptr %b0 + store volatile i32 1, ptr %b1 + %c = load volatile i32, ptr %a + ret i32 %c +} + +attributes #0 = {"probe-stack"="inline-asm"}