[X86] Fix ABI for passing after i128 #124134

Merged: 4 commits, Jan 24, 2025
Changes from all commits
34 changes: 34 additions & 0 deletions llvm/lib/Target/X86/X86CallingConv.cpp
@@ -340,5 +340,39 @@ static bool CC_X86_64_Pointer(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return false;
}

/// Special handling for i128: Either allocate the value to two consecutive
/// i64 registers, or to the stack. Do not partially allocate in registers,
/// and do not reserve any registers when allocating to the stack.
static bool CC_X86_64_I128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
assert(ValVT == MVT::i64 && "Should have i64 parts");
SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
PendingMembers.push_back(
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));

if (!ArgFlags.isInConsecutiveRegsLast())
return true;

unsigned NumRegs = PendingMembers.size();
assert(NumRegs == 2 && "Should have two parts");

static const MCPhysReg Regs[] = {X86::RDI, X86::RSI, X86::RDX,
X86::RCX, X86::R8, X86::R9};
ArrayRef<MCPhysReg> Allocated = State.AllocateRegBlock(Regs, NumRegs);
if (!Allocated.empty()) {
PendingMembers[0].convertToReg(Allocated[0]);
PendingMembers[1].convertToReg(Allocated[1]);
} else {
int64_t Offset = State.AllocateStack(16, Align(16));
PendingMembers[0].convertToMem(Offset);
PendingMembers[1].convertToMem(Offset + 8);
}
State.addLoc(PendingMembers[0]);
State.addLoc(PendingMembers[1]);
PendingMembers.clear();
return true;
}

// Provides entry points of CC_X86 and RetCC_X86.
#include "X86GenCallingConv.inc"
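The function above implements an all-or-nothing policy: both i64 halves of an i128 land in two consecutive argument registers, or both go to 16-byte-aligned stack, and a spilled i128 no longer reserves (shadows) a register the way the old CCAssignToStackWithShadow rule did. As a minimal standalone sketch of that policy (MiniState, ArgLoc, and the alloc* methods are invented for illustration and deliberately ignore LLVM's real CCState API):

#include <array>
#include <cstdint>
#include <utility>

// Invented stand-in for an argument location: either a register index into
// GPRs or a byte offset into the outgoing argument area.
struct ArgLoc {
  bool InReg;
  unsigned Reg;
  int64_t Offset;
};

// Invented stand-in for CCState: tracks the next free integer register and
// the running stack offset.
class MiniState {
public:
  static constexpr std::array<const char *, 6> GPRs = {"rdi", "rsi", "rdx",
                                                       "rcx", "r8",  "r9"};

  // Default i64 allocation: next free register, otherwise the stack.
  ArgLoc allocI64() {
    if (NextReg < GPRs.size())
      return {true, NextReg++, 0};
    return {false, 0, allocStack(8, 8)};
  }

  // Mirrors CC_X86_64_I128: both halves go to two consecutive registers, or
  // both go to 16-byte-aligned stack. On the stack path the remaining
  // register is NOT shadowed, so a later i64 argument can still take it.
  std::pair<ArgLoc, ArgLoc> allocI128() {
    if (NextReg + 2 <= GPRs.size()) {
      ArgLoc Lo{true, NextReg, 0}, Hi{true, NextReg + 1, 0};
      NextReg += 2;
      return {Lo, Hi};
    }
    int64_t Off = allocStack(16, 16);
    return {ArgLoc{false, 0, Off}, ArgLoc{false, 0, Off + 8}};
  }

private:
  int64_t allocStack(int64_t Size, int64_t Alignment) {
    NextStack = (NextStack + Alignment - 1) & -Alignment;
    int64_t Off = NextStack;
    NextStack += Size;
    return Off;
  }

  unsigned NextReg = 0;
  int64_t NextStack = 0;
};

The test walk-throughs further down reuse this sketch.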
6 changes: 2 additions & 4 deletions llvm/lib/Target/X86/X86CallingConv.td
@@ -548,11 +548,9 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,

// i128 can be either passed in two i64 registers, or on the stack, but
-// not split across register and stack. As such, do not allow using R9
-// for a split i64.
+// not split across register and stack. Handle this with a custom function.
CCIfType<[i64],
-CCIfSplit<CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
-CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [R9]>>>,
+CCIfConsecutiveRegs<CCCustom<"CC_X86_64_I128">>>,

CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,

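For reference, this is roughly the shape of the matcher that TableGen should now emit for this entry in X86GenCallingConv.inc (a trimmed paraphrase, not the verbatim generated code; CCCustom expands to a call of the named handler, whose true return means the value was handled):

// Trimmed paraphrase of the generated CC_X86_64_C matcher.
static bool CC_X86_64_C(unsigned ValNo, MVT ValVT, MVT LocVT,
                        CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                        CCState &State) {
  // ... earlier entries (byval, the i32 register table, ...) ...
  if (LocVT == MVT::i64) {                 // CCIfType<[i64], ...>
    if (ArgFlags.isInConsecutiveRegs()) {  // CCIfConsecutiveRegs<...>
      if (CC_X86_64_I128(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
        return false;                      // handled: assigned or pended
    }
  }
  // ... the plain i64 register table and the remaining entries ...
  return true;                             // no entry matched
}

Note that CC_X86_64_I128 returns true even for the non-last parts it merely queues as pending, which stops the search for those parts until the last part arrives.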
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
@@ -1604,6 +1604,10 @@ namespace llvm {
LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
unsigned &NumIntermediates, MVT &RegisterVT) const override;

bool functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
const DataLayout &DL) const override;

bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

bool supportSwiftError() const override;
8 changes: 8 additions & 0 deletions llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -233,6 +233,14 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
return VT.changeVectorElementTypeToInteger();
}

bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
const DataLayout &DL) const {
// i128 split into i64 needs to be allocated to two consecutive registers,
// or spilled to the stack as a whole.
return Ty->isIntegerTy(128);
}

/// Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
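This hook is what causes the two i64 parts of an i128 to arrive at the calling-convention code carrying the InConsecutiveRegs flags that CC_X86_64_I128 keys on. A simplified paraphrase of the flag plumbing in SelectionDAG's argument lowering (not the exact code):

// When the target requests consecutive registers for a type, every split
// part is flagged, and the final part also gets the "last" marker that
// tells CC_X86_64_I128 it has now seen the whole value.
if (TLI.functionArgumentNeedsConsecutiveRegisters(ArgTy, CallConv, IsVarArg,
                                                  DL)) {
  for (unsigned Part = 0; Part != NumParts; ++Part) {
    PartFlags[Part].setInConsecutiveRegs();
    if (Part == NumParts - 1)
      PartFlags[Part].setInConsecutiveRegsLast();
  }
}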
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/addcarry.ll
@@ -49,7 +49,7 @@ define i256 @add256(i256 %a, i256 %b) nounwind {
; CHECK-LABEL: add256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: addq %r9, %rsi
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %r8
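Why r9 appears here even though add256 involves i256, not i128: the hook above only returns true for i128, so an i256's four i64 parts keep the default per-part allocation and may now legitimately split across registers and the stack; it was the old CCIfSplit rules that (unintentionally) kept r9 away from them too. A hypothetical trace using the MiniState sketch from earlier:

// add256 returns its i256 via an sret pointer in rdi; %a's four parts take
// rsi, rdx, rcx and r8; %b's first part may now use r9.
void traceAdd256() {
  MiniState S;
  for (int I = 0; I != 5; ++I)
    S.allocI64();   // sret pointer + the four parts of %a
  S.allocI64();     // %b part 0 -> r9 (the old rules forced it to the stack)
  for (int I = 0; I != 3; ++I)
    S.allocI64();   // %b parts 1..3 -> stack
}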
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/apx/flags-copy-lowering.ll
@@ -31,15 +31,15 @@ define <2 x i128> @flag_copy_2(<2 x i128> %x, <2 x i128> %y) nounwind {
; CHECK-NEXT: movq %r8, %rdi
; CHECK-NEXT: {nf} sarq $63, %rdi
; CHECK-NEXT: cmovoq %rdi, %rcx
-; CHECK-NEXT: movabsq $-9223372036854775808, %r9 # imm = 0x8000000000000000
-; CHECK-NEXT: {nf} xorq %r9, %rdi
+; CHECK-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
+; CHECK-NEXT: {nf} xorq %r10, %rdi
; CHECK-NEXT: cmovnoq %r8, %rdi
-; CHECK-NEXT: subq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: subq %r9, %rsi
Contributor: Is this correct? The fourth i128 seems to be split between R9 and the stack.

Contributor: OK, so you mean that either this, or four individual i128 arguments, is not legal, because even in the latter case the fourth i128 should be turned into memory by the frontend. In that case, I think we may not need to handle the problem here.

Contributor Author: Yeah, I'm assuming that the frontend will handle <2 x i128> by directly using byval (https://clang.godbolt.org/z/bznzTKohz -- interestingly, clang still directly returns the vector; I would have expected it to use sret rather than relying on sret demotion in the backend...).

I could extend this code to also handle vectors of i128 and require the whole vector argument to be in consecutive registers. I'm just not sure it makes sense to handle this, as there is no defined psABI for <2 x i128> in the first place, and if there were, the frontend would handle that part.

Contributor: Returning is another story; we emit warnings sometimes: https://clang.godbolt.org/z/fYo937x3K. I think we can leave it as is.

; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: movq %rdx, %r8
; CHECK-NEXT: {nf} sarq $63, %r8
; CHECK-NEXT: cmovoq %r8, %rsi
-; CHECK-NEXT: {nf} xorq %r9, %r8
+; CHECK-NEXT: {nf} xorq %r10, %r8
; CHECK-NEXT: cmovnoq %rdx, %r8
; CHECK-NEXT: movq %rcx, 16(%rax)
; CHECK-NEXT: movq %rsi, (%rax)
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avgflooru-i128.ll
@@ -119,7 +119,7 @@ define <2 x i128> @avgflooru_i128_vec(<2 x i128> %x, <2 x i128> %y) {
; CHECK-LABEL: avgflooru_i128_vec:
; CHECK: # %bb.0: # %start
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: addq %r9, %rsi
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: setb %dil
; CHECK-NEXT: movzbl %dil, %edi
54 changes: 27 additions & 27 deletions llvm/test/CodeGen/X86/fmuladd-soft-float.ll
@@ -1555,30 +1555,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-NEXT: .cfi_offset %r14, -32
; SOFT-FLOAT-64-NEXT: .cfi_offset %r15, -24
; SOFT-FLOAT-64-NEXT: .cfi_offset %rbp, -16
+; SOFT-FLOAT-64-NEXT: movq %r9, %rbp
Contributor: There's no i128 here, only <4 x double>. This test should not be affected, right?

Contributor: Alright, soft-float: another case where we can ignore the ABI...

Contributor Author: Right. The previous implementation also (unintentionally) affected cases like an illegal <4 x i64> vector (or <4 x double> with soft-float) and wouldn't allow the first element of the vector to use r9. So the whole value could still be split across registers and the stack, just not between the 1st and 2nd element...

Contributor: Why doesn't this trigger assert(NumRegs == 2 && "Should have two parts")? I assume there will be four consecutive i64 parts to handle.

Contributor Author: Because functionArgumentNeedsConsecutiveRegisters only returns true for i128, so <4 x double> gets the default behavior.

Contributor: I see, thanks!

; SOFT-FLOAT-64-NEXT: movq %rcx, %r14
; SOFT-FLOAT-64-NEXT: movq %rdx, %r15
-; SOFT-FLOAT-64-NEXT: movq %rsi, %r12
+; SOFT-FLOAT-64-NEXT: movq %rsi, %r13
; SOFT-FLOAT-64-NEXT: movq %rdi, %rbx
-; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rbp
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: movq %r8, %rdi
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
-; SOFT-FLOAT-64-NEXT: movq %rax, %r13
+; SOFT-FLOAT-64-NEXT: movq %rax, %r12
; SOFT-FLOAT-64-NEXT: movq %r14, %rdi
-; SOFT-FLOAT-64-NEXT: movq %rbp, %rsi
+; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, %r14
; SOFT-FLOAT-64-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, %r15
-; SOFT-FLOAT-64-NEXT: movq %r12, %rdi
-; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
+; SOFT-FLOAT-64-NEXT: movq %r13, %rdi
+; SOFT-FLOAT-64-NEXT: movq %rbp, %rsi
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, %rdi
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
-; SOFT-FLOAT-64-NEXT: movq %rax, %r12
+; SOFT-FLOAT-64-NEXT: movq %rax, %r13
; SOFT-FLOAT-64-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
@@ -1587,13 +1587,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, %r14
-; SOFT-FLOAT-64-NEXT: movq %r13, %rdi
+; SOFT-FLOAT-64-NEXT: movq %r12, %rdi
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-NEXT: movq %rax, 24(%rbx)
; SOFT-FLOAT-64-NEXT: movq %r14, 16(%rbx)
; SOFT-FLOAT-64-NEXT: movq %r15, 8(%rbx)
-; SOFT-FLOAT-64-NEXT: movq %r12, (%rbx)
+; SOFT-FLOAT-64-NEXT: movq %r13, (%rbx)
; SOFT-FLOAT-64-NEXT: movq %rbx, %rax
; SOFT-FLOAT-64-NEXT: addq $8, %rsp
; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 56
@@ -1633,30 +1633,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r14, -32
; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r15, -24
; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbp, -16
+; SOFT-FLOAT-64-FMA-NEXT: movq %r9, %rbp
; SOFT-FLOAT-64-FMA-NEXT: movq %rcx, %r14
; SOFT-FLOAT-64-FMA-NEXT: movq %rdx, %r15
-; SOFT-FLOAT-64-FMA-NEXT: movq %rsi, %r12
+; SOFT-FLOAT-64-FMA-NEXT: movq %rsi, %r13
; SOFT-FLOAT-64-FMA-NEXT: movq %rdi, %rbx
-; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rbp
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: movq %r8, %rdi
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
-; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r13
+; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r12
; SOFT-FLOAT-64-FMA-NEXT: movq %r14, %rdi
-; SOFT-FLOAT-64-FMA-NEXT: movq %rbp, %rsi
+; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r14
; SOFT-FLOAT-64-FMA-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r15
-; SOFT-FLOAT-64-FMA-NEXT: movq %r12, %rdi
-; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
+; SOFT-FLOAT-64-FMA-NEXT: movq %r13, %rdi
+; SOFT-FLOAT-64-FMA-NEXT: movq %rbp, %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %rdi
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
-; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r12
+; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r13
; SOFT-FLOAT-64-FMA-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
@@ -1665,13 +1665,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r14
-; SOFT-FLOAT-64-FMA-NEXT: movq %r13, %rdi
+; SOFT-FLOAT-64-FMA-NEXT: movq %r12, %rdi
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, 24(%rbx)
; SOFT-FLOAT-64-FMA-NEXT: movq %r14, 16(%rbx)
; SOFT-FLOAT-64-FMA-NEXT: movq %r15, 8(%rbx)
-; SOFT-FLOAT-64-FMA-NEXT: movq %r12, (%rbx)
+; SOFT-FLOAT-64-FMA-NEXT: movq %r13, (%rbx)
; SOFT-FLOAT-64-FMA-NEXT: movq %rbx, %rax
; SOFT-FLOAT-64-FMA-NEXT: addq $8, %rsp
; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 56
@@ -1711,30 +1711,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r14, -32
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r15, -24
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbp, -16
+; SOFT-FLOAT-64-FMA4-NEXT: movq %r9, %rbp
; SOFT-FLOAT-64-FMA4-NEXT: movq %rcx, %r14
; SOFT-FLOAT-64-FMA4-NEXT: movq %rdx, %r15
-; SOFT-FLOAT-64-FMA4-NEXT: movq %rsi, %r12
+; SOFT-FLOAT-64-FMA4-NEXT: movq %rsi, %r13
; SOFT-FLOAT-64-FMA4-NEXT: movq %rdi, %rbx
-; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rbp
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: movq %r8, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
-; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r13
+; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r12
; SOFT-FLOAT-64-FMA4-NEXT: movq %r14, %rdi
-; SOFT-FLOAT-64-FMA4-NEXT: movq %rbp, %rsi
+; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r14
; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r15
-; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, %rdi
-; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
+; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, %rdi
+; SOFT-FLOAT-64-FMA4-NEXT: movq %rbp, %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
-; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r12
+; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r13
; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
@@ -1743,13 +1743,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r14
-; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, %rdi
+; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, %rdi
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, 24(%rbx)
; SOFT-FLOAT-64-FMA4-NEXT: movq %r14, 16(%rbx)
; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, 8(%rbx)
-; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, (%rbx)
+; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, (%rbx)
; SOFT-FLOAT-64-FMA4-NEXT: movq %rbx, %rax
; SOFT-FLOAT-64-FMA4-NEXT: addq $8, %rsp
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 56
10 changes: 4 additions & 6 deletions llvm/test/CodeGen/X86/i128-abi.ll
@@ -31,7 +31,7 @@ define i128 @on_stack2(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5, i1
define i64 @trailing_arg_on_stack(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5, i64 %a6) {
; CHECK-LABEL: trailing_arg_on_stack:
; CHECK: # %bb.0:
-; CHECK-NEXT: movq 24(%rsp), %rax
+; CHECK-NEXT: movq %r9, %rax
Contributor Author: This is the most relevant test diff.
; CHECK-NEXT: retq
ret i64 %a6
}
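Walking through the new assignment: a0 through a4 take rdi, rsi, rdx, rcx and r8; the i128 a5 would need two consecutive registers but only r9 is left, so it goes entirely to 16-byte-aligned stack without shadowing r9; the trailing a6 then lands in r9. A hypothetical trace using the MiniState sketch from earlier:

void traceTrailingArgOnStack() {
  MiniState S;
  for (int I = 0; I != 5; ++I)
    S.allocI64();                  // a0..a4 -> rdi, rsi, rdx, rcx, r8
  auto [Lo, Hi] = S.allocI128();   // a5 -> stack, offsets 0 and 8; r9 stays free
  ArgLoc A6 = S.allocI64();        // a6 -> r9; it used to be loaded from 24(%rsp)
  (void)Lo; (void)Hi; (void)A6;
}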
@@ -78,20 +78,18 @@ define void @call_trailing_arg_on_stack(i128 %x, i64 %y) nounwind {
; CHECK-LABEL: call_trailing_arg_on_stack:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: movq %rdx, %rax
-; CHECK-NEXT: movq %rsi, %r9
+; CHECK-NEXT: movq %rdx, %r9
Contributor: This is relevant too. %y is now passed in R9 for trailing_arg_on_stack. (So the function name is not correct anymore :)
+; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: movq %rdi, %r10
-; CHECK-NEXT: subq $8, %rsp
; CHECK-NEXT: movl $1, %esi
; CHECK-NEXT: movl $2, %edx
; CHECK-NEXT: movl $3, %ecx
; CHECK-NEXT: movl $4, %r8d
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: pushq %r9
; CHECK-NEXT: pushq %r10
; CHECK-NEXT: callq trailing_arg_on_stack@PLT
-; CHECK-NEXT: addq $32, %rsp
+; CHECK-NEXT: addq $16, %rsp
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
call i128 @trailing_arg_on_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i128 %x, i64 %y)
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -1795,27 +1795,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8
; SSE-NEXT: seto %dil
-; SSE-NEXT: movq %r8, %r9
-; SSE-NEXT: sarq $63, %r9
+; SSE-NEXT: movq %r8, %r10
+; SSE-NEXT: sarq $63, %r10
; SSE-NEXT: testb %dil, %dil
-; SSE-NEXT: cmovneq %r9, %rcx
-; SSE-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
-; SSE-NEXT: xorq %r10, %r9
+; SSE-NEXT: cmovneq %r10, %rcx
+; SSE-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
+; SSE-NEXT: xorq %r11, %r10
; SSE-NEXT: testb %dil, %dil
-; SSE-NEXT: cmoveq %r8, %r9
-; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rsi
+; SSE-NEXT: cmoveq %r8, %r10
+; SSE-NEXT: addq %r9, %rsi
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; SSE-NEXT: seto %dil
; SSE-NEXT: movq %rdx, %r8
; SSE-NEXT: sarq $63, %r8
; SSE-NEXT: testb %dil, %dil
; SSE-NEXT: cmovneq %r8, %rsi
-; SSE-NEXT: xorq %r10, %r8
+; SSE-NEXT: xorq %r11, %r8
; SSE-NEXT: testb %dil, %dil
; SSE-NEXT: cmoveq %rdx, %r8
; SSE-NEXT: movq %rcx, 16(%rax)
; SSE-NEXT: movq %rsi, (%rax)
-; SSE-NEXT: movq %r9, 24(%rax)
+; SSE-NEXT: movq %r10, 24(%rax)
; SSE-NEXT: movq %r8, 8(%rax)
; SSE-NEXT: retq
;
@@ -1825,27 +1825,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8
; AVX-NEXT: seto %dil
-; AVX-NEXT: movq %r8, %r9
-; AVX-NEXT: sarq $63, %r9
+; AVX-NEXT: movq %r8, %r10
+; AVX-NEXT: sarq $63, %r10
; AVX-NEXT: testb %dil, %dil
-; AVX-NEXT: cmovneq %r9, %rcx
-; AVX-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
-; AVX-NEXT: xorq %r10, %r9
+; AVX-NEXT: cmovneq %r10, %rcx
+; AVX-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
+; AVX-NEXT: xorq %r11, %r10
; AVX-NEXT: testb %dil, %dil
-; AVX-NEXT: cmoveq %r8, %r9
-; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rsi
+; AVX-NEXT: cmoveq %r8, %r10
+; AVX-NEXT: addq %r9, %rsi
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
; AVX-NEXT: seto %dil
; AVX-NEXT: movq %rdx, %r8
; AVX-NEXT: sarq $63, %r8
; AVX-NEXT: testb %dil, %dil
; AVX-NEXT: cmovneq %r8, %rsi
-; AVX-NEXT: xorq %r10, %r8
+; AVX-NEXT: xorq %r11, %r8
; AVX-NEXT: testb %dil, %dil
; AVX-NEXT: cmoveq %rdx, %r8
; AVX-NEXT: movq %rcx, 16(%rax)
; AVX-NEXT: movq %rsi, (%rax)
-; AVX-NEXT: movq %r9, 24(%rax)
+; AVX-NEXT: movq %r10, 24(%rax)
; AVX-NEXT: movq %r8, 8(%rax)
; AVX-NEXT: retq
%z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)