Skip to content

Commit 2068b1b

Browse files
authored
[X86] Fix ABI for passing after i128 (#124134)
If we're passing an i128 value and we no longer have enough argument registers (only r9 unallocated), the value gets passed via the stack. However, r9 is still allocated as a shadow register, which means that a following i64 argument will not use it. This doesn't match the x86-64 psABI. Fix this by marking i128 arguments as requiring consecutive registers, and then adding a custom CC lowering that will allocate both parts of the i128 at the same time, either to registers or to the stack, without reserving a shadow register. Fixes #123935.
1 parent f2b253b commit 2068b1b

14 files changed

+130
-88
lines changed

llvm/lib/Target/X86/X86CallingConv.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,5 +340,39 @@ static bool CC_X86_64_Pointer(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
340340
return false;
341341
}
342342

343+
/// Special handling for i128: Either allocate the value to two consecutive
344+
/// i64 registers, or to the stack. Do not partially allocate in registers,
345+
/// and do not reserve any registers when allocating to the stack.
///
/// Called once per i64 part. Every part is first queued in the CCState's
/// pending-locations list; the real assignment is made only when the part
/// flagged as the last of its consecutive-register group arrives, at which
/// point both queued parts are resolved together. Returns true on every path.
346+
static bool CC_X86_64_I128(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
347+
CCValAssign::LocInfo &LocInfo,
348+
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
349+
assert(ValVT == MVT::i64 && "Should have i64 parts");
350+
// Queue this part as pending; its final location is filled in below.
SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
351+
PendingMembers.push_back(
352+
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
353+
354+
// Not the last part of the group yet: leave it pending and wait for the rest.
if (!ArgFlags.isInConsecutiveRegsLast())
355+
return true;
356+
357+
unsigned NumRegs = PendingMembers.size();
358+
assert(NumRegs == 2 && "Should have two parts");
359+
360+
// Candidate registers, in the order a block of NumRegs is requested from.
static const MCPhysReg Regs[] = {X86::RDI, X86::RSI, X86::RDX,
361+
X86::RCX, X86::R8, X86::R9};
362+
ArrayRef<MCPhysReg> Allocated = State.AllocateRegBlock(Regs, NumRegs);
363+
if (!Allocated.empty()) {
364+
// A register block was obtained: one register per part.
PendingMembers[0].convertToReg(Allocated[0]);
365+
PendingMembers[1].convertToReg(Allocated[1]);
366+
} else {
367+
// No register block available: place both parts in one 16-byte,
// 16-byte-aligned stack slot, second part 8 bytes after the first.
int64_t Offset = State.AllocateStack(16, Align(16));
368+
PendingMembers[0].convertToMem(Offset);
369+
PendingMembers[1].convertToMem(Offset + 8);
370+
}
371+
// Commit both resolved locations and empty the pending list.
State.addLoc(PendingMembers[0]);
372+
State.addLoc(PendingMembers[1]);
373+
PendingMembers.clear();
374+
return true;
375+
}
376+
343377
// Provides entry points of CC_X86 and RetCC_X86.
344378
#include "X86GenCallingConv.inc"

llvm/lib/Target/X86/X86CallingConv.td

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -548,11 +548,9 @@ def CC_X86_64_C : CallingConv<[
548548
CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
549549

550550
// i128 can be either passed in two i64 registers, or on the stack, but
551-
// not split across register and stack. As such, do not allow using R9
552-
// for a split i64.
551+
// not split across register and stack. Handle this with a custom function.
553552
CCIfType<[i64],
554-
CCIfSplit<CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
555-
CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [R9]>>>,
553+
CCIfConsecutiveRegs<CCCustom<"CC_X86_64_I128">>>,
556554

557555
CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
558556

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1604,6 +1604,10 @@ namespace llvm {
16041604
LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
16051605
unsigned &NumIntermediates, MVT &RegisterVT) const override;
16061606

1607+
/// Override of the target-lowering hook that reports whether an argument of
/// type \p Ty must be placed in consecutive registers. The X86 definition in
/// X86ISelLoweringCall.cpp returns true only for 128-bit integer types.
bool functionArgumentNeedsConsecutiveRegisters(
1608+
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
1609+
const DataLayout &DL) const override;
1610+
16071611
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
16081612

16091613
bool supportSwiftError() const override;

llvm/lib/Target/X86/X86ISelLoweringCall.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,14 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
233233
return VT.changeVectorElementTypeToInteger();
234234
}
235235

236+
/// Reports whether an argument of type \p Ty has to be assigned to
/// consecutive registers. Only \p Ty is inspected; \p CallConv, \p isVarArg
/// and \p DL are not read. Returns true exactly when \p Ty is a 128-bit
/// integer type.
bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters(
237+
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
238+
const DataLayout &DL) const {
239+
// i128 split into i64 needs to be allocated to two consecutive registers,
240+
// or spilled to the stack as a whole.
241+
return Ty->isIntegerTy(128);
242+
}
243+
236244
/// Helper for getByValTypeAlignment to determine
237245
/// the desired ByVal argument alignment.
238246
static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {

llvm/test/CodeGen/X86/addcarry.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ define i256 @add256(i256 %a, i256 %b) nounwind {
4949
; CHECK-LABEL: add256:
5050
; CHECK: # %bb.0: # %entry
5151
; CHECK-NEXT: movq %rdi, %rax
52-
; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi
52+
; CHECK-NEXT: addq %r9, %rsi
5353
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
5454
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
5555
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %r8

llvm/test/CodeGen/X86/apx/flags-copy-lowering.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,15 @@ define <2 x i128> @flag_copy_2(<2 x i128> %x, <2 x i128> %y) nounwind {
3131
; CHECK-NEXT: movq %r8, %rdi
3232
; CHECK-NEXT: {nf} sarq $63, %rdi
3333
; CHECK-NEXT: cmovoq %rdi, %rcx
34-
; CHECK-NEXT: movabsq $-9223372036854775808, %r9 # imm = 0x8000000000000000
35-
; CHECK-NEXT: {nf} xorq %r9, %rdi
34+
; CHECK-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
35+
; CHECK-NEXT: {nf} xorq %r10, %rdi
3636
; CHECK-NEXT: cmovnoq %r8, %rdi
37-
; CHECK-NEXT: subq {{[0-9]+}}(%rsp), %rsi
37+
; CHECK-NEXT: subq %r9, %rsi
3838
; CHECK-NEXT: sbbq {{[0-9]+}}(%rsp), %rdx
3939
; CHECK-NEXT: movq %rdx, %r8
4040
; CHECK-NEXT: {nf} sarq $63, %r8
4141
; CHECK-NEXT: cmovoq %r8, %rsi
42-
; CHECK-NEXT: {nf} xorq %r9, %r8
42+
; CHECK-NEXT: {nf} xorq %r10, %r8
4343
; CHECK-NEXT: cmovnoq %rdx, %r8
4444
; CHECK-NEXT: movq %rcx, 16(%rax)
4545
; CHECK-NEXT: movq %rsi, (%rax)

llvm/test/CodeGen/X86/avgflooru-i128.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ define <2 x i128> @avgflooru_i128_vec(<2 x i128> %x, <2 x i128> %y) {
119119
; CHECK-LABEL: avgflooru_i128_vec:
120120
; CHECK: # %bb.0: # %start
121121
; CHECK-NEXT: movq %rdi, %rax
122-
; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi
122+
; CHECK-NEXT: addq %r9, %rsi
123123
; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
124124
; CHECK-NEXT: setb %dil
125125
; CHECK-NEXT: movzbl %dil, %edi

llvm/test/CodeGen/X86/fmuladd-soft-float.ll

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1555,30 +1555,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
15551555
; SOFT-FLOAT-64-NEXT: .cfi_offset %r14, -32
15561556
; SOFT-FLOAT-64-NEXT: .cfi_offset %r15, -24
15571557
; SOFT-FLOAT-64-NEXT: .cfi_offset %rbp, -16
1558+
; SOFT-FLOAT-64-NEXT: movq %r9, %rbp
15581559
; SOFT-FLOAT-64-NEXT: movq %rcx, %r14
15591560
; SOFT-FLOAT-64-NEXT: movq %rdx, %r15
1560-
; SOFT-FLOAT-64-NEXT: movq %rsi, %r12
1561+
; SOFT-FLOAT-64-NEXT: movq %rsi, %r13
15611562
; SOFT-FLOAT-64-NEXT: movq %rdi, %rbx
1562-
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rbp
15631563
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
15641564
; SOFT-FLOAT-64-NEXT: movq %r8, %rdi
15651565
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
1566-
; SOFT-FLOAT-64-NEXT: movq %rax, %r13
1566+
; SOFT-FLOAT-64-NEXT: movq %rax, %r12
15671567
; SOFT-FLOAT-64-NEXT: movq %r14, %rdi
1568-
; SOFT-FLOAT-64-NEXT: movq %rbp, %rsi
1568+
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
15691569
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
15701570
; SOFT-FLOAT-64-NEXT: movq %rax, %r14
15711571
; SOFT-FLOAT-64-NEXT: movq %r15, %rdi
15721572
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
15731573
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
15741574
; SOFT-FLOAT-64-NEXT: movq %rax, %r15
1575-
; SOFT-FLOAT-64-NEXT: movq %r12, %rdi
1576-
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
1575+
; SOFT-FLOAT-64-NEXT: movq %r13, %rdi
1576+
; SOFT-FLOAT-64-NEXT: movq %rbp, %rsi
15771577
; SOFT-FLOAT-64-NEXT: callq __muldf3@PLT
15781578
; SOFT-FLOAT-64-NEXT: movq %rax, %rdi
15791579
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
15801580
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
1581-
; SOFT-FLOAT-64-NEXT: movq %rax, %r12
1581+
; SOFT-FLOAT-64-NEXT: movq %rax, %r13
15821582
; SOFT-FLOAT-64-NEXT: movq %r15, %rdi
15831583
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
15841584
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
@@ -1587,13 +1587,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
15871587
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
15881588
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
15891589
; SOFT-FLOAT-64-NEXT: movq %rax, %r14
1590-
; SOFT-FLOAT-64-NEXT: movq %r13, %rdi
1590+
; SOFT-FLOAT-64-NEXT: movq %r12, %rdi
15911591
; SOFT-FLOAT-64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
15921592
; SOFT-FLOAT-64-NEXT: callq __adddf3@PLT
15931593
; SOFT-FLOAT-64-NEXT: movq %rax, 24(%rbx)
15941594
; SOFT-FLOAT-64-NEXT: movq %r14, 16(%rbx)
15951595
; SOFT-FLOAT-64-NEXT: movq %r15, 8(%rbx)
1596-
; SOFT-FLOAT-64-NEXT: movq %r12, (%rbx)
1596+
; SOFT-FLOAT-64-NEXT: movq %r13, (%rbx)
15971597
; SOFT-FLOAT-64-NEXT: movq %rbx, %rax
15981598
; SOFT-FLOAT-64-NEXT: addq $8, %rsp
15991599
; SOFT-FLOAT-64-NEXT: .cfi_def_cfa_offset 56
@@ -1633,30 +1633,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
16331633
; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r14, -32
16341634
; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %r15, -24
16351635
; SOFT-FLOAT-64-FMA-NEXT: .cfi_offset %rbp, -16
1636+
; SOFT-FLOAT-64-FMA-NEXT: movq %r9, %rbp
16361637
; SOFT-FLOAT-64-FMA-NEXT: movq %rcx, %r14
16371638
; SOFT-FLOAT-64-FMA-NEXT: movq %rdx, %r15
1638-
; SOFT-FLOAT-64-FMA-NEXT: movq %rsi, %r12
1639+
; SOFT-FLOAT-64-FMA-NEXT: movq %rsi, %r13
16391640
; SOFT-FLOAT-64-FMA-NEXT: movq %rdi, %rbx
1640-
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rbp
16411641
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
16421642
; SOFT-FLOAT-64-FMA-NEXT: movq %r8, %rdi
16431643
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
1644-
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r13
1644+
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r12
16451645
; SOFT-FLOAT-64-FMA-NEXT: movq %r14, %rdi
1646-
; SOFT-FLOAT-64-FMA-NEXT: movq %rbp, %rsi
1646+
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
16471647
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
16481648
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r14
16491649
; SOFT-FLOAT-64-FMA-NEXT: movq %r15, %rdi
16501650
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
16511651
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
16521652
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r15
1653-
; SOFT-FLOAT-64-FMA-NEXT: movq %r12, %rdi
1654-
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
1653+
; SOFT-FLOAT-64-FMA-NEXT: movq %r13, %rdi
1654+
; SOFT-FLOAT-64-FMA-NEXT: movq %rbp, %rsi
16551655
; SOFT-FLOAT-64-FMA-NEXT: callq __muldf3@PLT
16561656
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %rdi
16571657
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
16581658
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
1659-
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r12
1659+
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r13
16601660
; SOFT-FLOAT-64-FMA-NEXT: movq %r15, %rdi
16611661
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
16621662
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
@@ -1665,13 +1665,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
16651665
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
16661666
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
16671667
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, %r14
1668-
; SOFT-FLOAT-64-FMA-NEXT: movq %r13, %rdi
1668+
; SOFT-FLOAT-64-FMA-NEXT: movq %r12, %rdi
16691669
; SOFT-FLOAT-64-FMA-NEXT: movq {{[0-9]+}}(%rsp), %rsi
16701670
; SOFT-FLOAT-64-FMA-NEXT: callq __adddf3@PLT
16711671
; SOFT-FLOAT-64-FMA-NEXT: movq %rax, 24(%rbx)
16721672
; SOFT-FLOAT-64-FMA-NEXT: movq %r14, 16(%rbx)
16731673
; SOFT-FLOAT-64-FMA-NEXT: movq %r15, 8(%rbx)
1674-
; SOFT-FLOAT-64-FMA-NEXT: movq %r12, (%rbx)
1674+
; SOFT-FLOAT-64-FMA-NEXT: movq %r13, (%rbx)
16751675
; SOFT-FLOAT-64-FMA-NEXT: movq %rbx, %rax
16761676
; SOFT-FLOAT-64-FMA-NEXT: addq $8, %rsp
16771677
; SOFT-FLOAT-64-FMA-NEXT: .cfi_def_cfa_offset 56
@@ -1711,30 +1711,30 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
17111711
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r14, -32
17121712
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %r15, -24
17131713
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_offset %rbp, -16
1714+
; SOFT-FLOAT-64-FMA4-NEXT: movq %r9, %rbp
17141715
; SOFT-FLOAT-64-FMA4-NEXT: movq %rcx, %r14
17151716
; SOFT-FLOAT-64-FMA4-NEXT: movq %rdx, %r15
1716-
; SOFT-FLOAT-64-FMA4-NEXT: movq %rsi, %r12
1717+
; SOFT-FLOAT-64-FMA4-NEXT: movq %rsi, %r13
17171718
; SOFT-FLOAT-64-FMA4-NEXT: movq %rdi, %rbx
1718-
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rbp
17191719
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
17201720
; SOFT-FLOAT-64-FMA4-NEXT: movq %r8, %rdi
17211721
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
1722-
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r13
1722+
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r12
17231723
; SOFT-FLOAT-64-FMA4-NEXT: movq %r14, %rdi
1724-
; SOFT-FLOAT-64-FMA4-NEXT: movq %rbp, %rsi
1724+
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
17251725
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
17261726
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r14
17271727
; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, %rdi
17281728
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
17291729
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
17301730
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r15
1731-
; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, %rdi
1732-
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
1731+
; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, %rdi
1732+
; SOFT-FLOAT-64-FMA4-NEXT: movq %rbp, %rsi
17331733
; SOFT-FLOAT-64-FMA4-NEXT: callq __muldf3@PLT
17341734
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %rdi
17351735
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
17361736
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
1737-
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r12
1737+
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r13
17381738
; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, %rdi
17391739
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
17401740
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
@@ -1743,13 +1743,13 @@ define <4 x double> @fmuladd_contract_v4f64(<4 x double> %a, <4 x double> %b, <4
17431743
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
17441744
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
17451745
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, %r14
1746-
; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, %rdi
1746+
; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, %rdi
17471747
; SOFT-FLOAT-64-FMA4-NEXT: movq {{[0-9]+}}(%rsp), %rsi
17481748
; SOFT-FLOAT-64-FMA4-NEXT: callq __adddf3@PLT
17491749
; SOFT-FLOAT-64-FMA4-NEXT: movq %rax, 24(%rbx)
17501750
; SOFT-FLOAT-64-FMA4-NEXT: movq %r14, 16(%rbx)
17511751
; SOFT-FLOAT-64-FMA4-NEXT: movq %r15, 8(%rbx)
1752-
; SOFT-FLOAT-64-FMA4-NEXT: movq %r12, (%rbx)
1752+
; SOFT-FLOAT-64-FMA4-NEXT: movq %r13, (%rbx)
17531753
; SOFT-FLOAT-64-FMA4-NEXT: movq %rbx, %rax
17541754
; SOFT-FLOAT-64-FMA4-NEXT: addq $8, %rsp
17551755
; SOFT-FLOAT-64-FMA4-NEXT: .cfi_def_cfa_offset 56

llvm/test/CodeGen/X86/i128-abi.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ define i128 @on_stack2(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5, i1
3131
define i64 @trailing_arg_on_stack(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i128 %a5, i64 %a6) {
3232
; CHECK-LABEL: trailing_arg_on_stack:
3333
; CHECK: # %bb.0:
34-
; CHECK-NEXT: movq 24(%rsp), %rax
34+
; CHECK-NEXT: movq %r9, %rax
3535
; CHECK-NEXT: retq
3636
ret i64 %a6
3737
}
@@ -78,20 +78,18 @@ define void @call_trailing_arg_on_stack(i128 %x, i64 %y) nounwind {
7878
; CHECK-LABEL: call_trailing_arg_on_stack:
7979
; CHECK: # %bb.0:
8080
; CHECK-NEXT: pushq %rax
81-
; CHECK-NEXT: movq %rdx, %rax
82-
; CHECK-NEXT: movq %rsi, %r9
81+
; CHECK-NEXT: movq %rdx, %r9
82+
; CHECK-NEXT: movq %rsi, %rax
8383
; CHECK-NEXT: movq %rdi, %r10
84-
; CHECK-NEXT: subq $8, %rsp
8584
; CHECK-NEXT: movl $1, %esi
8685
; CHECK-NEXT: movl $2, %edx
8786
; CHECK-NEXT: movl $3, %ecx
8887
; CHECK-NEXT: movl $4, %r8d
8988
; CHECK-NEXT: xorl %edi, %edi
9089
; CHECK-NEXT: pushq %rax
91-
; CHECK-NEXT: pushq %r9
9290
; CHECK-NEXT: pushq %r10
9391
; CHECK-NEXT: callq trailing_arg_on_stack@PLT
94-
; CHECK-NEXT: addq $32, %rsp
92+
; CHECK-NEXT: addq $16, %rsp
9593
; CHECK-NEXT: popq %rax
9694
; CHECK-NEXT: retq
9795
call i128 @trailing_arg_on_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i128 %x, i64 %y)

llvm/test/CodeGen/X86/sadd_sat_vec.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1795,27 +1795,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
17951795
; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rcx
17961796
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %r8
17971797
; SSE-NEXT: seto %dil
1798-
; SSE-NEXT: movq %r8, %r9
1799-
; SSE-NEXT: sarq $63, %r9
1798+
; SSE-NEXT: movq %r8, %r10
1799+
; SSE-NEXT: sarq $63, %r10
18001800
; SSE-NEXT: testb %dil, %dil
1801-
; SSE-NEXT: cmovneq %r9, %rcx
1802-
; SSE-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
1803-
; SSE-NEXT: xorq %r10, %r9
1801+
; SSE-NEXT: cmovneq %r10, %rcx
1802+
; SSE-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
1803+
; SSE-NEXT: xorq %r11, %r10
18041804
; SSE-NEXT: testb %dil, %dil
1805-
; SSE-NEXT: cmoveq %r8, %r9
1806-
; SSE-NEXT: addq {{[0-9]+}}(%rsp), %rsi
1805+
; SSE-NEXT: cmoveq %r8, %r10
1806+
; SSE-NEXT: addq %r9, %rsi
18071807
; SSE-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
18081808
; SSE-NEXT: seto %dil
18091809
; SSE-NEXT: movq %rdx, %r8
18101810
; SSE-NEXT: sarq $63, %r8
18111811
; SSE-NEXT: testb %dil, %dil
18121812
; SSE-NEXT: cmovneq %r8, %rsi
1813-
; SSE-NEXT: xorq %r10, %r8
1813+
; SSE-NEXT: xorq %r11, %r8
18141814
; SSE-NEXT: testb %dil, %dil
18151815
; SSE-NEXT: cmoveq %rdx, %r8
18161816
; SSE-NEXT: movq %rcx, 16(%rax)
18171817
; SSE-NEXT: movq %rsi, (%rax)
1818-
; SSE-NEXT: movq %r9, 24(%rax)
1818+
; SSE-NEXT: movq %r10, 24(%rax)
18191819
; SSE-NEXT: movq %r8, 8(%rax)
18201820
; SSE-NEXT: retq
18211821
;
@@ -1825,27 +1825,27 @@ define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
18251825
; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rcx
18261826
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %r8
18271827
; AVX-NEXT: seto %dil
1828-
; AVX-NEXT: movq %r8, %r9
1829-
; AVX-NEXT: sarq $63, %r9
1828+
; AVX-NEXT: movq %r8, %r10
1829+
; AVX-NEXT: sarq $63, %r10
18301830
; AVX-NEXT: testb %dil, %dil
1831-
; AVX-NEXT: cmovneq %r9, %rcx
1832-
; AVX-NEXT: movabsq $-9223372036854775808, %r10 # imm = 0x8000000000000000
1833-
; AVX-NEXT: xorq %r10, %r9
1831+
; AVX-NEXT: cmovneq %r10, %rcx
1832+
; AVX-NEXT: movabsq $-9223372036854775808, %r11 # imm = 0x8000000000000000
1833+
; AVX-NEXT: xorq %r11, %r10
18341834
; AVX-NEXT: testb %dil, %dil
1835-
; AVX-NEXT: cmoveq %r8, %r9
1836-
; AVX-NEXT: addq {{[0-9]+}}(%rsp), %rsi
1835+
; AVX-NEXT: cmoveq %r8, %r10
1836+
; AVX-NEXT: addq %r9, %rsi
18371837
; AVX-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
18381838
; AVX-NEXT: seto %dil
18391839
; AVX-NEXT: movq %rdx, %r8
18401840
; AVX-NEXT: sarq $63, %r8
18411841
; AVX-NEXT: testb %dil, %dil
18421842
; AVX-NEXT: cmovneq %r8, %rsi
1843-
; AVX-NEXT: xorq %r10, %r8
1843+
; AVX-NEXT: xorq %r11, %r8
18441844
; AVX-NEXT: testb %dil, %dil
18451845
; AVX-NEXT: cmoveq %rdx, %r8
18461846
; AVX-NEXT: movq %rcx, 16(%rax)
18471847
; AVX-NEXT: movq %rsi, (%rax)
1848-
; AVX-NEXT: movq %r9, 24(%rax)
1848+
; AVX-NEXT: movq %r10, 24(%rax)
18491849
; AVX-NEXT: movq %r8, 8(%rax)
18501850
; AVX-NEXT: retq
18511851
%z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)

0 commit comments

Comments
 (0)