Skip to content

Commit af7166a

Browse files
authored
[AArch64] Ensure the LR is preserved if we must call __arm_get_current_vg (#145760)
Fixes #145635
1 parent 35a0c18 commit af7166a

File tree

2 files changed

+57
-2
lines changed

2 files changed

+57
-2
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3934,6 +3934,11 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
39343934
CSStackSize += SpillSize;
39353935
}
39363936

3937+
// Save number of saved regs, so we can easily update CSStackSize later to
3938+
// account for any additional 64-bit GPR saves. Note: After this point
3939+
// only 64-bit GPRs can be added to SavedRegs.
3940+
unsigned NumSavedRegs = SavedRegs.count();
3941+
39373942
// Increase the callee-saved stack size if the function has streaming mode
39383943
// changes, as we will need to spill the value of the VG register.
39393944
// For locally streaming functions, we spill both the streaming and
@@ -3952,8 +3957,9 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
39523957
if (AFI->hasStackHazardSlotIndex())
39533958
CSStackSize += getStackHazardSize(MF);
39543959

3955-
// Save number of saved regs, so we can easily update CSStackSize later.
3956-
unsigned NumSavedRegs = SavedRegs.count();
3960+
// If we must call __arm_get_current_vg in the prologue, preserve the LR.
3961+
if (requiresSaveVG(MF) && !Subtarget.hasSVE())
3962+
SavedRegs.set(AArch64::LR);
39573963

39583964
// The frame record needs to be created by saving the appropriate registers
39593965
uint64_t EstimatedStackSize = MFI.estimateStackSize(MF);
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -O0 < %s | FileCheck %s
3+
4+
; Example of a locally streaming function that (at -O0) must preserve the LR (X30)
5+
; before calling __arm_get_current_vg.
6+
define void @foo() "aarch64_pstate_sm_body" {
7+
; CHECK-LABEL: foo:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
10+
; CHECK-NEXT: .cfi_def_cfa_offset 96
11+
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
12+
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
13+
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
14+
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
15+
; CHECK-NEXT: rdsvl x9, #1
16+
; CHECK-NEXT: lsr x9, x9, #3
17+
; CHECK-NEXT: str x9, [sp, #72] // 8-byte Folded Spill
18+
; CHECK-NEXT: bl __arm_get_current_vg
19+
; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill
20+
; CHECK-NEXT: .cfi_offset vg, -16
21+
; CHECK-NEXT: .cfi_offset w30, -32
22+
; CHECK-NEXT: .cfi_offset b8, -40
23+
; CHECK-NEXT: .cfi_offset b9, -48
24+
; CHECK-NEXT: .cfi_offset b10, -56
25+
; CHECK-NEXT: .cfi_offset b11, -64
26+
; CHECK-NEXT: .cfi_offset b12, -72
27+
; CHECK-NEXT: .cfi_offset b13, -80
28+
; CHECK-NEXT: .cfi_offset b14, -88
29+
; CHECK-NEXT: .cfi_offset b15, -96
30+
; CHECK-NEXT: smstart sm
31+
; CHECK-NEXT: smstop sm
32+
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
33+
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
34+
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
35+
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
36+
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
37+
; CHECK-NEXT: .cfi_def_cfa_offset 0
38+
; CHECK-NEXT: .cfi_restore w30
39+
; CHECK-NEXT: .cfi_restore b8
40+
; CHECK-NEXT: .cfi_restore b9
41+
; CHECK-NEXT: .cfi_restore b10
42+
; CHECK-NEXT: .cfi_restore b11
43+
; CHECK-NEXT: .cfi_restore b12
44+
; CHECK-NEXT: .cfi_restore b13
45+
; CHECK-NEXT: .cfi_restore b14
46+
; CHECK-NEXT: .cfi_restore b15
47+
; CHECK-NEXT: ret
48+
ret void
49+
}

0 commit comments

Comments
 (0)