From b39e20256454e9b27a1348ed0e30277b80a52a26 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Mon, 13 Jan 2025 16:15:53 +0000
Subject: [PATCH 1/8] [AArch64][SME] Spill p-regs as z-regs when streaming hazards are possible

This patch adds a new option, `-aarch64-enable-zpr-predicate-spills`
(disabled by default), which replaces predicate spills with vector spills
in streaming[-compatible] functions.

For example:

```
str p8, [sp, #7, mul vl] // 2-byte Folded Spill
// ...
ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
```

Becomes:

```
mov z0.b, p8/z, #1
str z0, [sp] // 16-byte Folded Spill
// ...
ldr z0, [sp] // 16-byte Folded Reload
ptrue p4.b
cmpne p8.b, p4/z, z0.b, #0
```

The `mov` writes 1 to every active byte lane (and 0 elsewhere), so comparing
the reloaded vector against zero under an all-true predicate recovers the
original predicate exactly.

This is done to avoid streaming memory hazards between FPR/vector and
predicate spills, which currently occupy the same stack area even when the
`-aarch64-stack-hazard-size` flag is set.

This is implemented with two new pseudos, SPILL_PPR_TO_ZPR_SLOT_PSEUDO and
FILL_PPR_FROM_ZPR_SLOT_PSEUDO. The expansion of these pseudos handles
scavenging the required registers (z0 in the above example) and, in the
worst case, spilling a register to an emergency stack slot. The condition
flags are also preserved around the `cmpne` in case they are live at the
expansion point.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   |  335 +++++-
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  |   16 +-
 .../Target/AArch64/AArch64RegisterInfo.cpp    |    4 +-
 llvm/lib/Target/AArch64/AArch64RegisterInfo.h |    2 +-
 .../lib/Target/AArch64/AArch64RegisterInfo.td |   11 +-
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp  |   22 +
 llvm/lib/Target/AArch64/AArch64Subtarget.h    |    2 +
 llvm/lib/Target/AArch64/SMEInstrFormats.td    |   14 +
 .../AArch64/spill-fill-zpr-predicates.mir     | 1035 +++++++++++++++++
 .../AArch64/ssve-stack-hazard-remarks.ll      |   13 +-
 10 files changed, 1444 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index a082a1ebe95bf..9b852e2c59564 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1634,6 +1634,9 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) { case AArch64::STR_PXI: case AArch64::LDR_ZXI: case AArch64::LDR_PXI: + case AArch64::PTRUE_B: + case AArch64::CPY_ZPzI_B: + case AArch64::CMPNE_PPzZI_B: return I->getFlag(MachineInstr::FrameSetup) || I->getFlag(MachineInstr::FrameDestroy); }
@@ -3265,7 +3268,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI; break; case RegPairInfo::PPR: - StrOpc = AArch64::STR_PXI; + StrOpc = + Size == 16 ? AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO : AArch64::STR_PXI; break; case RegPairInfo::VG: StrOpc = AArch64::STRXui;
@@ -3494,7 +3498,8 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI; break; case RegPairInfo::PPR: - LdrOpc = AArch64::LDR_PXI; + LdrOpc = Size == 16 ? AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO + : AArch64::LDR_PXI; break; case RegPairInfo::VG: continue;
@@ -3720,6 +3725,14 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, continue; } + // Always save P4 when PPR spills are ZPR-sized and a predicate above p8 is + // spilled. If all of p0-p3 are used as return values p4 must be free + // to reload p8-p15.
+ if (RegInfo->getSpillSize(AArch64::PPRRegClass) == 16 && + AArch64::PPR_p8to15RegClass.contains(Reg)) { + SavedRegs.set(AArch64::P4); + } + // MachO's compact unwind format relies on all registers being stored in // pairs. // FIXME: the usual format is actually better if unwinding isn't needed. @@ -4159,8 +4172,318 @@ int64_t AArch64FrameLowering::assignSVEStackObjectOffsets( true); } +/// Attempts to scavenge a register from \p ScavengeableRegs given the used +/// registers in \p UsedRegs. +static Register tryScavengeRegister(LiveRegUnits const &UsedRegs, + BitVector const &ScavengeableRegs) { + for (auto Reg : ScavengeableRegs.set_bits()) { + if (UsedRegs.available(Reg)) + return Reg; + } + return AArch64::NoRegister; +} + +/// Propagates frame-setup/destroy flags from \p SourceMI to all instructions in +/// \p MachineInstrs. +static void propagateFrameFlags(MachineInstr &SourceMI, + ArrayRef MachineInstrs) { + for (MachineInstr *MI : MachineInstrs) { + if (SourceMI.getFlag(MachineInstr::FrameSetup)) + MI->setFlag(MachineInstr::FrameSetup); + if (SourceMI.getFlag(MachineInstr::FrameDestroy)) + MI->setFlag(MachineInstr::FrameDestroy); + } +} + +/// RAII helper class for scavenging or spilling a register. On construction +/// attempts to find a free register of class \p RC (given \p UsedRegs and \p +/// AllocatableRegs), if no register can be found spills \p SpillCandidate to \p +/// MaybeSpillFI to free a register. The free'd register is returned via the \p +/// FreeReg output parameter. On destruction, if there is a spill, its previous +/// value is reloaded. The spilling and scavenging is only valid at the +/// insertion point \p MBBI, this class should _not_ be used in places that +/// create or manipulate basic blocks, moving the expected insertion point. +struct ScopedScavengeOrSpill { + ScopedScavengeOrSpill(const ScopedScavengeOrSpill &) = delete; + ScopedScavengeOrSpill(ScopedScavengeOrSpill &&) = delete; + + ScopedScavengeOrSpill(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, Register &FreeReg, + Register SpillCandidate, const TargetRegisterClass &RC, + LiveRegUnits const &UsedRegs, + BitVector const &AllocatableRegs, + std::optional &MaybeSpillFI) + : MBB(MBB), MBBI(MBBI), RC(RC), TII(static_cast( + *MF.getSubtarget().getInstrInfo())), + TRI(*MF.getSubtarget().getRegisterInfo()) { + FreeReg = tryScavengeRegister(UsedRegs, AllocatableRegs); + if (FreeReg != AArch64::NoRegister) + return; + if (!MaybeSpillFI) { + MachineFrameInfo &MFI = MF.getFrameInfo(); + MaybeSpillFI = MFI.CreateSpillStackObject(TRI.getSpillSize(RC), + TRI.getSpillAlign(RC)); + } + FreeReg = SpilledReg = SpillCandidate; + SpillFI = *MaybeSpillFI; + TII.storeRegToStackSlot(MBB, MBBI, SpilledReg, false, SpillFI, &RC, &TRI, + Register()); + } + + bool hasSpilled() const { return SpilledReg != AArch64::NoRegister; } + + ~ScopedScavengeOrSpill() { + if (hasSpilled()) + TII.loadRegFromStackSlot(MBB, MBBI, SpilledReg, SpillFI, &RC, &TRI, + Register()); + } + +private: + MachineBasicBlock &MBB; + MachineBasicBlock::iterator MBBI; + const TargetRegisterClass &RC; + const AArch64InstrInfo &TII; + const TargetRegisterInfo &TRI; + Register SpilledReg = AArch64::NoRegister; + int SpillFI = -1; +}; + +/// Emergency stack slots for expanding SPILL_PPR_TO_ZPR_SLOT_PSEUDO and +/// FILL_PPR_FROM_ZPR_SLOT_PSEUDO. 
+struct EmergencyStackSlots { + std::optional ZPRSpillFI; + std::optional PPRSpillFI; + std::optional GPRSpillFI; +}; + +/// Expands: +/// ``` +/// SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0 +/// ``` +/// To: +/// ``` +/// $z0 = CPY_ZPzI_B $p0, 1, 0 +/// STR_ZXI $z0, $stack.0, 0 +/// ``` +/// While ensuring a ZPR ($z0 in this example) is free for the predicate ( +/// spilling if necessary). +static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB, + MachineInstr &MI, + const TargetRegisterInfo &TRI, + LiveRegUnits const &UsedRegs, + BitVector const &ZPRRegs, + EmergencyStackSlots &SpillSlots) { + MachineFunction &MF = *MBB.getParent(); + auto *TII = + static_cast(MF.getSubtarget().getInstrInfo()); + + Register ZPredReg = AArch64::NoRegister; + ScopedScavengeOrSpill FindZPRReg(MF, MBB, MachineBasicBlock::iterator(MI), + ZPredReg, AArch64::Z0, AArch64::ZPRRegClass, + UsedRegs, ZPRRegs, SpillSlots.ZPRSpillFI); + +#ifndef NDEBUG + bool InPrologueOrEpilogue = MI.getFlag(MachineInstr::FrameSetup) || + MI.getFlag(MachineInstr::FrameDestroy); + assert((!FindZPRReg.hasSpilled() || !InPrologueOrEpilogue) && + "SPILL_PPR_TO_ZPR_SLOT_PSEUDO expansion should not spill in prologue " + "or epilogue"); +#endif + + SmallVector MachineInstrs; + const DebugLoc &DL = MI.getDebugLoc(); + MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::CPY_ZPzI_B)) + .addReg(ZPredReg, RegState::Define) + .add(MI.getOperand(0)) + .addImm(1) + .addImm(0) + .getInstr()); + MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::STR_ZXI)) + .addReg(ZPredReg) + .add(MI.getOperand(1)) + .addImm(MI.getOperand(2).getImm()) + .setMemRefs(MI.memoperands()) + .getInstr()); + propagateFrameFlags(MI, MachineInstrs); +} + +/// Expands: +/// ``` +/// $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 +/// ``` +/// To: +/// ``` +/// $z0 = LDR_ZXI %stack.0, 0 +/// $p0 = PTRUE_B 31, implicit $vg +/// $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv +/// ``` +/// While ensuring a ZPR ($z0 in this example) is free for the predicate ( +/// spilling if necessary). If the status flags are in use at the point of +/// expansion they are preserved (by moving them to/from a GPR). This may cause +/// an additional spill if no GPR is free at the expansion point. +static bool expandFillPPRFromZPRSlotPseudo( + MachineBasicBlock &MBB, MachineInstr &MI, const TargetRegisterInfo &TRI, + LiveRegUnits const &UsedRegs, BitVector const &ZPRRegs, + BitVector const &PPR3bRegs, BitVector const &GPRRegs, + EmergencyStackSlots &SpillSlots) { + MachineFunction &MF = *MBB.getParent(); + auto *TII = + static_cast(MF.getSubtarget().getInstrInfo()); + + Register ZPredReg = AArch64::NoRegister; + ScopedScavengeOrSpill FindZPRReg(MF, MBB, MachineBasicBlock::iterator(MI), + ZPredReg, AArch64::Z0, AArch64::ZPRRegClass, + UsedRegs, ZPRRegs, SpillSlots.ZPRSpillFI); + + Register PredReg = AArch64::NoRegister; + std::optional FindPPR3bReg; + if (AArch64::PPR_3bRegClass.contains(MI.getOperand(0).getReg())) + PredReg = MI.getOperand(0).getReg(); + else + FindPPR3bReg.emplace(MF, MBB, MachineBasicBlock::iterator(MI), PredReg, + AArch64::P0, AArch64::PPR_3bRegClass, UsedRegs, + PPR3bRegs, SpillSlots.PPRSpillFI); + + // Elide NZCV spills if we know it is not used. 
+ Register NZCVSaveReg = AArch64::NoRegister; + bool IsNZCVUsed = !UsedRegs.available(AArch64::NZCV); + std::optional FindGPRReg; + if (IsNZCVUsed) + FindGPRReg.emplace(MF, MBB, MachineBasicBlock::iterator(MI), NZCVSaveReg, + AArch64::X0, AArch64::GPR64RegClass, UsedRegs, GPRRegs, + SpillSlots.GPRSpillFI); + +#ifndef NDEBUG + bool Spilled = FindZPRReg.hasSpilled() || + (FindPPR3bReg && FindPPR3bReg->hasSpilled()) || + (FindGPRReg && FindGPRReg->hasSpilled()); + bool InPrologueOrEpilogue = MI.getFlag(MachineInstr::FrameSetup) || + MI.getFlag(MachineInstr::FrameDestroy); + assert((!Spilled || !InPrologueOrEpilogue) && + "FILL_PPR_FROM_ZPR_SLOT_PSEUDO expansion should not spill in prologue " + "or epilogue"); +#endif + + SmallVector MachineInstrs; + const DebugLoc &DL = MI.getDebugLoc(); + MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::LDR_ZXI)) + .addReg(ZPredReg, RegState::Define) + .add(MI.getOperand(1)) + .addImm(MI.getOperand(2).getImm()) + .setMemRefs(MI.memoperands()) + .getInstr()); + if (IsNZCVUsed) + MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::MRS)) + .addReg(NZCVSaveReg, RegState::Define) + .addImm(AArch64SysReg::NZCV) + .addReg(AArch64::NZCV, RegState::Implicit) + .getInstr()); + MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::PTRUE_B)) + .addReg(PredReg, RegState::Define) + .addImm(31)); + MachineInstrs.push_back( + BuildMI(MBB, MI, DL, TII->get(AArch64::CMPNE_PPzZI_B)) + .addReg(MI.getOperand(0).getReg(), RegState::Define) + .addReg(PredReg) + .addReg(ZPredReg) + .addImm(0) + .addReg(AArch64::NZCV, RegState::ImplicitDefine) + .getInstr()); + if (IsNZCVUsed) + MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::MSR)) + .addImm(AArch64SysReg::NZCV) + .addReg(NZCVSaveReg) + .addReg(AArch64::NZCV, RegState::ImplicitDefine) + .getInstr()); + + propagateFrameFlags(MI, MachineInstrs); + return FindPPR3bReg && FindPPR3bReg->hasSpilled(); +} + +/// Expands all FILL_PPR_FROM_ZPR_SLOT_PSEUDO and SPILL_PPR_TO_ZPR_SLOT_PSEUDO +/// operations within the MachineBasicBlock \p MBB. 
+static bool expandSMEPPRToZPRSpillPseudos(MachineBasicBlock &MBB, + const TargetRegisterInfo &TRI, + BitVector const &ZPRRegs, + BitVector const &PPR3bRegs, + BitVector const &GPRRegs, + EmergencyStackSlots &SpillSlots) { + LiveRegUnits UsedRegs(TRI); + UsedRegs.addLiveOuts(MBB); + bool HasPPRSpills = false; + for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) { + UsedRegs.stepBackward(MI); + switch (MI.getOpcode()) { + case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO: + HasPPRSpills |= expandFillPPRFromZPRSlotPseudo( + MBB, MI, TRI, UsedRegs, ZPRRegs, PPR3bRegs, GPRRegs, SpillSlots); + MI.eraseFromParent(); + break; + case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO: + expandSpillPPRToZPRSlotPseudo(MBB, MI, TRI, UsedRegs, ZPRRegs, + SpillSlots); + MI.eraseFromParent(); + break; + default: + break; + } + } + + return HasPPRSpills; +} + void AArch64FrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { + + AArch64FunctionInfo *AFI = MF.getInfo(); + const TargetSubtargetInfo &TSI = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *TSI.getRegisterInfo(); + if (AFI->hasStackFrame() && TRI.getSpillSize(AArch64::PPRRegClass) == 16) { + const uint32_t *CSRMask = + TRI.getCallPreservedMask(MF, MF.getFunction().getCallingConv()); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + assert(MFI.isCalleeSavedInfoValid()); + + auto ComputeScavengeableRegisters = [&](unsigned RegClassID) { + BitVector ScavengeableRegs = + TRI.getAllocatableSet(MF, TRI.getRegClass(RegClassID)); + if (CSRMask) + ScavengeableRegs.clearBitsInMask(CSRMask); + // TODO: Allow reusing callee-saved registers that have been saved. + return ScavengeableRegs; + }; + + // If predicates spills are 16-bytes we may need to expand + // SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO. + // These are handled separately as we need to compute register liveness to + // scavenge a ZPR and PPR during the expansion. + BitVector ZPRRegs = ComputeScavengeableRegisters(AArch64::ZPRRegClassID); + // Only p0-7 are possible as the second operand of cmpne (needed for fills). + BitVector PPR3bRegs = + ComputeScavengeableRegisters(AArch64::PPR_3bRegClassID); + BitVector GPRRegs = ComputeScavengeableRegisters(AArch64::GPR64RegClassID); + + bool SpillsAboveP7 = + any_of(MFI.getCalleeSavedInfo(), [](const CalleeSavedInfo &CSI) { + return AArch64::PPR_p8to15RegClass.contains(CSI.getReg()); + }); + // We spill p4 in determineCalleeSaves() if a predicate above p8 is spilled, + // as it may be needed to reload callee saves (if p0-p3 are used as + // returns). 
+ if (SpillsAboveP7) + PPR3bRegs.set(AArch64::P4); + + EmergencyStackSlots SpillSlots; + for (MachineBasicBlock &MBB : MF) { + for (int Pass = 0; Pass < 2; Pass++) { + bool HasPPRSpills = expandSMEPPRToZPRSpillPseudos( + MBB, TRI, ZPRRegs, PPR3bRegs, GPRRegs, SpillSlots); + if (!HasPPRSpills) + break; + } + } + } + MachineFrameInfo &MFI = MF.getFrameInfo(); assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown && @@ -4170,7 +4493,6 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( int64_t SVEStackSize = assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex); - AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U)); AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex); @@ -5204,7 +5526,12 @@ void AArch64FrameLowering::emitRemarks( unsigned RegTy = StackAccess::AccessType::GPR; if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) { - if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg())) + // SPILL_PPR_TO_ZPR_SLOT_PSEUDO and FILL_PPR_FROM_ZPR_SLOT_PSEUDO + // spill/fill the predicate as a data vector (so they are FPR accesses). + if (!is_contained({AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO, + AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO}, + MI.getOpcode()) && + AArch64::PPRRegClass.contains(MI.getOperand(0).getReg())) RegTy = StackAccess::PPR; else RegTy = StackAccess::FPR; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 17dd8a073eff0..0f2b969fba35c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -81,7 +81,7 @@ static cl::opt AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP, AArch64::CATCHRET), - RI(STI.getTargetTriple()), Subtarget(STI) {} + RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {} /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes.
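The `RI(STI.getTargetTriple(), STI.getHwMode())` change above is what connects the subtarget's hardware mode (set up later in this patch via `getHwModeSet()` and the `RegInfoByHwMode` entry in AArch64RegisterInfo.td) to the register-class spill sizes. The rest of the patch then keys off a single query, sketched below; this helper is illustrative only and not part of the patch.

```
// Sketch only (not in the patch): with the streaming-hazard hardware mode
// active, the PPR register class reports a 16-byte spill size instead of the
// usual 2 bytes, so a check like this is all a caller needs.
static bool usesZPRSizedPPRSpills(const MachineFunction &MF) {
  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
  return TRI.getSpillSize(AArch64::PPRRegClass) == 16;
}
```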
@@ -2438,6 +2438,8 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) { case AArch64::STZ2Gi: case AArch64::STZGi: case AArch64::TAGPstack: + case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO: + case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO: return 2; case AArch64::LD1B_D_IMM: case AArch64::LD1B_H_IMM: @@ -4223,6 +4225,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, MinOffset = -256; MaxOffset = 254; break; + case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO: + case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO: case AArch64::LDR_ZXI: case AArch64::STR_ZXI: Scale = TypeSize::getScalable(16); @@ -5355,6 +5359,11 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, "Unexpected register store without SVE store instructions"); Opc = AArch64::STR_ZXI; StackID = TargetStackID::ScalableVector; + } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { + assert(Subtarget.isSVEorStreamingSVEAvailable() && + "Unexpected predicate store without SVE store instructions"); + Opc = AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO; + StackID = TargetStackID::ScalableVector; } break; case 24: @@ -5527,6 +5536,11 @@ void AArch64InstrInfo::loadRegFromStackSlot( "Unexpected register load without SVE load instructions"); Opc = AArch64::LDR_ZXI; StackID = TargetStackID::ScalableVector; + } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { + assert(Subtarget.isSVEorStreamingSVEAvailable() && + "Unexpected predicate load without SVE load instructions"); + Opc = AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO; + StackID = TargetStackID::ScalableVector; } break; case 24: diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 49f6860346fa1..8fd34325bb00d 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -39,8 +39,8 @@ using namespace llvm; #define GET_REGINFO_TARGET_DESC #include "AArch64GenRegisterInfo.inc" -AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT) - : AArch64GenRegisterInfo(AArch64::LR), TT(TT) { +AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT, unsigned HwMode) + : AArch64GenRegisterInfo(AArch64::LR, 0, 0, 0, HwMode), TT(TT) { AArch64_MC::initLLVMToCVRegMapping(this); } diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h index 11da624af4881..898a509f75908 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -27,7 +27,7 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo { const Triple &TT; public: - AArch64RegisterInfo(const Triple &TT); + AArch64RegisterInfo(const Triple &TT, unsigned HwMode); // FIXME: This should be tablegen'd like getDwarfRegNum is int getSEHRegNum(unsigned i) const { diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index dd4f2549929f8..6b6884c545758 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -979,10 +979,19 @@ class ZPRRegOp ]>; + +def PPRSpillFillRI : RegInfoByHwMode< + [DefaultMode, SMEWithStreamingMemoryHazards], + [RegInfo<16,16,16>, RegInfo<16,128,128>]>; + class PPRClass : RegisterClass<"AArch64", [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16, (sequence "P%u", firstreg, lastreg, step)> { - let Size = 16; + let RegInfos = PPRSpillFillRI; } def PPR : PPRClass<0, 15> { diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp 
b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index bc921f07e1dbf..5864f57582e21 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -86,6 +86,11 @@ static cl::alias AArch64StreamingStackHazardSize( cl::desc("alias for -aarch64-streaming-hazard-size"), cl::aliasopt(AArch64StreamingHazardSize)); +static cl::opt EnableZPRPredicateSpills( + "aarch64-enable-zpr-predicate-spills", cl::init(false), cl::Hidden, + cl::desc( + "Enables spilling/reloading SVE predicates as data vectors (ZPRs)")); + // Subreg liveness tracking is disabled by default for now until all issues // are ironed out. This option allows the feature to be used in tests. static cl::opt @@ -400,6 +405,23 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU, EnableSubregLiveness = EnableSubregLivenessTracking.getValue(); } +unsigned AArch64Subtarget::getHwModeSet() const { + unsigned Modes = 0; + + // Use a special hardware mode in streaming functions with stack hazards. + // This changes the spill size (and alignment) for the predicate register + // class. + // + // FIXME: This overrides the table-gen'd `getHwModeSet()` which only looks at + // CPU features. + if (EnableZPRPredicateSpills.getValue() && + (isStreaming() || isStreamingCompatible())) { + Modes |= (1 << 0); + } + + return Modes; +} + const CallLowering *AArch64Subtarget::getCallLowering() const { return CallLoweringInfo.get(); } diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index d22991224d496..e7757907a6643 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -130,6 +130,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool IsStreaming = false, bool IsStreamingCompatible = false, bool HasMinSize = false); + virtual unsigned getHwModeSet() const override; + // Getters for SubtargetFeatures defined in tablegen #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ bool GETTER() const { return ATTRIBUTE; } diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index a01d59d0e5c43..0ac131e48c4f8 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -59,6 +59,20 @@ def FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO : let hasPostISelHook = 1; } +def SPILL_PPR_TO_ZPR_SLOT_PSEUDO : + Pseudo<(outs), (ins PPRorPNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), []>, Sched<[]> +{ + let mayStore = 1; + let hasSideEffects = 0; +} + +def FILL_PPR_FROM_ZPR_SLOT_PSEUDO : + Pseudo<(outs PPRorPNRAny:$Pt), (ins GPR64sp:$Rn, simm9:$imm9), []>, Sched<[]> +{ + let mayLoad = 1; + let hasSideEffects = 0; +} + def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore, [SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>; diff --git a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir new file mode 100644 index 0000000000000..a432a61384e42 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir @@ -0,0 +1,1035 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-zpr-predicate-spills -run-pass=greedy %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-zpr-predicate-spills -start-before=greedy 
-stop-after=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=EXPAND +--- | + source_filename = "" + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64--linux-gnu" + + define aarch64_sve_vector_pcs void @zpr_predicate_spill() #0 { entry: unreachable } + + define aarch64_sve_vector_pcs void @zpr_predicate_spill__save_restore_nzcv() #0 { entry: unreachable } + + define aarch64_sve_vector_pcs void @zpr_predicate_spill__save_restore_nzcv__spill_gpr() #0 { entry: unreachable } + + define aarch64_sve_vector_pcs void @zpr_predicate_spill__spill_zpr() #0 { entry: unreachable } + + define aarch64_sve_vector_pcs void @zpr_predicate_spill_above_p7() #0 { entry: unreachable } + + define aarch64_sve_vector_pcs void @zpr_predicate_spill_p4_saved() #0 { entry: unreachable } + + attributes #0 = {nounwind "target-features"="+sme,+sve" "aarch64_pstate_sm_compatible"} +... +--- +name: zpr_predicate_spill +tracksRegLiveness: true +stack: +liveins: + - { reg: '$p0' } +body: | + bb.0.entry: + liveins: $p0 + + ; CHECK-LABEL: name: zpr_predicate_spill + ; CHECK: stack: + ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, + ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; + ; CHECK-NEXT: SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0 :: (store (s128) into %stack.0) + ; + ; CHECK-NEXT: $p0 = IMPLICIT_DEF + ; CHECK-NEXT: $p1 = IMPLICIT_DEF + ; CHECK-NEXT: $p2 = IMPLICIT_DEF + ; CHECK-NEXT: $p3 = IMPLICIT_DEF + ; CHECK-NEXT: $p4 = IMPLICIT_DEF + ; CHECK-NEXT: $p5 = IMPLICIT_DEF + ; CHECK-NEXT: $p6 = IMPLICIT_DEF + ; CHECK-NEXT: $p7 = IMPLICIT_DEF + ; CHECK-NEXT: $p8 = IMPLICIT_DEF + ; CHECK-NEXT: $p9 = IMPLICIT_DEF + ; CHECK-NEXT: $p10 = IMPLICIT_DEF + ; CHECK-NEXT: $p11 = IMPLICIT_DEF + ; CHECK-NEXT: $p12 = IMPLICIT_DEF + ; CHECK-NEXT: $p13 = IMPLICIT_DEF + ; CHECK-NEXT: $p14 = IMPLICIT_DEF + ; CHECK-NEXT: $p15 = IMPLICIT_DEF + ; + ; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0) + ; CHECK-NEXT: RET_ReallyLR implicit $p0 + + ; EXPAND-LABEL: name: zpr_predicate_spill + ; EXPAND: liveins: $p0, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4 + ; EXPAND-NEXT: {{ $}} + ; + ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.13) + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.12) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.11) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.10) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.9) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.8) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.7) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.6) + ; EXPAND-NEXT: $z0 = frame-setup 
CPY_ZPzI_B killed $p8, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.5) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.4) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.3) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.1) + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg + ; + ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.0) + ; + ; EXPAND-NEXT: $p0 = IMPLICIT_DEF + ; EXPAND-NEXT: $p1 = IMPLICIT_DEF + ; EXPAND-NEXT: $p2 = IMPLICIT_DEF + ; EXPAND-NEXT: $p3 = IMPLICIT_DEF + ; EXPAND-NEXT: $p4 = IMPLICIT_DEF + ; EXPAND-NEXT: $p5 = IMPLICIT_DEF + ; EXPAND-NEXT: $p6 = IMPLICIT_DEF + ; EXPAND-NEXT: $p7 = IMPLICIT_DEF + ; EXPAND-NEXT: $p8 = IMPLICIT_DEF + ; EXPAND-NEXT: $p9 = IMPLICIT_DEF + ; EXPAND-NEXT: $p10 = IMPLICIT_DEF + ; EXPAND-NEXT: $p11 = IMPLICIT_DEF + ; EXPAND-NEXT: $p12 = IMPLICIT_DEF + ; EXPAND-NEXT: $p13 = IMPLICIT_DEF + ; EXPAND-NEXT: $p14 = IMPLICIT_DEF + ; EXPAND-NEXT: $p15 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.0) + ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.12) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.11) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.10) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.9) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.8) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.7) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.6) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) 
from %stack.5) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.4) + ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.3) + ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.2) + ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.1) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg + ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.13) + ; EXPAND-NEXT: RET undef $lr, implicit $p0 + %1:ppr = COPY $p0 + + $p0 = IMPLICIT_DEF + $p1 = IMPLICIT_DEF + $p2 = IMPLICIT_DEF + $p3 = IMPLICIT_DEF + $p4 = IMPLICIT_DEF + $p5 = IMPLICIT_DEF + $p6 = IMPLICIT_DEF + $p7 = IMPLICIT_DEF + $p8 = IMPLICIT_DEF + $p9 = IMPLICIT_DEF + $p10 = IMPLICIT_DEF + $p11 = IMPLICIT_DEF + $p12 = IMPLICIT_DEF + $p13 = IMPLICIT_DEF + $p14 = IMPLICIT_DEF + $p15 = IMPLICIT_DEF + + $p0 = COPY %1 + + RET_ReallyLR implicit $p0 +... 
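The EXPAND output above shows the round trip for a single predicate: `CPY_ZPzI_B` materialises 1 in every active byte lane (0 elsewhere), and `PTRUE_B` plus `CMPNE_PPzZI_B` against zero rebuild the predicate after the reload. A small self-contained scalar model of that round trip, for illustration only (16 lanes assumed here):

```
// Scalar model of the spill/fill round trip used by the pseudos. Each bool
// is one predicate lane; each uint8_t is one byte lane of the Z register.
#include <array>
#include <cstdint>

std::array<uint8_t, 16> spillLanes(const std::array<bool, 16> &P) {
  std::array<uint8_t, 16> Z{};
  for (int I = 0; I < 16; ++I)
    Z[I] = P[I] ? 1 : 0; // mov z.b, p/z, #1
  return Z;
}

std::array<bool, 16> fillLanes(const std::array<uint8_t, 16> &Z) {
  std::array<bool, 16> P{};
  for (int I = 0; I < 16; ++I)
    P[I] = Z[I] != 0; // cmpne p.b, ptrue/z, z.b, #0
  return P;
}
// For any predicate P, fillLanes(spillLanes(P)) == P.
```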
+--- +name: zpr_predicate_spill__save_restore_nzcv +tracksRegLiveness: true +stack: +liveins: + - { reg: '$p0' } +body: | + bb.0.entry: + liveins: $p0 + + ; CHECK-LABEL: name: zpr_predicate_spill__save_restore_nzcv + ; CHECK: stack: + ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, + ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; + ; CHECK-NEXT: $nzcv = IMPLICIT_DEF + ; + ; CHECK-NEXT: SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0 :: (store (s128) into %stack.0) + ; + ; CHECK-NEXT: $p0 = IMPLICIT_DEF + ; CHECK-NEXT: $p1 = IMPLICIT_DEF + ; CHECK-NEXT: $p2 = IMPLICIT_DEF + ; CHECK-NEXT: $p3 = IMPLICIT_DEF + ; CHECK-NEXT: $p4 = IMPLICIT_DEF + ; CHECK-NEXT: $p5 = IMPLICIT_DEF + ; CHECK-NEXT: $p6 = IMPLICIT_DEF + ; CHECK-NEXT: $p7 = IMPLICIT_DEF + ; CHECK-NEXT: $p8 = IMPLICIT_DEF + ; CHECK-NEXT: $p9 = IMPLICIT_DEF + ; CHECK-NEXT: $p10 = IMPLICIT_DEF + ; CHECK-NEXT: $p11 = IMPLICIT_DEF + ; CHECK-NEXT: $p12 = IMPLICIT_DEF + ; CHECK-NEXT: $p13 = IMPLICIT_DEF + ; CHECK-NEXT: $p14 = IMPLICIT_DEF + ; CHECK-NEXT: $p15 = IMPLICIT_DEF + ; + ; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0) + ; + ; CHECK-NEXT: FAKE_USE implicit $nzcv + ; CHECK-NEXT: RET_ReallyLR implicit $p0 + + ; EXPAND-LABEL: name: zpr_predicate_spill__save_restore_nzcv + ; EXPAND: liveins: $p0, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4 + ; EXPAND-NEXT: {{ $}} + ; + ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.13) + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.12) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.11) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.10) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.9) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.8) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.7) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.6) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.5) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.4) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.3) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.1) + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg + ; + ; EXPAND-NEXT: $nzcv = IMPLICIT_DEF + ; + ; 
EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.0) + ; + ; EXPAND-NEXT: $p0 = IMPLICIT_DEF + ; EXPAND-NEXT: $p1 = IMPLICIT_DEF + ; EXPAND-NEXT: $p2 = IMPLICIT_DEF + ; EXPAND-NEXT: $p3 = IMPLICIT_DEF + ; EXPAND-NEXT: $p4 = IMPLICIT_DEF + ; EXPAND-NEXT: $p5 = IMPLICIT_DEF + ; EXPAND-NEXT: $p6 = IMPLICIT_DEF + ; EXPAND-NEXT: $p7 = IMPLICIT_DEF + ; EXPAND-NEXT: $p8 = IMPLICIT_DEF + ; EXPAND-NEXT: $p9 = IMPLICIT_DEF + ; EXPAND-NEXT: $p10 = IMPLICIT_DEF + ; EXPAND-NEXT: $p11 = IMPLICIT_DEF + ; EXPAND-NEXT: $p12 = IMPLICIT_DEF + ; EXPAND-NEXT: $p13 = IMPLICIT_DEF + ; EXPAND-NEXT: $p14 = IMPLICIT_DEF + ; EXPAND-NEXT: $p15 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.0) + ; EXPAND-NEXT: $x0 = MRS 55824, implicit-def $nzcv, implicit $nzcv + ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: MSR 55824, $x0, implicit-def $nzcv + ; + ; EXPAND-NEXT: FAKE_USE implicit $nzcv + ; + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.12) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.11) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.10) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.9) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.8) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.7) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.6) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.5) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.4) + ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.3) + ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p6 = 
frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.2) + ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.1) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg + ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.13) + ; EXPAND-NEXT: RET undef $lr, implicit $p0 + $nzcv = IMPLICIT_DEF + + %1:ppr = COPY $p0 + + $p0 = IMPLICIT_DEF + $p1 = IMPLICIT_DEF + $p2 = IMPLICIT_DEF + $p3 = IMPLICIT_DEF + $p4 = IMPLICIT_DEF + $p5 = IMPLICIT_DEF + $p6 = IMPLICIT_DEF + $p7 = IMPLICIT_DEF + $p8 = IMPLICIT_DEF + $p9 = IMPLICIT_DEF + $p10 = IMPLICIT_DEF + $p11 = IMPLICIT_DEF + $p12 = IMPLICIT_DEF + $p13 = IMPLICIT_DEF + $p14 = IMPLICIT_DEF + $p15 = IMPLICIT_DEF + + $p0 = COPY %1 + + FAKE_USE implicit $nzcv + + RET_ReallyLR implicit $p0 +... +--- +name: zpr_predicate_spill__save_restore_nzcv__spill_gpr +tracksRegLiveness: true +stack: +liveins: + - { reg: '$p0' } + - { reg: '$x0' } + - { reg: '$x1' } + - { reg: '$x2' } + - { reg: '$x3' } + - { reg: '$x4' } + - { reg: '$x5' } + - { reg: '$x6' } + - { reg: '$x7' } +body: | + bb.0.entry: + liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7 + + ; CHECK-LABEL: name: zpr_predicate_spill__save_restore_nzcv__spill_gpr + ; CHECK: stack: + ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, + ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: + ; CHECK: liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7 + ; CHECK-NEXT: {{ $}} + ; + ; CHECK-NEXT: $nzcv = IMPLICIT_DEF + ; CHECK-NEXT: $x8 = IMPLICIT_DEF + ; CHECK-NEXT: $x9 = IMPLICIT_DEF + ; CHECK-NEXT: $x10 = IMPLICIT_DEF + ; CHECK-NEXT: $x11 = IMPLICIT_DEF + ; CHECK-NEXT: $x12 = IMPLICIT_DEF + ; CHECK-NEXT: $x13 = IMPLICIT_DEF + ; CHECK-NEXT: $x14 = IMPLICIT_DEF + ; CHECK-NEXT: $x15 = IMPLICIT_DEF + ; CHECK-NEXT: $x16 = IMPLICIT_DEF + ; CHECK-NEXT: $x17 = IMPLICIT_DEF + ; CHECK-NEXT: $x18 = IMPLICIT_DEF + ; + ; CHECK-NEXT: SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0 :: (store (s128) into %stack.0) + ; + ; CHECK-NEXT: $p0 = IMPLICIT_DEF + ; CHECK-NEXT: $p1 = IMPLICIT_DEF + ; CHECK-NEXT: $p2 = IMPLICIT_DEF + ; CHECK-NEXT: $p3 = IMPLICIT_DEF + ; CHECK-NEXT: $p4 = IMPLICIT_DEF + ; CHECK-NEXT: $p5 = IMPLICIT_DEF + ; CHECK-NEXT: $p6 = IMPLICIT_DEF + ; CHECK-NEXT: $p7 = IMPLICIT_DEF + ; CHECK-NEXT: $p8 = IMPLICIT_DEF + ; CHECK-NEXT: $p9 = IMPLICIT_DEF + ; CHECK-NEXT: $p10 = IMPLICIT_DEF + ; CHECK-NEXT: $p11 = IMPLICIT_DEF + ; CHECK-NEXT: $p12 = IMPLICIT_DEF + ; CHECK-NEXT: $p13 = IMPLICIT_DEF + ; CHECK-NEXT: $p14 = IMPLICIT_DEF + ; CHECK-NEXT: $p15 = IMPLICIT_DEF + ; + ; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0) + ; + ; CHECK-NEXT: FAKE_USE implicit $nzcv, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 + ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit 
$x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 + + ; EXPAND-LABEL: name: zpr_predicate_spill__save_restore_nzcv__spill_gpr + ; EXPAND: liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4 + ; EXPAND-NEXT: {{ $}} + ; + ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.13) + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.12) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.11) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.10) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.9) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.8) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.7) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.6) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.5) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.4) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.3) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.1) + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg + ; + ; EXPAND-NEXT: $nzcv = IMPLICIT_DEF + ; EXPAND-NEXT: $x8 = IMPLICIT_DEF + ; EXPAND-NEXT: $x9 = IMPLICIT_DEF + ; EXPAND-NEXT: $x10 = IMPLICIT_DEF + ; EXPAND-NEXT: $x11 = IMPLICIT_DEF + ; EXPAND-NEXT: $x12 = IMPLICIT_DEF + ; EXPAND-NEXT: $x13 = IMPLICIT_DEF + ; EXPAND-NEXT: $x14 = IMPLICIT_DEF + ; EXPAND-NEXT: $x15 = IMPLICIT_DEF + ; EXPAND-NEXT: $x16 = IMPLICIT_DEF + ; EXPAND-NEXT: $x17 = IMPLICIT_DEF + ; EXPAND-NEXT: $x18 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.0) + ; + ; EXPAND-NEXT: $p0 = IMPLICIT_DEF + ; EXPAND-NEXT: $p1 = IMPLICIT_DEF + ; EXPAND-NEXT: $p2 = IMPLICIT_DEF + ; EXPAND-NEXT: $p3 = IMPLICIT_DEF + ; EXPAND-NEXT: $p4 = IMPLICIT_DEF + ; EXPAND-NEXT: $p5 = IMPLICIT_DEF + ; EXPAND-NEXT: $p6 = IMPLICIT_DEF + ; EXPAND-NEXT: $p7 = IMPLICIT_DEF + ; EXPAND-NEXT: $p8 = IMPLICIT_DEF + ; EXPAND-NEXT: $p9 = IMPLICIT_DEF + ; EXPAND-NEXT: $p10 = IMPLICIT_DEF + ; EXPAND-NEXT: $p11 = IMPLICIT_DEF + ; EXPAND-NEXT: $p12 = IMPLICIT_DEF + ; EXPAND-NEXT: $p13 = IMPLICIT_DEF + ; EXPAND-NEXT: $p14 = IMPLICIT_DEF + ; EXPAND-NEXT: $p15 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: $fp = ADDVL_XXI $sp, 13, implicit 
$vg + ; EXPAND-NEXT: STRXui $x0, killed $fp, 1 :: (store (s64) into %stack.14) + ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.0) + ; EXPAND-NEXT: $x0 = MRS 55824, implicit-def $nzcv, implicit $nzcv + ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: MSR 55824, $x0, implicit-def $nzcv + ; EXPAND-NEXT: $fp = ADDVL_XXI $sp, 13, implicit $vg + ; EXPAND-NEXT: $x0 = LDRXui killed $fp, 1 :: (load (s64) from %stack.14) + ; + ; EXPAND-NEXT: FAKE_USE implicit $nzcv, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.12) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.11) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.10) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.9) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.8) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.7) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.6) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.5) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.4) + ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.3) + ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.2) + ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; 
EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.1) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg + ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.13) + ; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 + $nzcv = IMPLICIT_DEF + $x8 = IMPLICIT_DEF + $x9 = IMPLICIT_DEF + $x10 = IMPLICIT_DEF + $x11 = IMPLICIT_DEF + $x12 = IMPLICIT_DEF + $x13 = IMPLICIT_DEF + $x14 = IMPLICIT_DEF + $x15 = IMPLICIT_DEF + $x16 = IMPLICIT_DEF + $x17 = IMPLICIT_DEF + $x18 = IMPLICIT_DEF + + %1:ppr = COPY $p0 + + $p0 = IMPLICIT_DEF + $p1 = IMPLICIT_DEF + $p2 = IMPLICIT_DEF + $p3 = IMPLICIT_DEF + $p4 = IMPLICIT_DEF + $p5 = IMPLICIT_DEF + $p6 = IMPLICIT_DEF + $p7 = IMPLICIT_DEF + $p8 = IMPLICIT_DEF + $p9 = IMPLICIT_DEF + $p10 = IMPLICIT_DEF + $p11 = IMPLICIT_DEF + $p12 = IMPLICIT_DEF + $p13 = IMPLICIT_DEF + $p14 = IMPLICIT_DEF + $p15 = IMPLICIT_DEF + + $p0 = COPY %1 + + FAKE_USE implicit $nzcv, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 + + RET_ReallyLR implicit $p0, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 +... 
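In the test above, no GPR is free when NZCV has to be preserved, so `$x0` is saved to an emergency slot (`%stack.14`) around the `MRS`/`MSR` pair and reloaded afterwards. The slot comes from the `EmergencyStackSlots` structure introduced in the frame-lowering changes: one optional frame index per register class, created lazily on first use and reused by every later expansion in the function. A condensed sketch of that pattern (the helper name is illustrative, not from the patch):

```
// Lazily create (at most once per function) an emergency spill slot for
// register class RC. Mirrors what ScopedScavengeOrSpill does when no
// register of the class can be scavenged at the expansion point.
static int getOrCreateEmergencySlot(MachineFrameInfo &MFI,
                                    const TargetRegisterInfo &TRI,
                                    const TargetRegisterClass &RC,
                                    std::optional<int> &SpillFI) {
  if (!SpillFI)
    SpillFI = MFI.CreateSpillStackObject(TRI.getSpillSize(RC),
                                         TRI.getSpillAlign(RC));
  return *SpillFI;
}
```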
+--- +name: zpr_predicate_spill__spill_zpr +tracksRegLiveness: true +stack: +liveins: + - { reg: '$p0' } + - { reg: '$z0' } + - { reg: '$z1' } + - { reg: '$z2' } + - { reg: '$z3' } + - { reg: '$z4' } + - { reg: '$z5' } + - { reg: '$z6' } + - { reg: '$z7' } +body: | + bb.0.entry: + liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + + ; CHECK-LABEL: name: zpr_predicate_spill__spill_zpr + ; CHECK: stack: + ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, + ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: + ; CHECK: liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 + ; CHECK-NEXT: {{ $}} + ; + ; CHECK-NEXT: $z16 = IMPLICIT_DEF + ; CHECK-NEXT: $z17 = IMPLICIT_DEF + ; CHECK-NEXT: $z18 = IMPLICIT_DEF + ; CHECK-NEXT: $z19 = IMPLICIT_DEF + ; CHECK-NEXT: $z20 = IMPLICIT_DEF + ; CHECK-NEXT: $z21 = IMPLICIT_DEF + ; CHECK-NEXT: $z22 = IMPLICIT_DEF + ; CHECK-NEXT: $z23 = IMPLICIT_DEF + ; CHECK-NEXT: $z24 = IMPLICIT_DEF + ; CHECK-NEXT: $z25 = IMPLICIT_DEF + ; CHECK-NEXT: $z26 = IMPLICIT_DEF + ; CHECK-NEXT: $z27 = IMPLICIT_DEF + ; CHECK-NEXT: $z28 = IMPLICIT_DEF + ; CHECK-NEXT: $z29 = IMPLICIT_DEF + ; CHECK-NEXT: $z30 = IMPLICIT_DEF + ; CHECK-NEXT: $z31 = IMPLICIT_DEF + ; + ; CHECK-NEXT: SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0 :: (store (s128) into %stack.0) + ; + ; CHECK-NEXT: $p0 = IMPLICIT_DEF + ; CHECK-NEXT: $p1 = IMPLICIT_DEF + ; CHECK-NEXT: $p2 = IMPLICIT_DEF + ; CHECK-NEXT: $p3 = IMPLICIT_DEF + ; CHECK-NEXT: $p4 = IMPLICIT_DEF + ; CHECK-NEXT: $p5 = IMPLICIT_DEF + ; CHECK-NEXT: $p6 = IMPLICIT_DEF + ; CHECK-NEXT: $p7 = IMPLICIT_DEF + ; CHECK-NEXT: $p8 = IMPLICIT_DEF + ; CHECK-NEXT: $p9 = IMPLICIT_DEF + ; CHECK-NEXT: $p10 = IMPLICIT_DEF + ; CHECK-NEXT: $p11 = IMPLICIT_DEF + ; CHECK-NEXT: $p12 = IMPLICIT_DEF + ; CHECK-NEXT: $p13 = IMPLICIT_DEF + ; CHECK-NEXT: $p14 = IMPLICIT_DEF + ; CHECK-NEXT: $p15 = IMPLICIT_DEF + ; + ; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0) + ; + ; CHECK-NEXT: FAKE_USE implicit $z16, implicit $z17, implicit $z18, implicit $z19, implicit $z20, implicit $z21, implicit $z22, implicit $z23, implicit $z24, implicit $z25, implicit $z26, implicit $z27, implicit $z28, implicit $z29, implicit $z30, implicit $z31 + ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $z0, implicit $z1, implicit $z2, implicit $z3, implicit $z4, implicit $z5, implicit $z6, implicit $z7 + + ; EXPAND-LABEL: name: zpr_predicate_spill__spill_zpr + ; EXPAND: liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4, $z23, $z22, $z21, $z20, $z19, $z18, $z17, $z16 + ; EXPAND-NEXT: {{ $}} + ; + ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.21) + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -20, implicit $vg + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p15, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 0 :: (store (s128) into %stack.20) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p14, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 1 :: (store (s128) into %stack.19) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p13, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 2 :: (store (s128) into %stack.18) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p12, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 3 :: (store (s128) into %stack.17) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p11, 1, 0 + ; EXPAND-NEXT: frame-setup 
STR_ZXI $z24, $sp, 4 :: (store (s128) into %stack.16) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p10, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 5 :: (store (s128) into %stack.15) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p9, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 6 :: (store (s128) into %stack.14) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p8, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 7 :: (store (s128) into %stack.13) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p7, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 8 :: (store (s128) into %stack.12) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p6, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 9 :: (store (s128) into %stack.11) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p5, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 10 :: (store (s128) into %stack.10) + ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 11 :: (store (s128) into %stack.9) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z23, $sp, 12 :: (store (s128) into %stack.8) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z22, $sp, 13 :: (store (s128) into %stack.7) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z21, $sp, 14 :: (store (s128) into %stack.6) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z20, $sp, 15 :: (store (s128) into %stack.5) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z19, $sp, 16 :: (store (s128) into %stack.4) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z18, $sp, 17 :: (store (s128) into %stack.3) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z17, $sp, 18 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z16, $sp, 19 :: (store (s128) into %stack.1) + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg + ; + ; EXPAND-NEXT: $z16 = IMPLICIT_DEF + ; EXPAND-NEXT: $z17 = IMPLICIT_DEF + ; EXPAND-NEXT: $z18 = IMPLICIT_DEF + ; EXPAND-NEXT: $z19 = IMPLICIT_DEF + ; EXPAND-NEXT: $z20 = IMPLICIT_DEF + ; EXPAND-NEXT: $z21 = IMPLICIT_DEF + ; EXPAND-NEXT: $z22 = IMPLICIT_DEF + ; EXPAND-NEXT: $z23 = IMPLICIT_DEF + ; EXPAND-NEXT: $z24 = IMPLICIT_DEF + ; EXPAND-NEXT: $z25 = IMPLICIT_DEF + ; EXPAND-NEXT: $z26 = IMPLICIT_DEF + ; EXPAND-NEXT: $z27 = IMPLICIT_DEF + ; EXPAND-NEXT: $z28 = IMPLICIT_DEF + ; EXPAND-NEXT: $z29 = IMPLICIT_DEF + ; EXPAND-NEXT: $z30 = IMPLICIT_DEF + ; EXPAND-NEXT: $z31 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.22) + ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.0) + ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.22) + ; + ; EXPAND-NEXT: $p0 = IMPLICIT_DEF + ; EXPAND-NEXT: $p1 = IMPLICIT_DEF + ; EXPAND-NEXT: $p2 = IMPLICIT_DEF + ; EXPAND-NEXT: $p3 = IMPLICIT_DEF + ; EXPAND-NEXT: $p4 = IMPLICIT_DEF + ; EXPAND-NEXT: $p5 = IMPLICIT_DEF + ; EXPAND-NEXT: $p6 = IMPLICIT_DEF + ; EXPAND-NEXT: $p7 = IMPLICIT_DEF + ; EXPAND-NEXT: $p8 = IMPLICIT_DEF + ; EXPAND-NEXT: $p9 = IMPLICIT_DEF + ; EXPAND-NEXT: $p10 = IMPLICIT_DEF + ; EXPAND-NEXT: $p11 = IMPLICIT_DEF + ; EXPAND-NEXT: $p12 = IMPLICIT_DEF + ; EXPAND-NEXT: $p13 = IMPLICIT_DEF + ; EXPAND-NEXT: $p14 = IMPLICIT_DEF + ; EXPAND-NEXT: $p15 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.22) + ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 1 :: (load (s128) from %stack.0) + ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, 
implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.22) + ; + ; EXPAND-NEXT: FAKE_USE implicit $z16, implicit $z17, implicit $z18, implicit $z19, implicit $z20, implicit $z21, implicit $z22, implicit $z23, implicit $z24, implicit $z25, implicit $z26, implicit $z27, implicit $z28, implicit $z29, implicit $z30, implicit $z31 + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg + ; EXPAND-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 12 :: (load (s128) from %stack.8) + ; EXPAND-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 13 :: (load (s128) from %stack.7) + ; EXPAND-NEXT: $z21 = frame-destroy LDR_ZXI $sp, 14 :: (load (s128) from %stack.6) + ; EXPAND-NEXT: $z20 = frame-destroy LDR_ZXI $sp, 15 :: (load (s128) from %stack.5) + ; EXPAND-NEXT: $z19 = frame-destroy LDR_ZXI $sp, 16 :: (load (s128) from %stack.4) + ; EXPAND-NEXT: $z18 = frame-destroy LDR_ZXI $sp, 17 :: (load (s128) from %stack.3) + ; EXPAND-NEXT: $z17 = frame-destroy LDR_ZXI $sp, 18 :: (load (s128) from %stack.2) + ; EXPAND-NEXT: $z16 = frame-destroy LDR_ZXI $sp, 19 :: (load (s128) from %stack.1) + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.20) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.19) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.18) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.17) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.16) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.15) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.14) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.13) + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.12) + ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.11) + ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: 
$p6 = frame-destroy CMPNE_PPzZI_B $p6, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.10) + ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.9) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 20, implicit $vg + ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.21) + ; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $z0, implicit $z1, implicit $z2, implicit $z3, implicit $z4, implicit $z5, implicit $z6, implicit $z7 + $z16 = IMPLICIT_DEF + $z17 = IMPLICIT_DEF + $z18 = IMPLICIT_DEF + $z19 = IMPLICIT_DEF + $z20 = IMPLICIT_DEF + $z21 = IMPLICIT_DEF + $z22 = IMPLICIT_DEF + $z23 = IMPLICIT_DEF + $z24 = IMPLICIT_DEF + $z25 = IMPLICIT_DEF + $z26 = IMPLICIT_DEF + $z27 = IMPLICIT_DEF + $z28 = IMPLICIT_DEF + $z29 = IMPLICIT_DEF + $z30 = IMPLICIT_DEF + $z31 = IMPLICIT_DEF + + %1:ppr = COPY $p0 + + $p0 = IMPLICIT_DEF + $p1 = IMPLICIT_DEF + $p2 = IMPLICIT_DEF + $p3 = IMPLICIT_DEF + $p4 = IMPLICIT_DEF + $p5 = IMPLICIT_DEF + $p6 = IMPLICIT_DEF + $p7 = IMPLICIT_DEF + $p8 = IMPLICIT_DEF + $p9 = IMPLICIT_DEF + $p10 = IMPLICIT_DEF + $p11 = IMPLICIT_DEF + $p12 = IMPLICIT_DEF + $p13 = IMPLICIT_DEF + $p14 = IMPLICIT_DEF + $p15 = IMPLICIT_DEF + + $p0 = COPY %1 + + FAKE_USE implicit $z16, implicit $z17, implicit $z18, implicit $z19, implicit $z20, implicit $z21, implicit $z22, implicit $z23, implicit $z24, implicit $z25, implicit $z26, implicit $z27, implicit $z28, implicit $z29, implicit $z30, implicit $z31 + + RET_ReallyLR implicit $p0, implicit $z0, implicit $z1, implicit $z2, implicit $z3, implicit $z4, implicit $z5, implicit $z6, implicit $z7 +... 
+--- +name: zpr_predicate_spill_above_p7 +tracksRegLiveness: true +stack: +liveins: + - { reg: '$p0' } + - { reg: '$p1' } + - { reg: '$p2' } + - { reg: '$p3' } +body: | + bb.0.entry: + liveins: $p0, $p1, $p2, $p3 + + ; CHECK-LABEL: name: zpr_predicate_spill_above_p7 + ; CHECK: stack: + ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, + ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: + ; CHECK: liveins: $p0, $p1, $p2, $p3 + ; CHECK-NEXT: {{ $}} + ; + ; CHECK-NEXT: $p15 = IMPLICIT_DEF + ; + ; CHECK-NEXT: SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p15, %stack.0, 0 :: (store (s128) into %stack.0) + ; + ; CHECK-NEXT: $p0 = IMPLICIT_DEF + ; CHECK-NEXT: $p1 = IMPLICIT_DEF + ; CHECK-NEXT: $p2 = IMPLICIT_DEF + ; CHECK-NEXT: $p3 = IMPLICIT_DEF + ; CHECK-NEXT: $p4 = IMPLICIT_DEF + ; CHECK-NEXT: $p5 = IMPLICIT_DEF + ; CHECK-NEXT: $p6 = IMPLICIT_DEF + ; CHECK-NEXT: $p7 = IMPLICIT_DEF + ; CHECK-NEXT: $p8 = IMPLICIT_DEF + ; CHECK-NEXT: $p9 = IMPLICIT_DEF + ; CHECK-NEXT: $p10 = IMPLICIT_DEF + ; CHECK-NEXT: $p11 = IMPLICIT_DEF + ; CHECK-NEXT: $p12 = IMPLICIT_DEF + ; CHECK-NEXT: $p13 = IMPLICIT_DEF + ; CHECK-NEXT: $p14 = IMPLICIT_DEF + ; CHECK-NEXT: $p15 = IMPLICIT_DEF + ; + ; CHECK-NEXT: $p15 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0) + ; + ; CHECK-NEXT: FAKE_USE implicit $p4, implicit $p5, implicit $p6, implicit $p7 + ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3 + + ; EXPAND-LABEL: name: zpr_predicate_spill_above_p7 + ; EXPAND: liveins: $p0, $p1, $p2, $p3, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4 + ; EXPAND-NEXT: {{ $}} + ; + ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.13) + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.12) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.11) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.10) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.9) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.8) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.7) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.6) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.5) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.4) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.3) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 
:: (store (s128) into %stack.1) + ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg + ; + ; EXPAND-NEXT: $p15 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p15, 1, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.0) + ; + ; EXPAND-NEXT: $p0 = IMPLICIT_DEF + ; EXPAND-NEXT: $p1 = IMPLICIT_DEF + ; EXPAND-NEXT: $p2 = IMPLICIT_DEF + ; EXPAND-NEXT: $p3 = IMPLICIT_DEF + ; EXPAND-NEXT: $p4 = IMPLICIT_DEF + ; EXPAND-NEXT: $p5 = IMPLICIT_DEF + ; EXPAND-NEXT: $p6 = IMPLICIT_DEF + ; EXPAND-NEXT: $p7 = IMPLICIT_DEF + ; EXPAND-NEXT: $p8 = IMPLICIT_DEF + ; EXPAND-NEXT: $p9 = IMPLICIT_DEF + ; EXPAND-NEXT: $p10 = IMPLICIT_DEF + ; EXPAND-NEXT: $p11 = IMPLICIT_DEF + ; EXPAND-NEXT: $p12 = IMPLICIT_DEF + ; EXPAND-NEXT: $p13 = IMPLICIT_DEF + ; EXPAND-NEXT: $p14 = IMPLICIT_DEF + ; EXPAND-NEXT: $p15 = IMPLICIT_DEF + ; + ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.14) + ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 1 :: (load (s128) from %stack.0) + ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p15 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.14) + ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; + ; EXPAND-NEXT: FAKE_USE implicit $p4, implicit $p5, implicit $p6, implicit $p7 + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.12) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.11) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.10) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.9) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.8) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.7) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.6) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.5) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = 
frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.4) + ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.3) + ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.2) + ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.1) + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg + ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.13) + ; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $p1, implicit $p2, implicit $p3 + $p15 = IMPLICIT_DEF + %1:ppr = COPY $p15 + + $p0 = IMPLICIT_DEF + $p1 = IMPLICIT_DEF + $p2 = IMPLICIT_DEF + $p3 = IMPLICIT_DEF + $p4 = IMPLICIT_DEF + $p5 = IMPLICIT_DEF + $p6 = IMPLICIT_DEF + $p7 = IMPLICIT_DEF + $p8 = IMPLICIT_DEF + $p9 = IMPLICIT_DEF + $p10 = IMPLICIT_DEF + $p11 = IMPLICIT_DEF + $p12 = IMPLICIT_DEF + $p13 = IMPLICIT_DEF + $p14 = IMPLICIT_DEF + $p15 = IMPLICIT_DEF + + $p15 = COPY %1 + + FAKE_USE implicit $p4, implicit $p5, implicit $p6, implicit $p7 + + RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3 +... 
+---
+name: zpr_predicate_spill_p4_saved
+tracksRegLiveness: true
+stack:
+liveins:
+  - { reg: '$p0' }
+  - { reg: '$p1' }
+  - { reg: '$p2' }
+  - { reg: '$p3' }
+body: |
+  bb.0.entry:
+    liveins: $p0, $p1, $p2, $p3
+
+    ; CHECK-LABEL: name: zpr_predicate_spill_p4_saved
+    ; CHECK: liveins: $p0, $p1, $p2, $p3
+    ; CHECK-NEXT: {{ $}}
+    ;
+    ; CHECK-NEXT: $p8 = IMPLICIT_DEF
+    ;
+    ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3
+
+    ; EXPAND-LABEL: name: zpr_predicate_spill_p4_saved
+    ; EXPAND: liveins: $p0, $p1, $p2, $p3, $fp, $p8, $p4
+    ; EXPAND-NEXT: {{ $}}
+    ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
+    ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg
+    ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0
+    ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.1)
+    ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0
+    ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.0)
+    ;
+    ; EXPAND-NEXT: $p8 = IMPLICIT_DEF
+    ;
+    ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.1)
+    ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+    ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+    ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.0)
+    ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
+    ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+    ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg
+    ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
+    ; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $p1, implicit $p2, implicit $p3
+
+    ; If we spill a register in the range p8-p15, p4 must also be saved, so we can
+    ; guarantee there is a free register (in the range p0-p7) for the cmpne reload.
+    $p8 = IMPLICIT_DEF
+
+    RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3
+...
diff --git a/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll b/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll
index 0b6bf3892a0c2..c67d91952c618 100644
--- a/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll
+++ b/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll
@@ -1,5 +1,7 @@
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-stack-hazard-remark-size=64 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-stack-hazard-size=1024 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-PADDING
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-enable-zpr-predicate-spills -aarch64-stack-hazard-remark-size=64 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-ZPR-PRED-SPILLS
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-enable-zpr-predicate-spills -aarch64-stack-hazard-size=1024 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-ZPR-PRED-SPILLS-WITH-PADDING
 
 ; Don't emit remarks for non-streaming functions.
 define float @csr_x20_stackargs_notsc(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) {
@@ -66,13 +68,18 @@ entry:
 }
 
 ; SVE calling conventions
-; Predicate register spills end up in FP region, currently.
+; Predicate register spills end up in FP region, currently. This can be
+; mitigated with the -aarch64-enable-zpr-predicate-spills option.
 
 define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) #2 {
 ; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at [SP-48-258 * vscale] is too close to FPR stack object at [SP-48-256 * vscale]
 ; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48]
 ; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at [SP-1072-258 * vscale] is too close to FPR stack object at [SP-1072-256 * vscale]
 ; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'svecc_call':
+; CHECK-ZPR-PRED-SPILLS-NOT: :0:0: stack hazard in 'svecc_call': PPR stack object at {{.*}} is too close to FPR stack object
+; CHECK-ZPR-PRED-SPILLS: :0:0: stack hazard in 'svecc_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48]
+; CHECK-ZPR-PRED-SPILLS-WITH-PADDING-NOT: :0:0: stack hazard in 'svecc_call': PPR stack object at {{.*}} is too close to FPR stack object
+; CHECK-ZPR-PRED-SPILLS-WITH-PADDING-NOT: :0:0: stack hazard in 'svecc_call': FPR stack object at {{.*}} is too close to GPR stack object
 entry:
   tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
   %call = call ptr @memset(ptr noundef nonnull %P1, i32 noundef 45, i32 noundef 37)
@@ -84,6 +91,10 @@ define i32 @svecc_alloca_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) #2 {
 ; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48]
 ; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at [SP-1072-258 * vscale] is too close to FPR stack object at [SP-1072-256 * vscale]
 ; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call':
+; CHECK-ZPR-PRED-SPILLS-NOT: :0:0: stack hazard in 'svecc_call': PPR stack object at {{.*}} is too close to FPR stack object
+; CHECK-ZPR-PRED-SPILLS: :0:0: stack hazard in 'svecc_alloca_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48]
+; CHECK-ZPR-PRED-SPILLS-WITH-PADDING-NOT: :0:0: stack hazard in 'svecc_alloca_call': PPR stack object at {{.*}} is too close to FPR stack object
+; CHECK-ZPR-PRED-SPILLS-WITH-PADDING-NOT: :0:0: stack hazard in 'svecc_alloca_call': FPR stack object at {{.*}} is too close to GPR stack object
 entry:
   tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
   %0 = alloca [37 x i8], align 16

From d3ff57971a655f5424cc86a6e2535e2bf31d719d Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Thu, 23 Jan 2025 16:45:52 +0000
Subject: [PATCH 2/8] Fixups

---
 .../Target/AArch64/AArch64FrameLowering.cpp   |  27 +-
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp  |   2 +-
 .../AArch64/spill-fill-zpr-predicates.mir     | 388 ++++++++++--------
 3 files changed, 223 insertions(+), 194 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 9b852e2c59564..1fb8fc759b3f7 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -4336,13 +4336,9 @@ static bool expandFillPPRFromZPRSlotPseudo(
       UsedRegs, ZPRRegs, SpillSlots.ZPRSpillFI);
 
   Register PredReg = AArch64::NoRegister;
-  std::optional<ScopedScavengeOrSpill> FindPPR3bReg;
-  if (AArch64::PPR_3bRegClass.contains(MI.getOperand(0).getReg()))
-    PredReg = MI.getOperand(0).getReg();
-  else
-    FindPPR3bReg.emplace(MF, MBB,
MachineBasicBlock::iterator(MI), PredReg, - AArch64::P0, AArch64::PPR_3bRegClass, UsedRegs, - PPR3bRegs, SpillSlots.PPRSpillFI); + ScopedScavengeOrSpill FindPPR3bReg( + MF, MBB, MachineBasicBlock::iterator(MI), PredReg, AArch64::P0, + AArch64::PPR_3bRegClass, UsedRegs, PPR3bRegs, SpillSlots.PPRSpillFI); // Elide NZCV spills if we know it is not used. Register NZCVSaveReg = AArch64::NoRegister; @@ -4354,8 +4350,7 @@ static bool expandFillPPRFromZPRSlotPseudo( SpillSlots.GPRSpillFI); #ifndef NDEBUG - bool Spilled = FindZPRReg.hasSpilled() || - (FindPPR3bReg && FindPPR3bReg->hasSpilled()) || + bool Spilled = FindZPRReg.hasSpilled() || FindPPR3bReg.hasSpilled() || (FindGPRReg && FindGPRReg->hasSpilled()); bool InPrologueOrEpilogue = MI.getFlag(MachineInstr::FrameSetup) || MI.getFlag(MachineInstr::FrameDestroy); @@ -4397,7 +4392,7 @@ static bool expandFillPPRFromZPRSlotPseudo( .getInstr()); propagateFrameFlags(MI, MachineInstrs); - return FindPPR3bReg && FindPPR3bReg->hasSpilled(); + return FindPPR3bReg.hasSpilled(); } /// Expands all FILL_PPR_FROM_ZPR_SLOT_PSEUDO and SPILL_PPR_TO_ZPR_SLOT_PSEUDO @@ -4450,6 +4445,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( if (CSRMask) ScavengeableRegs.clearBitsInMask(CSRMask); // TODO: Allow reusing callee-saved registers that have been saved. + assert(ScavengeableRegs.count() > 0 && "Expected scavengeable registers"); return ScavengeableRegs; }; @@ -4475,9 +4471,15 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( EmergencyStackSlots SpillSlots; for (MachineBasicBlock &MBB : MF) { + // In the case we had to spill a predicate (in the range p0-p7) to reload + // a predicate (>= p8), additional spill/fill pseudos will be created. + // These need an additional expansion pass. Note: There will only be at + // most two expansion passes, as spilling/filling a predicate in the range + // p0-p7 never requires spilling another predicate. for (int Pass = 0; Pass < 2; Pass++) { bool HasPPRSpills = expandSMEPPRToZPRSpillPseudos( MBB, TRI, ZPRRegs, PPR3bRegs, GPRRegs, SpillSlots); + assert((Pass == 0 || !HasPPRSpills) && "Did not expect PPR spills"); if (!HasPPRSpills) break; } @@ -5528,9 +5530,8 @@ void AArch64FrameLowering::emitRemarks( if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) { // SPILL_PPR_TO_ZPR_SLOT_PSEUDO and FILL_PPR_FROM_ZPR_SLOT_PSEUDO // spill/fill the predicate as a data vector (so are an FPR acess). - if (!is_contained({AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO, - AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO}, - MI.getOpcode()) && + if (MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO && + MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO && AArch64::PPRRegClass.contains(MI.getOperand(0).getReg())) RegTy = StackAccess::PPR; else diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 5864f57582e21..34d05c6457e05 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -414,7 +414,7 @@ unsigned AArch64Subtarget::getHwModeSet() const { // // FIXME: This overrides the table-gen'd `getHwModeSet()` which only looks at // CPU features. 
- if (EnableZPRPredicateSpills.getValue() && + if (EnableZPRPredicateSpills.getValue() && getStreamingHazardSize() > 0 && (isStreaming() || isStreamingCompatible())) { Modes |= (1 << 0); } diff --git a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir index a432a61384e42..8aa957f04efc0 100644 --- a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir +++ b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir @@ -31,7 +31,6 @@ body: | liveins: $p0 ; CHECK-LABEL: name: zpr_predicate_spill - ; CHECK: stack: ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ; CHECK: liveins: $p0 @@ -57,42 +56,46 @@ body: | ; CHECK-NEXT: $p15 = IMPLICIT_DEF ; ; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0) + ; ; CHECK-NEXT: RET_ReallyLR implicit $p0 ; EXPAND-LABEL: name: zpr_predicate_spill ; EXPAND: liveins: $p0, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4 ; EXPAND-NEXT: {{ $}} ; - ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.13) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 + ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.14) ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.12) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.13) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.11) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.12) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.10) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.11) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.9) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.10) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.8) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.9) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.7) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.8) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.6) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.7) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.5) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.6) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.4) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.5) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, 
$sp, 9 :: (store (s128) into %stack.3) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.4) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.3) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.1) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg ; ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0 - ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.0) + ; EXPAND-NEXT: $x8 = ADDXri $sp, 1024, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $x8, 0 :: (store (s128) into %stack.0) ; ; EXPAND-NEXT: $p0 = IMPLICIT_DEF ; EXPAND-NEXT: $p1 = IMPLICIT_DEF @@ -111,49 +114,51 @@ body: | ; EXPAND-NEXT: $p14 = IMPLICIT_DEF ; EXPAND-NEXT: $p15 = IMPLICIT_DEF ; - ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.0) + ; EXPAND-NEXT: $z0 = LDR_ZXI killed $x8, 0 :: (load (s128) from %stack.0) ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.12) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.13) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.11) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.10) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.9) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.8) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.7) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.6) + ; 
EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.5) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.4) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5) ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.3) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4) ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.2) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3) ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.1) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2) ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg - ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.13) + ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14) + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 ; EXPAND-NEXT: RET undef $lr, implicit $p0 %1:ppr = COPY $p0 @@ -189,7 +194,6 @@ body: | liveins: $p0 ; CHECK-LABEL: name: zpr_predicate_spill__save_restore_nzcv - ; CHECK: stack: ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ; CHECK: liveins: $p0 @@ -219,44 +223,48 @@ body: | ; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0) ; ; CHECK-NEXT: FAKE_USE implicit $nzcv + ; ; CHECK-NEXT: RET_ReallyLR implicit $p0 ; EXPAND-LABEL: name: zpr_predicate_spill__save_restore_nzcv ; EXPAND: liveins: $p0, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4 ; EXPAND-NEXT: {{ $}} ; - ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.13) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 + ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.14) ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.12) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.13) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0 - ; EXPAND-NEXT: 
frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.11) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.12) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.10) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.11) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.9) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.10) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.8) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.9) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.7) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.8) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.6) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.7) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.5) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.6) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.4) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.5) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.3) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.4) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.3) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.1) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg ; ; EXPAND-NEXT: $nzcv = IMPLICIT_DEF ; ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0 - ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.0) + ; EXPAND-NEXT: $x8 = ADDXri $sp, 1024, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $x8, 0 :: (store (s128) into %stack.0) ; ; EXPAND-NEXT: $p0 = IMPLICIT_DEF ; EXPAND-NEXT: $p1 = IMPLICIT_DEF @@ -275,7 +283,7 @@ body: | ; EXPAND-NEXT: $p14 = IMPLICIT_DEF ; EXPAND-NEXT: $p15 = IMPLICIT_DEF ; - ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.0) + ; EXPAND-NEXT: $z0 = LDR_ZXI killed $x8, 0 :: (load (s128) from %stack.0) ; EXPAND-NEXT: $x0 = MRS 55824, implicit-def $nzcv, implicit $nzcv ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv @@ -283,45 +291,47 @@ body: | ; ; EXPAND-NEXT: FAKE_USE implicit $nzcv ; + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from 
%stack.12) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.13) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.11) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.10) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.9) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.8) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.7) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.6) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.5) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.4) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5) ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.3) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4) ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.2) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3) ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; 
EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.1) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2) ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg - ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.13) + ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14) + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 ; EXPAND-NEXT: RET undef $lr, implicit $p0 $nzcv = IMPLICIT_DEF @@ -369,7 +379,6 @@ body: | liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7 ; CHECK-LABEL: name: zpr_predicate_spill__save_restore_nzcv__spill_gpr - ; CHECK: stack: ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ; CHECK: liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7 @@ -410,38 +419,41 @@ body: | ; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0) ; ; CHECK-NEXT: FAKE_USE implicit $nzcv, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 + ; ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 ; EXPAND-LABEL: name: zpr_predicate_spill__save_restore_nzcv__spill_gpr ; EXPAND: liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4 ; EXPAND-NEXT: {{ $}} ; - ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.13) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 + ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.14) ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.12) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.13) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.11) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.12) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.10) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.11) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.9) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.10) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.8) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.9) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.7) + ; 
EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.8) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.6) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.7) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.5) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.6) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.4) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.5) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.3) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.4) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.3) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.1) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg ; ; EXPAND-NEXT: $nzcv = IMPLICIT_DEF @@ -458,7 +470,8 @@ body: | ; EXPAND-NEXT: $x18 = IMPLICIT_DEF ; ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0 - ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.0) + ; EXPAND-NEXT: $fp = ADDXri $sp, 1040, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $fp, 0 :: (store (s128) into %stack.0) ; ; EXPAND-NEXT: $p0 = IMPLICIT_DEF ; EXPAND-NEXT: $p1 = IMPLICIT_DEF @@ -477,56 +490,57 @@ body: | ; EXPAND-NEXT: $p14 = IMPLICIT_DEF ; EXPAND-NEXT: $p15 = IMPLICIT_DEF ; - ; EXPAND-NEXT: $fp = ADDVL_XXI $sp, 13, implicit $vg - ; EXPAND-NEXT: STRXui $x0, killed $fp, 1 :: (store (s64) into %stack.14) - ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.0) + ; EXPAND-NEXT: STRXui $x0, $sp, 1 :: (store (s64) into %stack.16) + ; EXPAND-NEXT: $z0 = LDR_ZXI killed $fp, 0 :: (load (s128) from %stack.0) ; EXPAND-NEXT: $x0 = MRS 55824, implicit-def $nzcv, implicit $nzcv ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: MSR 55824, $x0, implicit-def $nzcv - ; EXPAND-NEXT: $fp = ADDVL_XXI $sp, 13, implicit $vg - ; EXPAND-NEXT: $x0 = LDRXui killed $fp, 1 :: (load (s64) from %stack.14) + ; EXPAND-NEXT: $x0 = LDRXui $sp, 1 :: (load (s64) from %stack.16) ; ; EXPAND-NEXT: FAKE_USE implicit $nzcv, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 + ; + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.12) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.13) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load 
(s128) from %stack.11) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.10) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.9) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.8) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.7) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.6) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.5) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.4) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5) ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.3) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4) ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.2) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3) ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.1) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2) ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; 
EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg - ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.13) + ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14) + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 ; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 $nzcv = IMPLICIT_DEF $x8 = IMPLICIT_DEF @@ -585,7 +599,6 @@ body: | liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 ; CHECK-LABEL: name: zpr_predicate_spill__spill_zpr - ; CHECK: stack: ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ; CHECK: liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 @@ -630,46 +643,49 @@ body: | ; CHECK-NEXT: $p0 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0) ; ; CHECK-NEXT: FAKE_USE implicit $z16, implicit $z17, implicit $z18, implicit $z19, implicit $z20, implicit $z21, implicit $z22, implicit $z23, implicit $z24, implicit $z25, implicit $z26, implicit $z27, implicit $z28, implicit $z29, implicit $z30, implicit $z31 + ; ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $z0, implicit $z1, implicit $z2, implicit $z3, implicit $z4, implicit $z5, implicit $z6, implicit $z7 ; EXPAND-LABEL: name: zpr_predicate_spill__spill_zpr ; EXPAND: liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4, $z23, $z22, $z21, $z20, $z19, $z18, $z17, $z16 ; EXPAND-NEXT: {{ $}} ; - ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.21) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 + ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.22) ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -20, implicit $vg ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p15, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 0 :: (store (s128) into %stack.20) + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 0 :: (store (s128) into %stack.21) ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p14, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 1 :: (store (s128) into %stack.19) + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 1 :: (store (s128) into %stack.20) ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p13, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 2 :: (store (s128) into %stack.18) + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 2 :: (store (s128) into %stack.19) ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p12, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 3 :: (store (s128) into %stack.17) + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 3 :: (store (s128) into %stack.18) ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p11, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 4 :: (store (s128) into %stack.16) + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 4 :: (store (s128) into %stack.17) ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p10, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 5 :: (store (s128) into %stack.15) + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 5 :: (store (s128) into %stack.16) ; EXPAND-NEXT: $z24 = 
frame-setup CPY_ZPzI_B killed $p9, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 6 :: (store (s128) into %stack.14) + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 6 :: (store (s128) into %stack.15) ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p8, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 7 :: (store (s128) into %stack.13) + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 7 :: (store (s128) into %stack.14) ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p7, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 8 :: (store (s128) into %stack.12) + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 8 :: (store (s128) into %stack.13) ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p6, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 9 :: (store (s128) into %stack.11) + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 9 :: (store (s128) into %stack.12) ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p5, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 10 :: (store (s128) into %stack.10) + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 10 :: (store (s128) into %stack.11) ; EXPAND-NEXT: $z24 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 11 :: (store (s128) into %stack.9) - ; EXPAND-NEXT: frame-setup STR_ZXI killed $z23, $sp, 12 :: (store (s128) into %stack.8) - ; EXPAND-NEXT: frame-setup STR_ZXI killed $z22, $sp, 13 :: (store (s128) into %stack.7) - ; EXPAND-NEXT: frame-setup STR_ZXI killed $z21, $sp, 14 :: (store (s128) into %stack.6) - ; EXPAND-NEXT: frame-setup STR_ZXI killed $z20, $sp, 15 :: (store (s128) into %stack.5) - ; EXPAND-NEXT: frame-setup STR_ZXI killed $z19, $sp, 16 :: (store (s128) into %stack.4) - ; EXPAND-NEXT: frame-setup STR_ZXI killed $z18, $sp, 17 :: (store (s128) into %stack.3) - ; EXPAND-NEXT: frame-setup STR_ZXI killed $z17, $sp, 18 :: (store (s128) into %stack.2) - ; EXPAND-NEXT: frame-setup STR_ZXI killed $z16, $sp, 19 :: (store (s128) into %stack.1) + ; EXPAND-NEXT: frame-setup STR_ZXI $z24, $sp, 11 :: (store (s128) into %stack.10) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z23, $sp, 12 :: (store (s128) into %stack.9) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z22, $sp, 13 :: (store (s128) into %stack.8) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z21, $sp, 14 :: (store (s128) into %stack.7) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z20, $sp, 15 :: (store (s128) into %stack.6) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z19, $sp, 16 :: (store (s128) into %stack.5) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z18, $sp, 17 :: (store (s128) into %stack.4) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z17, $sp, 18 :: (store (s128) into %stack.3) + ; EXPAND-NEXT: frame-setup STR_ZXI killed $z16, $sp, 19 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg ; ; EXPAND-NEXT: $z16 = IMPLICIT_DEF @@ -689,10 +705,11 @@ body: | ; EXPAND-NEXT: $z30 = IMPLICIT_DEF ; EXPAND-NEXT: $z31 = IMPLICIT_DEF ; - ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.22) + ; EXPAND-NEXT: $x8 = ADDXri $sp, 1024, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $x8, 0 :: (store (s128) into %stack.24) ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0 - ; EXPAND-NEXT: STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.0) - ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.22) + ; EXPAND-NEXT: STR_ZXI $z0, $x8, 1 :: (store (s128) into %stack.0) + ; EXPAND-NEXT: $z0 = LDR_ZXI $x8, 0 :: (load (s128) from %stack.24) ; ; 
EXPAND-NEXT: $p0 = IMPLICIT_DEF ; EXPAND-NEXT: $p1 = IMPLICIT_DEF @@ -711,60 +728,63 @@ body: | ; EXPAND-NEXT: $p14 = IMPLICIT_DEF ; EXPAND-NEXT: $p15 = IMPLICIT_DEF ; - ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.22) - ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 1 :: (load (s128) from %stack.0) + ; EXPAND-NEXT: STR_ZXI $z0, $x8, 0 :: (store (s128) into %stack.24) + ; EXPAND-NEXT: $z0 = LDR_ZXI $x8, 1 :: (load (s128) from %stack.0) ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.22) + ; EXPAND-NEXT: $z0 = LDR_ZXI killed $x8, 0 :: (load (s128) from %stack.24) ; ; EXPAND-NEXT: FAKE_USE implicit $z16, implicit $z17, implicit $z18, implicit $z19, implicit $z20, implicit $z21, implicit $z22, implicit $z23, implicit $z24, implicit $z25, implicit $z26, implicit $z27, implicit $z28, implicit $z29, implicit $z30, implicit $z31 + ; + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg - ; EXPAND-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 12 :: (load (s128) from %stack.8) - ; EXPAND-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 13 :: (load (s128) from %stack.7) - ; EXPAND-NEXT: $z21 = frame-destroy LDR_ZXI $sp, 14 :: (load (s128) from %stack.6) - ; EXPAND-NEXT: $z20 = frame-destroy LDR_ZXI $sp, 15 :: (load (s128) from %stack.5) - ; EXPAND-NEXT: $z19 = frame-destroy LDR_ZXI $sp, 16 :: (load (s128) from %stack.4) - ; EXPAND-NEXT: $z18 = frame-destroy LDR_ZXI $sp, 17 :: (load (s128) from %stack.3) - ; EXPAND-NEXT: $z17 = frame-destroy LDR_ZXI $sp, 18 :: (load (s128) from %stack.2) - ; EXPAND-NEXT: $z16 = frame-destroy LDR_ZXI $sp, 19 :: (load (s128) from %stack.1) - ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.20) + ; EXPAND-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 12 :: (load (s128) from %stack.9) + ; EXPAND-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 13 :: (load (s128) from %stack.8) + ; EXPAND-NEXT: $z21 = frame-destroy LDR_ZXI $sp, 14 :: (load (s128) from %stack.7) + ; EXPAND-NEXT: $z20 = frame-destroy LDR_ZXI $sp, 15 :: (load (s128) from %stack.6) + ; EXPAND-NEXT: $z19 = frame-destroy LDR_ZXI $sp, 16 :: (load (s128) from %stack.5) + ; EXPAND-NEXT: $z18 = frame-destroy LDR_ZXI $sp, 17 :: (load (s128) from %stack.4) + ; EXPAND-NEXT: $z17 = frame-destroy LDR_ZXI $sp, 18 :: (load (s128) from %stack.3) + ; EXPAND-NEXT: $z16 = frame-destroy LDR_ZXI $sp, 19 :: (load (s128) from %stack.2) + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.21) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.19) + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.20) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.18) + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.19) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from 
%stack.17) + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.18) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.16) + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.17) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.15) + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.16) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.14) + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.15) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.13) + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.14) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.12) + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.13) ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z24, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.11) + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.12) ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z24, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.10) + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.11) ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z24, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.9) + ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.10) ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z24, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 20, implicit $vg - ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.21) + ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.22) + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 ; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $z0, implicit $z1, implicit $z2, implicit $z3, implicit $z4, implicit $z5, implicit $z6, implicit $z7 $z16 = IMPLICIT_DEF $z17 = IMPLICIT_DEF @@ -822,7 +842,6 @@ body: | liveins: $p0, $p1, $p2, $p3 ; CHECK-LABEL: name: zpr_predicate_spill_above_p7 - ; CHECK: stack: ; 
CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ; CHECK: liveins: $p0, $p1, $p2, $p3 @@ -852,44 +871,48 @@ body: | ; CHECK-NEXT: $p15 = FILL_PPR_FROM_ZPR_SLOT_PSEUDO %stack.0, 0 :: (load (s128) from %stack.0) ; ; CHECK-NEXT: FAKE_USE implicit $p4, implicit $p5, implicit $p6, implicit $p7 + ; ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3 ; EXPAND-LABEL: name: zpr_predicate_spill_above_p7 ; EXPAND: liveins: $p0, $p1, $p2, $p3, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4 ; EXPAND-NEXT: {{ $}} ; - ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.13) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 + ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.14) ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -12, implicit $vg ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p15, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.12) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.13) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p14, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.11) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.12) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p13, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.10) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 2 :: (store (s128) into %stack.11) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p12, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.9) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 3 :: (store (s128) into %stack.10) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p11, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.8) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 4 :: (store (s128) into %stack.9) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p10, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.7) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 5 :: (store (s128) into %stack.8) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p9, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.6) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 6 :: (store (s128) into %stack.7) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.5) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 7 :: (store (s128) into %stack.6) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p7, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.4) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 8 :: (store (s128) into %stack.5) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p6, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.3) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 9 :: (store (s128) into %stack.4) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p5, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.3) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 
11 :: (store (s128) into %stack.1) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.2) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg ; ; EXPAND-NEXT: $p15 = IMPLICIT_DEF ; ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p15, 1, 0 - ; EXPAND-NEXT: STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.0) + ; EXPAND-NEXT: $x8 = ADDXri $sp, 1024, 0 + ; EXPAND-NEXT: STR_ZXI $z0, $x8, 1 :: (store (s128) into %stack.0) ; ; EXPAND-NEXT: $p0 = IMPLICIT_DEF ; EXPAND-NEXT: $p1 = IMPLICIT_DEF @@ -909,54 +932,57 @@ body: | ; EXPAND-NEXT: $p15 = IMPLICIT_DEF ; ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0 - ; EXPAND-NEXT: STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.14) - ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 1 :: (load (s128) from %stack.0) + ; EXPAND-NEXT: STR_ZXI $z0, $x8, 0 :: (store (s128) into %stack.16) + ; EXPAND-NEXT: $z0 = LDR_ZXI $x8, 1 :: (load (s128) from %stack.0) ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p15 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = LDR_ZXI $sp, 0 :: (load (s128) from %stack.14) + ; EXPAND-NEXT: $z0 = LDR_ZXI killed $x8, 0 :: (load (s128) from %stack.16) ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; ; EXPAND-NEXT: FAKE_USE implicit $p4, implicit $p5, implicit $p6, implicit $p7 + ; + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.12) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.13) ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.11) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12) ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.10) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11) ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.9) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10) ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.8) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9) ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.7) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8) ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv 
- ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.6) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7) ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.5) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6) ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.4) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5) ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.3) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4) ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.2) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3) ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.1) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2) ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg - ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.13) + ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14) + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 ; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $p1, implicit $p2, implicit $p3 $p15 = IMPLICIT_DEF %1:ppr = COPY $p15 @@ -1000,31 +1026,33 @@ body: | ; CHECK-LABEL: name: zpr_predicate_spill_p4_saved ; CHECK: liveins: $p0, $p1, $p2, $p3 ; CHECK-NEXT: {{ $}} - ; ; CHECK-NEXT: $p8 = IMPLICIT_DEF - ; ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3 - + ; ; EXPAND-LABEL: name: zpr_predicate_spill_p4_saved ; EXPAND: liveins: $p0, $p1, $p2, $p3, $fp, $p8, $p4 ; EXPAND-NEXT: {{ $}} - ; EXPAND-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 + ; EXPAND-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.3) ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p8, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.1) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 0 :: (store (s128) into %stack.2) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 - ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into %stack.0) + ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 1 :: (store (s128) into 
%stack.1) + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 ; ; EXPAND-NEXT: $p8 = IMPLICIT_DEF ; - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.1) + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.2) ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.0) + ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.1) ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg - ; EXPAND-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2) + ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.3) + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 ; EXPAND-NEXT: RET undef $lr, implicit $p0, implicit $p1, implicit $p2, implicit $p3 ; If we spill a register above p8, p4 must also be saved, so we can guarantee From 636ddf68d25aa10291a37c5eb81008d5a2f30efd Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 24 Jan 2025 17:16:14 +0000 Subject: [PATCH 3/8] Fixups --- .../Target/AArch64/AArch64FrameLowering.cpp | 168 ++++++++++-------- llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 2 +- .../AArch64/spill-fill-zpr-predicates.mir | 111 ++++++------ 3 files changed, 149 insertions(+), 132 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 1fb8fc759b3f7..74bc0e7cae0ca 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -4212,20 +4212,22 @@ struct ScopedScavengeOrSpill { Register SpillCandidate, const TargetRegisterClass &RC, LiveRegUnits const &UsedRegs, BitVector const &AllocatableRegs, - std::optional &MaybeSpillFI) + std::optional *MaybeSpillFI) : MBB(MBB), MBBI(MBBI), RC(RC), TII(static_cast( *MF.getSubtarget().getInstrInfo())), TRI(*MF.getSubtarget().getRegisterInfo()) { FreeReg = tryScavengeRegister(UsedRegs, AllocatableRegs); if (FreeReg != AArch64::NoRegister) return; - if (!MaybeSpillFI) { + assert(MaybeSpillFI && "Expected emergency spill slot FI information " + "(attempted to spill in prologue/epilogue?)"); + if (!MaybeSpillFI->has_value()) { MachineFrameInfo &MFI = MF.getFrameInfo(); - MaybeSpillFI = MFI.CreateSpillStackObject(TRI.getSpillSize(RC), - TRI.getSpillAlign(RC)); + *MaybeSpillFI = MFI.CreateSpillStackObject(TRI.getSpillSize(RC), + TRI.getSpillAlign(RC)); } FreeReg = SpilledReg = SpillCandidate; - SpillFI = *MaybeSpillFI; + SpillFI = MaybeSpillFI->value(); TII.storeRegToStackSlot(MBB, MBBI, SpilledReg, false, SpillFI, &RC, &TRI, Register()); } @@ -4256,6 +4258,18 @@ struct EmergencyStackSlots { std::optional GPRSpillFI; }; +/// Registers available for scavenging (ZPR, PPR3b, GPR). 
+struct ScavengeableRegs { + BitVector ZPRRegs; + BitVector PPR3bRegs; + BitVector GPRRegs; +}; + +static bool isInPrologueOrEpilogue(const MachineInstr &MI) { + return MI.getFlag(MachineInstr::FrameSetup) || + MI.getFlag(MachineInstr::FrameDestroy); +} + /// Expands: /// ``` /// SPILL_PPR_TO_ZPR_SLOT_PSEUDO $p0, %stack.0, 0 @@ -4271,24 +4285,17 @@ static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB, MachineInstr &MI, const TargetRegisterInfo &TRI, LiveRegUnits const &UsedRegs, - BitVector const &ZPRRegs, + ScavengeableRegs const &Regs, EmergencyStackSlots &SpillSlots) { MachineFunction &MF = *MBB.getParent(); auto *TII = static_cast(MF.getSubtarget().getInstrInfo()); Register ZPredReg = AArch64::NoRegister; - ScopedScavengeOrSpill FindZPRReg(MF, MBB, MachineBasicBlock::iterator(MI), - ZPredReg, AArch64::Z0, AArch64::ZPRRegClass, - UsedRegs, ZPRRegs, SpillSlots.ZPRSpillFI); - -#ifndef NDEBUG - bool InPrologueOrEpilogue = MI.getFlag(MachineInstr::FrameSetup) || - MI.getFlag(MachineInstr::FrameDestroy); - assert((!FindZPRReg.hasSpilled() || !InPrologueOrEpilogue) && - "SPILL_PPR_TO_ZPR_SLOT_PSEUDO expansion should not spill in prologue " - "or epilogue"); -#endif + ScopedScavengeOrSpill FindZPRReg( + MF, MBB, MachineBasicBlock::iterator(MI), ZPredReg, AArch64::Z0, + AArch64::ZPRRegClass, UsedRegs, Regs.ZPRRegs, + isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.ZPRSpillFI); SmallVector MachineInstrs; const DebugLoc &DL = MI.getDebugLoc(); @@ -4321,44 +4328,37 @@ static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB, /// spilling if necessary). If the status flags are in use at the point of /// expansion they are preserved (by moving them to/from a GPR). This may cause /// an additional spill if no GPR is free at the expansion point. -static bool expandFillPPRFromZPRSlotPseudo( - MachineBasicBlock &MBB, MachineInstr &MI, const TargetRegisterInfo &TRI, - LiveRegUnits const &UsedRegs, BitVector const &ZPRRegs, - BitVector const &PPR3bRegs, BitVector const &GPRRegs, - EmergencyStackSlots &SpillSlots) { +static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB, + MachineInstr &MI, + const TargetRegisterInfo &TRI, + LiveRegUnits const &UsedRegs, + ScavengeableRegs const &Regs, + EmergencyStackSlots &SpillSlots) { MachineFunction &MF = *MBB.getParent(); auto *TII = static_cast(MF.getSubtarget().getInstrInfo()); Register ZPredReg = AArch64::NoRegister; - ScopedScavengeOrSpill FindZPRReg(MF, MBB, MachineBasicBlock::iterator(MI), - ZPredReg, AArch64::Z0, AArch64::ZPRRegClass, - UsedRegs, ZPRRegs, SpillSlots.ZPRSpillFI); + ScopedScavengeOrSpill FindZPRReg( + MF, MBB, MachineBasicBlock::iterator(MI), ZPredReg, AArch64::Z0, + AArch64::ZPRRegClass, UsedRegs, Regs.ZPRRegs, + isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.ZPRSpillFI); Register PredReg = AArch64::NoRegister; ScopedScavengeOrSpill FindPPR3bReg( MF, MBB, MachineBasicBlock::iterator(MI), PredReg, AArch64::P0, - AArch64::PPR_3bRegClass, UsedRegs, PPR3bRegs, SpillSlots.PPRSpillFI); + AArch64::PPR_3bRegClass, UsedRegs, Regs.PPR3bRegs, + isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.PPRSpillFI); // Elide NZCV spills if we know it is not used. 
Register NZCVSaveReg = AArch64::NoRegister; bool IsNZCVUsed = !UsedRegs.available(AArch64::NZCV); std::optional FindGPRReg; if (IsNZCVUsed) - FindGPRReg.emplace(MF, MBB, MachineBasicBlock::iterator(MI), NZCVSaveReg, - AArch64::X0, AArch64::GPR64RegClass, UsedRegs, GPRRegs, - SpillSlots.GPRSpillFI); - -#ifndef NDEBUG - bool Spilled = FindZPRReg.hasSpilled() || FindPPR3bReg.hasSpilled() || - (FindGPRReg && FindGPRReg->hasSpilled()); - bool InPrologueOrEpilogue = MI.getFlag(MachineInstr::FrameSetup) || - MI.getFlag(MachineInstr::FrameDestroy); - assert((!Spilled || !InPrologueOrEpilogue) && - "FILL_PPR_FROM_ZPR_SLOT_PSEUDO expansion should not spill in prologue " - "or epilogue"); -#endif - + FindGPRReg.emplace( + MF, MBB, MachineBasicBlock::iterator(MI), NZCVSaveReg, AArch64::X0, + AArch64::GPR64RegClass, UsedRegs, Regs.GPRRegs, + isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.GPRSpillFI); SmallVector MachineInstrs; const DebugLoc &DL = MI.getDebugLoc(); MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::LDR_ZXI)) @@ -4397,26 +4397,27 @@ static bool expandFillPPRFromZPRSlotPseudo( /// Expands all FILL_PPR_FROM_ZPR_SLOT_PSEUDO and SPILL_PPR_TO_ZPR_SLOT_PSEUDO /// operations within the MachineBasicBlock \p MBB. -static bool expandSMEPPRToZPRSpillPseudos(MachineBasicBlock &MBB, - const TargetRegisterInfo &TRI, - BitVector const &ZPRRegs, - BitVector const &PPR3bRegs, - BitVector const &GPRRegs, - EmergencyStackSlots &SpillSlots) { +static bool expandSMEPPRToZPRSpillPseudos( + MachineBasicBlock &MBB, const TargetRegisterInfo &TRI, + ScavengeableRegs const &ScavengeableRegsBody, + ScavengeableRegs const &ScavengeableRegsFrameSetup, + EmergencyStackSlots &SpillSlots) { LiveRegUnits UsedRegs(TRI); UsedRegs.addLiveOuts(MBB); bool HasPPRSpills = false; for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) { UsedRegs.stepBackward(MI); + ScavengeableRegs const &Regs = isInPrologueOrEpilogue(MI) + ? ScavengeableRegsFrameSetup + : ScavengeableRegsBody; switch (MI.getOpcode()) { case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO: - HasPPRSpills |= expandFillPPRFromZPRSlotPseudo( - MBB, MI, TRI, UsedRegs, ZPRRegs, PPR3bRegs, GPRRegs, SpillSlots); + HasPPRSpills |= expandFillPPRFromZPRSlotPseudo(MBB, MI, TRI, UsedRegs, + Regs, SpillSlots); MI.eraseFromParent(); break; case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO: - expandSpillPPRToZPRSlotPseudo(MBB, MI, TRI, UsedRegs, ZPRRegs, - SpillSlots); + expandSpillPPRToZPRSlotPseudo(MBB, MI, TRI, UsedRegs, Regs, SpillSlots); MI.eraseFromParent(); break; default: @@ -4434,40 +4435,47 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( const TargetSubtargetInfo &TSI = MF.getSubtarget(); const TargetRegisterInfo &TRI = *TSI.getRegisterInfo(); if (AFI->hasStackFrame() && TRI.getSpillSize(AArch64::PPRRegClass) == 16) { - const uint32_t *CSRMask = - TRI.getCallPreservedMask(MF, MF.getFunction().getCallingConv()); + // If predicates spills are 16-bytes we may need to expand + // SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO. + const MachineFrameInfo &MFI = MF.getFrameInfo(); assert(MFI.isCalleeSavedInfoValid()); + const std::vector &CSI = MFI.getCalleeSavedInfo(); auto ComputeScavengeableRegisters = [&](unsigned RegClassID) { - BitVector ScavengeableRegs = - TRI.getAllocatableSet(MF, TRI.getRegClass(RegClassID)); - if (CSRMask) - ScavengeableRegs.clearBitsInMask(CSRMask); - // TODO: Allow reusing callee-saved registers that have been saved. 
- assert(ScavengeableRegs.count() > 0 && "Expected scavengeable registers"); - return ScavengeableRegs; + BitVector Regs = TRI.getAllocatableSet(MF, TRI.getRegClass(RegClassID)); + + for (const CalleeSavedInfo &I : CSI) + if (TRI.getRegClass(RegClassID)->contains(I.getReg())) + Regs.set(I.getReg()); + + assert(Regs.count() > 0 && "Expected scavengeable registers"); + return Regs; }; - // If predicates spills are 16-bytes we may need to expand - // SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO. - // These are handled separately as we need to compute register liveness to - // scavenge a ZPR and PPR during the expansion. - BitVector ZPRRegs = ComputeScavengeableRegisters(AArch64::ZPRRegClassID); + const uint32_t *CSRMask = + TRI.getCallPreservedMask(MF, MF.getFunction().getCallingConv()); + + // Registers free to scavenge in the function body. + ScavengeableRegs ScavengeableRegsBody; + ScavengeableRegsBody.ZPRRegs = + ComputeScavengeableRegisters(AArch64::ZPRRegClassID); // Only p0-7 are possible as the second operand of cmpne (needed for fills). - BitVector PPR3bRegs = + ScavengeableRegsBody.PPR3bRegs = ComputeScavengeableRegisters(AArch64::PPR_3bRegClassID); - BitVector GPRRegs = ComputeScavengeableRegisters(AArch64::GPR64RegClassID); - - bool SpillsAboveP7 = - any_of(MFI.getCalleeSavedInfo(), [](const CalleeSavedInfo &CSI) { - return AArch64::PPR_p8to15RegClass.contains(CSI.getReg()); - }); - // We spill p4 in determineCalleeSaves() if a predicate above p8 is spilled, - // as it may be needed to reload callee saves (if p0-p3 are used as - // returns). - if (SpillsAboveP7) - PPR3bRegs.set(AArch64::P4); + ScavengeableRegsBody.GPRRegs = + ComputeScavengeableRegisters(AArch64::GPR64RegClassID); + + // Registers free to scavenge in the prologue/epilogue. + ScavengeableRegs ScavengeableRegsFrameSetup = ScavengeableRegsBody; + ScavengeableRegsFrameSetup.ZPRRegs.clearBitsInMask(CSRMask); + ScavengeableRegsFrameSetup.GPRRegs.clearBitsInMask(CSRMask); + // Note: If p4 was available allow it to be scavenged (even though it is a + // CSR). P4 is reloaded last in the epilogue and is needed to reload + // predicates >= p8 if p0-p3 are used as return values. + ScavengeableRegsFrameSetup.PPR3bRegs.clearBitsInMask(CSRMask); + if (ScavengeableRegsBody.PPR3bRegs[AArch64::P4]) + ScavengeableRegsFrameSetup.PPR3bRegs.set(AArch64::P4); EmergencyStackSlots SpillSlots; for (MachineBasicBlock &MBB : MF) { @@ -4478,7 +4486,8 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( // p0-p7 never requires spilling another predicate. for (int Pass = 0; Pass < 2; Pass++) { bool HasPPRSpills = expandSMEPPRToZPRSpillPseudos( - MBB, TRI, ZPRRegs, PPR3bRegs, GPRRegs, SpillSlots); + MBB, TRI, ScavengeableRegsBody, ScavengeableRegsFrameSetup, + SpillSlots); assert((Pass == 0 || !HasPPRSpills) && "Did not expect PPR spills"); if (!HasPPRSpills) break; @@ -5532,9 +5541,10 @@ void AArch64FrameLowering::emitRemarks( // spill/fill the predicate as a data vector (so are an FPR acess). 
if (MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO && MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO && - AArch64::PPRRegClass.contains(MI.getOperand(0).getReg())) + AArch64::PPRRegClass.contains(MI.getOperand(0).getReg())) { + MI.dump(); RegTy = StackAccess::PPR; - else + } else RegTy = StackAccess::FPR; } else if (AArch64InstrInfo::isFpOrNEON(MI)) { RegTy = StackAccess::FPR; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 34d05c6457e05..5864f57582e21 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -414,7 +414,7 @@ unsigned AArch64Subtarget::getHwModeSet() const { // // FIXME: This overrides the table-gen'd `getHwModeSet()` which only looks at // CPU features. - if (EnableZPRPredicateSpills.getValue() && getStreamingHazardSize() > 0 && + if (EnableZPRPredicateSpills.getValue() && (isStreaming() || isStreamingCompatible())) { Modes |= (1 << 0); } diff --git a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir index 8aa957f04efc0..b58f91ac68a93 100644 --- a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir +++ b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir @@ -10,7 +10,7 @@ define aarch64_sve_vector_pcs void @zpr_predicate_spill__save_restore_nzcv() #0 { entry: unreachable } - define aarch64_sve_vector_pcs void @zpr_predicate_spill__save_restore_nzcv__spill_gpr() #0 { entry: unreachable } + define aarch64_sve_vector_pcs void @zpr_predicate_spill__save_restore_nzcv__scavenge_csr_gpr() #0 { entry: unreachable } define aarch64_sve_vector_pcs void @zpr_predicate_spill__spill_zpr() #0 { entry: unreachable } @@ -31,6 +31,7 @@ body: | liveins: $p0 ; CHECK-LABEL: name: zpr_predicate_spill + ; CHECK: stack: ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ; CHECK: liveins: $p0 @@ -145,17 +146,17 @@ body: | ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5) - ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4) - ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3) - ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2) - ; EXPAND-NEXT: $p4 = 
frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14) ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 @@ -194,6 +195,7 @@ body: | liveins: $p0 ; CHECK-LABEL: name: zpr_predicate_spill__save_restore_nzcv + ; CHECK: stack: ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ; CHECK: liveins: $p0 @@ -284,10 +286,10 @@ body: | ; EXPAND-NEXT: $p15 = IMPLICIT_DEF ; ; EXPAND-NEXT: $z0 = LDR_ZXI killed $x8, 0 :: (load (s128) from %stack.0) - ; EXPAND-NEXT: $x0 = MRS 55824, implicit-def $nzcv, implicit $nzcv + ; EXPAND-NEXT: $fp = MRS 55824, implicit-def $nzcv, implicit $nzcv ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: MSR 55824, $x0, implicit-def $nzcv + ; EXPAND-NEXT: MSR 55824, $fp, implicit-def $nzcv ; ; EXPAND-NEXT: FAKE_USE implicit $nzcv ; @@ -318,17 +320,17 @@ body: | ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5) - ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4) - ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3) - ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2) - ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14) ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 @@ -361,7 +363,7 @@ body: | RET_ReallyLR implicit $p0 ... 
--- -name: zpr_predicate_spill__save_restore_nzcv__spill_gpr +name: zpr_predicate_spill__save_restore_nzcv__scavenge_csr_gpr tracksRegLiveness: true stack: liveins: @@ -378,13 +380,15 @@ body: | bb.0.entry: liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7 - ; CHECK-LABEL: name: zpr_predicate_spill__save_restore_nzcv__spill_gpr + ; CHECK-LABEL: name: zpr_predicate_spill__save_restore_nzcv__scavenge_csr_gpr + ; CHECK: stack: ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ; CHECK: liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7 ; CHECK-NEXT: {{ $}} ; ; CHECK-NEXT: $nzcv = IMPLICIT_DEF + ; ; CHECK-NEXT: $x8 = IMPLICIT_DEF ; CHECK-NEXT: $x9 = IMPLICIT_DEF ; CHECK-NEXT: $x10 = IMPLICIT_DEF @@ -422,7 +426,7 @@ body: | ; ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 - ; EXPAND-LABEL: name: zpr_predicate_spill__save_restore_nzcv__spill_gpr + ; EXPAND-LABEL: name: zpr_predicate_spill__save_restore_nzcv__scavenge_csr_gpr ; EXPAND: liveins: $p0, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $fp, $p15, $p14, $p13, $p12, $p11, $p10, $p9, $p8, $p7, $p6, $p5, $p4 ; EXPAND-NEXT: {{ $}} ; @@ -453,10 +457,11 @@ body: | ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 10 :: (store (s128) into %stack.3) ; EXPAND-NEXT: $z0 = frame-setup CPY_ZPzI_B killed $p4, 1, 0 ; EXPAND-NEXT: frame-setup STR_ZXI $z0, $sp, 11 :: (store (s128) into %stack.2) - ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 + ; EXPAND-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 ; EXPAND-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg ; ; EXPAND-NEXT: $nzcv = IMPLICIT_DEF + ; ; EXPAND-NEXT: $x8 = IMPLICIT_DEF ; EXPAND-NEXT: $x9 = IMPLICIT_DEF ; EXPAND-NEXT: $x10 = IMPLICIT_DEF @@ -470,7 +475,7 @@ body: | ; EXPAND-NEXT: $x18 = IMPLICIT_DEF ; ; EXPAND-NEXT: $z0 = CPY_ZPzI_B $p0, 1, 0 - ; EXPAND-NEXT: $fp = ADDXri $sp, 1040, 0 + ; EXPAND-NEXT: $fp = ADDXri $sp, 1024, 0 ; EXPAND-NEXT: STR_ZXI $z0, $fp, 0 :: (store (s128) into %stack.0) ; ; EXPAND-NEXT: $p0 = IMPLICIT_DEF @@ -490,17 +495,15 @@ body: | ; EXPAND-NEXT: $p14 = IMPLICIT_DEF ; EXPAND-NEXT: $p15 = IMPLICIT_DEF ; - ; EXPAND-NEXT: STRXui $x0, $sp, 1 :: (store (s64) into %stack.16) ; EXPAND-NEXT: $z0 = LDR_ZXI killed $fp, 0 :: (load (s128) from %stack.0) - ; EXPAND-NEXT: $x0 = MRS 55824, implicit-def $nzcv, implicit $nzcv + ; EXPAND-NEXT: $fp = MRS 55824, implicit-def $nzcv, implicit $nzcv ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv - ; EXPAND-NEXT: MSR 55824, $x0, implicit-def $nzcv - ; EXPAND-NEXT: $x0 = LDRXui $sp, 1 :: (load (s64) from %stack.16) + ; EXPAND-NEXT: MSR 55824, $fp, implicit-def $nzcv ; ; EXPAND-NEXT: FAKE_USE implicit $nzcv, implicit $x8, implicit $x9, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit $x18 ; - ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 + ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.13) ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg @@ -527,17 
+530,17 @@ body: | ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5) - ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4) - ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3) - ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2) - ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14) ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 @@ -599,6 +602,7 @@ body: | liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 ; CHECK-LABEL: name: zpr_predicate_spill__spill_zpr + ; CHECK: stack: ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ; CHECK: liveins: $p0, $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7 @@ -771,17 +775,17 @@ body: | ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.13) - ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.12) - ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.11) - ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit 
$vg - ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.10) - ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z24, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 20, implicit $vg ; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.22) ; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 @@ -842,6 +846,7 @@ body: | liveins: $p0, $p1, $p2, $p3 ; CHECK-LABEL: name: zpr_predicate_spill_above_p7 + ; CHECK: stack: ; CHECK: - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: ; CHECK: liveins: $p0, $p1, $p2, $p3 @@ -969,14 +974,14 @@ body: | ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5) - ; EXPAND-NEXT: $p7 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p7, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4) - ; EXPAND-NEXT: $p6 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p6, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3) - ; EXPAND-NEXT: $p5 = frame-destroy PTRUE_B 31, implicit $vg - ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p5, $z0, 0, implicit-def $nzcv, implicit-def $nzcv + ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg + ; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv ; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2) ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg ; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv @@ -1026,9 +1031,11 @@ body: | ; CHECK-LABEL: name: zpr_predicate_spill_p4_saved ; CHECK: liveins: $p0, $p1, $p2, $p3 ; CHECK-NEXT: {{ $}} + ; ; CHECK-NEXT: $p8 = IMPLICIT_DEF - ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3 ; + ; CHECK-NEXT: RET_ReallyLR implicit $p0, implicit $p1, implicit $p2, implicit $p3 + ; EXPAND-LABEL: name: zpr_predicate_spill_p4_saved ; EXPAND: liveins: $p0, $p1, $p2, $p3, $fp, $p8, $p4 ; EXPAND-NEXT: {{ $}} From 13c810386f64c9dfe84e5e3068313702e1626924 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 24 Jan 2025 17:48:52 +0000 Subject: [PATCH 4/8] Fixups --- .../Target/AArch64/AArch64FrameLowering.cpp | 
55 +++++++++++-------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 74bc0e7cae0ca..9d470555e64f5 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -4438,42 +4438,49 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( // If predicates spills are 16-bytes we may need to expand // SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO. - const MachineFrameInfo &MFI = MF.getFrameInfo(); - assert(MFI.isCalleeSavedInfoValid()); - const std::vector &CSI = MFI.getCalleeSavedInfo(); + const uint32_t *CSRMask = + TRI.getCallPreservedMask(MF, MF.getFunction().getCallingConv()); auto ComputeScavengeableRegisters = [&](unsigned RegClassID) { BitVector Regs = TRI.getAllocatableSet(MF, TRI.getRegClass(RegClassID)); - - for (const CalleeSavedInfo &I : CSI) - if (TRI.getRegClass(RegClassID)->contains(I.getReg())) - Regs.set(I.getReg()); - + Regs.clearBitsInMask(CSRMask); assert(Regs.count() > 0 && "Expected scavengeable registers"); return Regs; }; - const uint32_t *CSRMask = - TRI.getCallPreservedMask(MF, MF.getFunction().getCallingConv()); - - // Registers free to scavenge in the function body. - ScavengeableRegs ScavengeableRegsBody; - ScavengeableRegsBody.ZPRRegs = + // Registers free to scavenge in the prologue/epilogue. + ScavengeableRegs ScavengeableRegsFrameSetup; + ScavengeableRegsFrameSetup.ZPRRegs = ComputeScavengeableRegisters(AArch64::ZPRRegClassID); // Only p0-7 are possible as the second operand of cmpne (needed for fills). - ScavengeableRegsBody.PPR3bRegs = + ScavengeableRegsFrameSetup.PPR3bRegs = ComputeScavengeableRegisters(AArch64::PPR_3bRegClassID); - ScavengeableRegsBody.GPRRegs = + ScavengeableRegsFrameSetup.GPRRegs = ComputeScavengeableRegisters(AArch64::GPR64RegClassID); - // Registers free to scavenge in the prologue/epilogue. - ScavengeableRegs ScavengeableRegsFrameSetup = ScavengeableRegsBody; - ScavengeableRegsFrameSetup.ZPRRegs.clearBitsInMask(CSRMask); - ScavengeableRegsFrameSetup.GPRRegs.clearBitsInMask(CSRMask); - // Note: If p4 was available allow it to be scavenged (even though it is a - // CSR). P4 is reloaded last in the epilogue and is needed to reload - // predicates >= p8 if p0-p3 are used as return values. - ScavengeableRegsFrameSetup.PPR3bRegs.clearBitsInMask(CSRMask); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + assert(MFI.isCalleeSavedInfoValid()); + const std::vector &CSI = MFI.getCalleeSavedInfo(); + auto MarkSavedRegistersAsAvailable = + [&, &Reserved = MF.getRegInfo().getReservedRegs()]( + BitVector &Regs, unsigned RegClassID) { + for (const CalleeSavedInfo &I : CSI) + if (!Reserved[I.getReg()] && + TRI.getRegClass(RegClassID)->contains(I.getReg())) + Regs.set(I.getReg()); + }; + + // Registers free to scavenge in the function body. + ScavengeableRegs ScavengeableRegsBody = ScavengeableRegsFrameSetup; + MarkSavedRegistersAsAvailable(ScavengeableRegsBody.ZPRRegs, + AArch64::ZPRRegClassID); + MarkSavedRegistersAsAvailable(ScavengeableRegsBody.PPR3bRegs, + AArch64::PPR_3bRegClassID); + MarkSavedRegistersAsAvailable(ScavengeableRegsBody.GPRRegs, + AArch64::GPR64RegClassID); + + // p4 (CSR) is reloaded last in the epilogue, so if it is saved, it can be + // used to reload other predicates. 
if (ScavengeableRegsBody.PPR3bRegs[AArch64::P4]) ScavengeableRegsFrameSetup.PPR3bRegs.set(AArch64::P4); From 29e484a6870e71ee597ff8e4be17a7c69d925c9b Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 24 Jan 2025 17:53:41 +0000 Subject: [PATCH 5/8] Fixups --- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 9d470555e64f5..154fefe13664a 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -4443,7 +4443,8 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( auto ComputeScavengeableRegisters = [&](unsigned RegClassID) { BitVector Regs = TRI.getAllocatableSet(MF, TRI.getRegClass(RegClassID)); - Regs.clearBitsInMask(CSRMask); + if (CSRMask) + Regs.clearBitsInMask(CSRMask); assert(Regs.count() > 0 && "Expected scavengeable registers"); return Regs; }; From d618b1404a9e57ec09096324b9eb054dda8991bb Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Wed, 29 Jan 2025 16:00:02 +0000 Subject: [PATCH 6/8] Remove overly cautious code Turns out LiveRegUnits will already reserve unsaved callee-saved registers so we don't need to worry about doing this here. --- .../Target/AArch64/AArch64FrameLowering.cpp | 84 +++++-------------- 1 file changed, 22 insertions(+), 62 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 154fefe13664a..9698343cdf284 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -4285,7 +4285,7 @@ static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB, MachineInstr &MI, const TargetRegisterInfo &TRI, LiveRegUnits const &UsedRegs, - ScavengeableRegs const &Regs, + ScavengeableRegs const &SR, EmergencyStackSlots &SpillSlots) { MachineFunction &MF = *MBB.getParent(); auto *TII = @@ -4294,7 +4294,7 @@ static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB, Register ZPredReg = AArch64::NoRegister; ScopedScavengeOrSpill FindZPRReg( MF, MBB, MachineBasicBlock::iterator(MI), ZPredReg, AArch64::Z0, - AArch64::ZPRRegClass, UsedRegs, Regs.ZPRRegs, + AArch64::ZPRRegClass, UsedRegs, SR.ZPRRegs, isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.ZPRSpillFI); SmallVector MachineInstrs; @@ -4332,7 +4332,7 @@ static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB, MachineInstr &MI, const TargetRegisterInfo &TRI, LiveRegUnits const &UsedRegs, - ScavengeableRegs const &Regs, + ScavengeableRegs const &SR, EmergencyStackSlots &SpillSlots) { MachineFunction &MF = *MBB.getParent(); auto *TII = @@ -4341,13 +4341,13 @@ static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB, Register ZPredReg = AArch64::NoRegister; ScopedScavengeOrSpill FindZPRReg( MF, MBB, MachineBasicBlock::iterator(MI), ZPredReg, AArch64::Z0, - AArch64::ZPRRegClass, UsedRegs, Regs.ZPRRegs, + AArch64::ZPRRegClass, UsedRegs, SR.ZPRRegs, isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.ZPRSpillFI); Register PredReg = AArch64::NoRegister; ScopedScavengeOrSpill FindPPR3bReg( MF, MBB, MachineBasicBlock::iterator(MI), PredReg, AArch64::P0, - AArch64::PPR_3bRegClass, UsedRegs, Regs.PPR3bRegs, + AArch64::PPR_3bRegClass, UsedRegs, SR.PPR3bRegs, isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.PPRSpillFI); // Elide NZCV spills if we know it is not used. 
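
The reasoning behind this patch — that LiveRegUnits already treats unsaved callee-saved registers as live — can be illustrated with a minimal sketch in the context of AArch64FrameLowering.cpp. It assumes the usual LiveRegUnits semantics, where addLiveOuts() also adds the pristine registers (callee-saved registers the function does not spill); the helper name below is hypothetical and simply folds the patch's tryScavengeRegister query together with the surrounding liveness walk:

```
// Sketch, not part of the patch: a LiveRegUnits-based scavenging query never
// returns an unsaved callee-saved register, because addLiveOuts() marks such
// "pristine" registers as live, so no explicit CSR masking is required.
static Register scavengeFreeRegister(const MachineBasicBlock &MBB,
                                     const MachineInstr &UpToMI,
                                     const TargetRegisterInfo &TRI,
                                     const BitVector &CandidateRegs) {
  LiveRegUnits UsedRegs(TRI);
  UsedRegs.addLiveOuts(MBB); // Block live-outs + pristine (unsaved) CSRs.
  for (const MachineInstr &I : reverse(MBB)) {
    UsedRegs.stepBackward(I); // Rewind liveness to just before I.
    if (&I == &UpToMI)
      break;
  }
  for (unsigned Reg : CandidateRegs.set_bits())
    if (UsedRegs.available(Reg)) // False for live registers and unsaved CSRs.
      return Reg;
  return AArch64::NoRegister;
}
```

Because available() already returns false for pristine callee-saved registers, the explicit CSR masking and the MarkSavedRegistersAsAvailable bookkeeping (removed in the hunks below) were redundant.
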
@@ -4357,7 +4357,7 @@ static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB, if (IsNZCVUsed) FindGPRReg.emplace( MF, MBB, MachineBasicBlock::iterator(MI), NZCVSaveReg, AArch64::X0, - AArch64::GPR64RegClass, UsedRegs, Regs.GPRRegs, + AArch64::GPR64RegClass, UsedRegs, SR.GPRRegs, isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.GPRSpillFI); SmallVector MachineInstrs; const DebugLoc &DL = MI.getDebugLoc(); @@ -4397,27 +4397,23 @@ static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB, /// Expands all FILL_PPR_FROM_ZPR_SLOT_PSEUDO and SPILL_PPR_TO_ZPR_SLOT_PSEUDO /// operations within the MachineBasicBlock \p MBB. -static bool expandSMEPPRToZPRSpillPseudos( - MachineBasicBlock &MBB, const TargetRegisterInfo &TRI, - ScavengeableRegs const &ScavengeableRegsBody, - ScavengeableRegs const &ScavengeableRegsFrameSetup, - EmergencyStackSlots &SpillSlots) { +static bool expandSMEPPRToZPRSpillPseudos(MachineBasicBlock &MBB, + const TargetRegisterInfo &TRI, + ScavengeableRegs const &SR, + EmergencyStackSlots &SpillSlots) { LiveRegUnits UsedRegs(TRI); UsedRegs.addLiveOuts(MBB); bool HasPPRSpills = false; for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) { UsedRegs.stepBackward(MI); - ScavengeableRegs const &Regs = isInPrologueOrEpilogue(MI) - ? ScavengeableRegsFrameSetup - : ScavengeableRegsBody; switch (MI.getOpcode()) { case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO: - HasPPRSpills |= expandFillPPRFromZPRSlotPseudo(MBB, MI, TRI, UsedRegs, - Regs, SpillSlots); + HasPPRSpills |= expandFillPPRFromZPRSlotPseudo(MBB, MI, TRI, UsedRegs, SR, + SpillSlots); MI.eraseFromParent(); break; case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO: - expandSpillPPRToZPRSlotPseudo(MBB, MI, TRI, UsedRegs, Regs, SpillSlots); + expandSpillPPRToZPRSlotPseudo(MBB, MI, TRI, UsedRegs, SR, SpillSlots); MI.eraseFromParent(); break; default: @@ -4434,56 +4430,21 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( AArch64FunctionInfo *AFI = MF.getInfo(); const TargetSubtargetInfo &TSI = MF.getSubtarget(); const TargetRegisterInfo &TRI = *TSI.getRegisterInfo(); - if (AFI->hasStackFrame() && TRI.getSpillSize(AArch64::PPRRegClass) == 16) { - // If predicates spills are 16-bytes we may need to expand - // SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO. - - const uint32_t *CSRMask = - TRI.getCallPreservedMask(MF, MF.getFunction().getCallingConv()); + // If predicates spills are 16-bytes we may need to expand + // SPILL_PPR_TO_ZPR_SLOT_PSEUDO/FILL_PPR_FROM_ZPR_SLOT_PSEUDO. + if (AFI->hasStackFrame() && TRI.getSpillSize(AArch64::PPRRegClass) == 16) { auto ComputeScavengeableRegisters = [&](unsigned RegClassID) { BitVector Regs = TRI.getAllocatableSet(MF, TRI.getRegClass(RegClassID)); - if (CSRMask) - Regs.clearBitsInMask(CSRMask); assert(Regs.count() > 0 && "Expected scavengeable registers"); return Regs; }; - // Registers free to scavenge in the prologue/epilogue. - ScavengeableRegs ScavengeableRegsFrameSetup; - ScavengeableRegsFrameSetup.ZPRRegs = - ComputeScavengeableRegisters(AArch64::ZPRRegClassID); + ScavengeableRegs SR{}; + SR.ZPRRegs = ComputeScavengeableRegisters(AArch64::ZPRRegClassID); // Only p0-7 are possible as the second operand of cmpne (needed for fills). 
-    ScavengeableRegsFrameSetup.PPR3bRegs =
-        ComputeScavengeableRegisters(AArch64::PPR_3bRegClassID);
-    ScavengeableRegsFrameSetup.GPRRegs =
-        ComputeScavengeableRegisters(AArch64::GPR64RegClassID);
-
-    const MachineFrameInfo &MFI = MF.getFrameInfo();
-    assert(MFI.isCalleeSavedInfoValid());
-    const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
-    auto MarkSavedRegistersAsAvailable =
-        [&, &Reserved = MF.getRegInfo().getReservedRegs()](
-            BitVector &Regs, unsigned RegClassID) {
-          for (const CalleeSavedInfo &I : CSI)
-            if (!Reserved[I.getReg()] &&
-                TRI.getRegClass(RegClassID)->contains(I.getReg()))
-              Regs.set(I.getReg());
-        };
-
-    // Registers free to scavenge in the function body.
-    ScavengeableRegs ScavengeableRegsBody = ScavengeableRegsFrameSetup;
-    MarkSavedRegistersAsAvailable(ScavengeableRegsBody.ZPRRegs,
-                                  AArch64::ZPRRegClassID);
-    MarkSavedRegistersAsAvailable(ScavengeableRegsBody.PPR3bRegs,
-                                  AArch64::PPR_3bRegClassID);
-    MarkSavedRegistersAsAvailable(ScavengeableRegsBody.GPRRegs,
-                                  AArch64::GPR64RegClassID);
-
-    // p4 (CSR) is reloaded last in the epilogue, so if it is saved, it can be
-    // used to reload other predicates.
-    if (ScavengeableRegsBody.PPR3bRegs[AArch64::P4])
-      ScavengeableRegsFrameSetup.PPR3bRegs.set(AArch64::P4);
+    SR.PPR3bRegs = ComputeScavengeableRegisters(AArch64::PPR_3bRegClassID);
+    SR.GPRRegs = ComputeScavengeableRegisters(AArch64::GPR64RegClassID);

     EmergencyStackSlots SpillSlots;
     for (MachineBasicBlock &MBB : MF) {
@@ -4493,9 +4454,8 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
       // most two expansion passes, as spilling/filling a predicate in the range
      // p0-p7 never requires spilling another predicate.
       for (int Pass = 0; Pass < 2; Pass++) {
-        bool HasPPRSpills = expandSMEPPRToZPRSpillPseudos(
-            MBB, TRI, ScavengeableRegsBody, ScavengeableRegsFrameSetup,
-            SpillSlots);
+        bool HasPPRSpills =
+            expandSMEPPRToZPRSpillPseudos(MBB, TRI, SR, SpillSlots);
         assert((Pass == 0 || !HasPPRSpills) && "Did not expect PPR spills");
         if (!HasPPRSpills)
           break;

From e6ae6b093bc2b1a0854a8da30553f536f7f6f199 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Thu, 30 Jan 2025 17:30:14 +0000
Subject: [PATCH 7/8] Fixups

---
 .../Target/AArch64/AArch64FrameLowering.cpp   | 73 +++++++++----------
 .../lib/Target/AArch64/AArch64RegisterInfo.td |  4 +-
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp  | 15 ++--
 llvm/utils/TableGen/SubtargetEmitter.cpp      | 22 +++++-
 4 files changed, 63 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 9698343cdf284..81523adeefcee 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -4208,7 +4208,7 @@ struct ScopedScavengeOrSpill {
   ScopedScavengeOrSpill(ScopedScavengeOrSpill &&) = delete;
   ScopedScavengeOrSpill(MachineFunction &MF, MachineBasicBlock &MBB,
-                        MachineBasicBlock::iterator MBBI, Register &FreeReg,
+                        MachineBasicBlock::iterator MBBI,
                         Register SpillCandidate, const TargetRegisterClass &RC,
                         LiveRegUnits const &UsedRegs,
                         BitVector const &AllocatableRegs,
                         std::optional<int> &MaybeSpillFI)
@@ -4226,17 +4226,22 @@ struct ScopedScavengeOrSpill {
       *MaybeSpillFI = MFI.CreateSpillStackObject(TRI.getSpillSize(RC),
                                                  TRI.getSpillAlign(RC));
     }
-    FreeReg = SpilledReg = SpillCandidate;
+    FreeReg = SpillCandidate;
     SpillFI = MaybeSpillFI->value();
-    TII.storeRegToStackSlot(MBB, MBBI, SpilledReg, false, SpillFI, &RC, &TRI,
+    TII.storeRegToStackSlot(MBB, MBBI, FreeReg, false, *SpillFI, &RC, &TRI,
                             Register());
   }

-  bool hasSpilled() const { return SpilledReg != AArch64::NoRegister; }
+  bool hasSpilled() const { return SpillFI.has_value(); }
+
+  /// Returns the free register (found from scavenging or spilling a register).
+  Register freeRegister() const { return FreeReg; }
+
+  Register operator*() const { return freeRegister(); }

   ~ScopedScavengeOrSpill() {
     if (hasSpilled())
-      TII.loadRegFromStackSlot(MBB, MBBI, SpilledReg, SpillFI, &RC, &TRI,
+      TII.loadRegFromStackSlot(MBB, MBBI, FreeReg, *SpillFI, &RC, &TRI,
                                Register());
   }
@@ -4246,8 +4251,8 @@ struct ScopedScavengeOrSpill {
   const TargetRegisterClass &RC;
   const AArch64InstrInfo &TII;
   const TargetRegisterInfo &TRI;
-  Register SpilledReg = AArch64::NoRegister;
-  int SpillFI = -1;
+  Register FreeReg = AArch64::NoRegister;
+  std::optional<int> SpillFI;
 };

 /// Emergency stack slots for expanding SPILL_PPR_TO_ZPR_SLOT_PSEUDO and
@@ -4291,22 +4296,20 @@ static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB,
   auto *TII =
       static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

-  Register ZPredReg = AArch64::NoRegister;
-  ScopedScavengeOrSpill FindZPRReg(
-      MF, MBB, MachineBasicBlock::iterator(MI), ZPredReg, AArch64::Z0,
-      AArch64::ZPRRegClass, UsedRegs, SR.ZPRRegs,
+  ScopedScavengeOrSpill ZPredReg(
+      MF, MBB, MI, AArch64::Z0, AArch64::ZPRRegClass, UsedRegs, SR.ZPRRegs,
       isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.ZPRSpillFI);

   SmallVector<MachineInstr *, 2> MachineInstrs;
   const DebugLoc &DL = MI.getDebugLoc();
   MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::CPY_ZPzI_B))
-                              .addReg(ZPredReg, RegState::Define)
+                              .addReg(*ZPredReg, RegState::Define)
                               .add(MI.getOperand(0))
                               .addImm(1)
                               .addImm(0)
                               .getInstr());
   MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::STR_ZXI))
-                              .addReg(ZPredReg)
+                              .addReg(*ZPredReg)
                               .add(MI.getOperand(1))
                               .addImm(MI.getOperand(2).getImm())
                               .setMemRefs(MI.memoperands())
@@ -4338,61 +4341,56 @@ static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB,
   auto *TII =
       static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

-  Register ZPredReg = AArch64::NoRegister;
-  ScopedScavengeOrSpill FindZPRReg(
-      MF, MBB, MachineBasicBlock::iterator(MI), ZPredReg, AArch64::Z0,
-      AArch64::ZPRRegClass, UsedRegs, SR.ZPRRegs,
+  ScopedScavengeOrSpill ZPredReg(
+      MF, MBB, MI, AArch64::Z0, AArch64::ZPRRegClass, UsedRegs, SR.ZPRRegs,
       isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.ZPRSpillFI);

-  Register PredReg = AArch64::NoRegister;
-  ScopedScavengeOrSpill FindPPR3bReg(
-      MF, MBB, MachineBasicBlock::iterator(MI), PredReg, AArch64::P0,
-      AArch64::PPR_3bRegClass, UsedRegs, SR.PPR3bRegs,
+  ScopedScavengeOrSpill PredReg(
+      MF, MBB, MI, AArch64::P0, AArch64::PPR_3bRegClass, UsedRegs, SR.PPR3bRegs,
       isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.PPRSpillFI);

   // Elide NZCV spills if we know it is not used.
-  Register NZCVSaveReg = AArch64::NoRegister;
   bool IsNZCVUsed = !UsedRegs.available(AArch64::NZCV);
-  std::optional<ScopedScavengeOrSpill> FindGPRReg;
+  std::optional<ScopedScavengeOrSpill> NZCVSaveReg;
   if (IsNZCVUsed)
-    FindGPRReg.emplace(
-        MF, MBB, MachineBasicBlock::iterator(MI), NZCVSaveReg, AArch64::X0,
-        AArch64::GPR64RegClass, UsedRegs, SR.GPRRegs,
+    NZCVSaveReg.emplace(
+        MF, MBB, MI, AArch64::X0, AArch64::GPR64RegClass, UsedRegs, SR.GPRRegs,
        isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.GPRSpillFI);
   SmallVector<MachineInstr *, 2> MachineInstrs;
   const DebugLoc &DL = MI.getDebugLoc();
   MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::LDR_ZXI))
-                              .addReg(ZPredReg, RegState::Define)
+                              .addReg(*ZPredReg, RegState::Define)
                               .add(MI.getOperand(1))
                               .addImm(MI.getOperand(2).getImm())
                               .setMemRefs(MI.memoperands())
                               .getInstr());
   if (IsNZCVUsed)
-    MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::MRS))
-                                .addReg(NZCVSaveReg, RegState::Define)
-                                .addImm(AArch64SysReg::NZCV)
-                                .addReg(AArch64::NZCV, RegState::Implicit)
-                                .getInstr());
+    MachineInstrs.push_back(
+        BuildMI(MBB, MI, DL, TII->get(AArch64::MRS))
+            .addReg(NZCVSaveReg->freeRegister(), RegState::Define)
+            .addImm(AArch64SysReg::NZCV)
+            .addReg(AArch64::NZCV, RegState::Implicit)
+            .getInstr());
   MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::PTRUE_B))
-                              .addReg(PredReg, RegState::Define)
+                              .addReg(*PredReg, RegState::Define)
                               .addImm(31));
   MachineInstrs.push_back(
       BuildMI(MBB, MI, DL, TII->get(AArch64::CMPNE_PPzZI_B))
          .addReg(MI.getOperand(0).getReg(), RegState::Define)
-          .addReg(PredReg)
-          .addReg(ZPredReg)
+          .addReg(*PredReg)
+          .addReg(*ZPredReg)
          .addImm(0)
          .addReg(AArch64::NZCV, RegState::ImplicitDefine)
          .getInstr());
   if (IsNZCVUsed)
     MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::MSR))
                                 .addImm(AArch64SysReg::NZCV)
-                                .addReg(NZCVSaveReg)
+                                .addReg(NZCVSaveReg->freeRegister())
                                 .addReg(AArch64::NZCV, RegState::ImplicitDefine)
                                 .getInstr());

   propagateFrameFlags(MI, MachineInstrs);
-  return FindPPR3bReg.hasSpilled();
+  return PredReg.hasSpilled();
 }
@@ -5510,7 +5508,6 @@ void AArch64FrameLowering::emitRemarks(
       if (MI.getOpcode() != AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO &&
           MI.getOpcode() != AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO &&
           AArch64::PPRRegClass.contains(MI.getOperand(0).getReg())) {
-        MI.dump();
         RegTy = StackAccess::PPR;
       } else
         RegTy = StackAccess::FPR;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 6b6884c545758..fed9b7b173e9c 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -982,10 +982,10 @@ class ZPRRegOp
-def SMEWithStreamingMemoryHazards : HwMode<"", [Predicate<"false">]>;
+def SMEWithZPRPredicateSpills : HwMode<"", [Predicate<"false">]>;

 def PPRSpillFillRI : RegInfoByHwMode<
-    [DefaultMode, SMEWithStreamingMemoryHazards],
+    [DefaultMode, SMEWithZPRPredicateSpills],
     [RegInfo<16,16,16>, RegInfo<16,128,128>]>;

 class PPRClass : RegisterClass<"AArch64",
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 5864f57582e21..c4e3fcc1acef8 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -406,20 +406,15 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
 }

 unsigned AArch64Subtarget::getHwModeSet() const {
-  unsigned Modes = 0;
-
-  // Use a special hardware mode in streaming functions with stack hazards.
-  // This changes the spill size (and alignment) for the predicate register
-  // class.
-  //
-  // FIXME: This overrides the table-gen'd `getHwModeSet()` which only looks at
-  // CPU features.
+  // Use a special hardware mode in streaming[-compatible] functions with
+  // aarch64-enable-zpr-predicate-spills. This changes the spill size (and
+  // alignment) for the predicate register class.
   if (EnableZPRPredicateSpills.getValue() &&
       (isStreaming() || isStreamingCompatible())) {
-    Modes |= (1 << 0);
+    return to_underlying(AArch64HwModeBits::SMEWithZPRPredicateSpills);
   }

-  return Modes;
+  return to_underlying(AArch64HwModeBits::DefaultMode);
 }

 const CallLowering *AArch64Subtarget::getCallLowering() const {
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index 3db3ae65cc555..e1a416ac4f7fc 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -2082,6 +2082,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
   OS << "\n#ifdef GET_SUBTARGETINFO_TARGET_DESC\n";
   OS << "#undef GET_SUBTARGETINFO_TARGET_DESC\n\n";

+  OS << "#include \"llvm/ADT/BitmaskEnum.h\"\n";
   OS << "#include \"llvm/Support/Debug.h\"\n";
   OS << "#include \"llvm/Support/raw_ostream.h\"\n\n";
   if (Target == "AArch64")
@@ -2113,7 +2114,26 @@ void SubtargetEmitter::run(raw_ostream &OS) {
      << " unsigned CPUID) const override;\n"
      << " DFAPacketizer *createDFAPacketizer(const InstrItineraryData *IID)"
      << " const;\n";
-  if (TGT.getHwModes().getNumModeIds() > 1) {
+
+  const CodeGenHwModes &CGH = TGT.getHwModes();
+  if (CGH.getNumModeIds() > 1) {
+    OS << " enum class " << Target << "HwModeBits : unsigned {\n";
+    for (unsigned M = 0, NumModes = CGH.getNumModeIds(); M != NumModes; ++M) {
+      StringRef ModeName = CGH.getModeName(M, /*IncludeDefault=*/true);
+      OS << " " << ModeName << " = ";
+      if (M == CodeGenHwModes::DefaultMode)
+        OS << "0";
+      else
+        OS << "(1 << " << (M - 1) << ")";
+      OS << ",\n";
+      if (M == NumModes - 1) {
+        OS << "\n";
+        OS << " LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/" << ModeName
+           << "),\n";
+      }
+    }
+    OS << " };\n";
+    OS << " unsigned getHwModeSet() const override;\n";
     OS << " unsigned getHwMode(enum HwModeType type = HwMode_Default) const "
           "override;\n";

From 5110d1193c70f127449b2801ad7a7006908b780d Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Fri, 31 Jan 2025 15:59:25 +0000
Subject: [PATCH 8/8] Tweak

---
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 6 ++++--
 llvm/utils/TableGen/SubtargetEmitter.cpp     | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index c4e3fcc1acef8..68c386585a79a 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -406,15 +406,17 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
 }

 unsigned AArch64Subtarget::getHwModeSet() const {
+  AArch64HwModeBits Modes = AArch64HwModeBits::DefaultMode;
+
   // Use a special hardware mode in streaming[-compatible] functions with
   // aarch64-enable-zpr-predicate-spills. This changes the spill size (and
   // alignment) for the predicate register class.
   if (EnableZPRPredicateSpills.getValue() &&
       (isStreaming() || isStreamingCompatible())) {
-    return to_underlying(AArch64HwModeBits::SMEWithZPRPredicateSpills);
+    Modes |= AArch64HwModeBits::SMEWithZPRPredicateSpills;
   }

-  return to_underlying(AArch64HwModeBits::DefaultMode);
+  return to_underlying(Modes);
 }

 const CallLowering *AArch64Subtarget::getCallLowering() const {
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index e1a416ac4f7fc..49362ff5ef655 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -2121,7 +2121,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
     for (unsigned M = 0, NumModes = CGH.getNumModeIds(); M != NumModes; ++M) {
       StringRef ModeName = CGH.getModeName(M, /*IncludeDefault=*/true);
       OS << " " << ModeName << " = ";
-      if (M == CodeGenHwModes::DefaultMode)
+      if (M == 0)
         OS << "0";
       else
         OS << "(1 << " << (M - 1) << ")";
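
For reference, with only the two AArch64 hardware modes defined in this series (DefaultMode and SMEWithZPRPredicateSpills), the enum emitted by the SubtargetEmitter change above would look roughly like the sketch below. This is illustrative, not verbatim TableGen output; the values follow the "0" / "(1 << (M - 1))" scheme from the emitter loop.

```c++
#include "llvm/ADT/BitmaskEnum.h"

// Sketch of the generated GET_SUBTARGETINFO_HEADER content, assuming mode
// index 0 is DefaultMode and index 1 is SMEWithZPRPredicateSpills.
enum class AArch64HwModeBits : unsigned {
  DefaultMode = 0,
  SMEWithZPRPredicateSpills = (1 << 0),

  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/SMEWithZPRPredicateSpills),
};
```

Marking the enum as a bitmask is what allows AArch64Subtarget::getHwModeSet() to accumulate modes with `|=` and return `to_underlying(Modes)`, as done in the final patch.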