Skip to content

Commit 96ec1c2

Browse files
authored
[RISCV] Add nds.bfos and nds.bfoz for the short forward branch optimization. (#145836)
This adds predicated pseudo-instructions for nds.bfos and nds.bfoz (PseudoCCNDS_BFOS and PseudoCCNDS_BFOZ), which Andes 45-series CPUs also support for the short forward branch optimization.
1 parent 0515449 commit 96ec1c2

File tree

4 files changed

+191
-16
lines changed

4 files changed

+191
-16
lines changed

llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
147147
case RISCV::PseudoCCANDN:
148148
case RISCV::PseudoCCORN:
149149
case RISCV::PseudoCCXNOR:
150+
case RISCV::PseudoCCNDS_BFOS:
151+
case RISCV::PseudoCCNDS_BFOZ:
150152
return expandCCOp(MBB, MBBI, NextMBBI);
151153
case RISCV::PseudoVMCLR_M_B1:
152154
case RISCV::PseudoVMCLR_M_B2:
@@ -240,10 +242,20 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
240242
case RISCV::PseudoCCANDN: NewOpc = RISCV::ANDN; break;
241243
case RISCV::PseudoCCORN: NewOpc = RISCV::ORN; break;
242244
case RISCV::PseudoCCXNOR: NewOpc = RISCV::XNOR; break;
245+
case RISCV::PseudoCCNDS_BFOS: NewOpc = RISCV::NDS_BFOS; break;
246+
case RISCV::PseudoCCNDS_BFOZ: NewOpc = RISCV::NDS_BFOZ; break;
247+
}
248+
249+
if (NewOpc == RISCV::NDS_BFOZ || NewOpc == RISCV::NDS_BFOS) {
250+
BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg)
251+
.add(MI.getOperand(5))
252+
.add(MI.getOperand(6))
253+
.add(MI.getOperand(7));
254+
} else {
255+
BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg)
256+
.add(MI.getOperand(5))
257+
.add(MI.getOperand(6));
243258
}
244-
BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg)
245-
.add(MI.getOperand(5))
246-
.add(MI.getOperand(6));
247259
}
248260

249261
TrueBB->addSuccessor(MergeBB);

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1637,6 +1637,9 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
16371637
case RISCV::ANDN: return RISCV::PseudoCCANDN; break;
16381638
case RISCV::ORN: return RISCV::PseudoCCORN; break;
16391639
case RISCV::XNOR: return RISCV::PseudoCCXNOR; break;
1640+
1641+
case RISCV::NDS_BFOS: return RISCV::PseudoCCNDS_BFOS; break;
1642+
case RISCV::NDS_BFOZ: return RISCV::PseudoCCNDS_BFOZ; break;
16401643
}
16411644

16421645
return RISCV::INSTRUCTION_LIST_END;

llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,3 +625,21 @@ defset list<VTypeInfoToWide> AllQuadWidenableVD4DOTVectors = {
625625
defm : VPatTernaryVD4DOT_VV<"int_riscv_nds_vd4dots", "PseudoNDS_VD4DOTS", AllQuadWidenableVD4DOTVectors>;
626626
defm : VPatTernaryVD4DOT_VV<"int_riscv_nds_vd4dotu", "PseudoNDS_VD4DOTU", AllQuadWidenableVD4DOTVectors>;
627627
defm : VPatTernaryVD4DOT_VV<"int_riscv_nds_vd4dotsu", "PseudoNDS_VD4DOTSU", AllQuadWidenableVD4DOTVectors>;
628+
629+
//===----------------------------------------------------------------------===//
630+
// Pseudo-instructions for SFB (Short Forward Branch)
631+
//===----------------------------------------------------------------------===//
632+
633+
// Conditional-op pseudos wrapping NDS_BFOS / NDS_BFOZ for short forward branch
// optimization. Both are only available under HasShortForwardBranchOpt, have
// no side effects, and neither load nor store.
//
// Operand layout (index in the MachineInstr):
//   0: $dst     - result register, tied to $falsev by the constraint below.
//   1: $lhs, 2: $rhs, 3: $cc
//               - presumably the branch comparison operands and condition
//                 code; confirm against the other PseudoCC* definitions.
//   4: $falsev  - value $dst keeps when the bit-field op is not executed.
//   5: $rs1, 6: $msb, 7: $lsb
//               - the three operands RISCVExpandPseudo::expandCCOp forwards
//                 to the real NDS_BFOS / NDS_BFOZ instruction.
//
// NOTE(review): Size = 8 presumably covers the branch plus one 4-byte
// bit-field instruction - confirm.
let Predicates = [HasShortForwardBranchOpt], hasSideEffects = 0,
634+
mayLoad = 0, mayStore = 0, Size = 8, Constraints = "$dst = $falsev" in {
635+
// Predicated form of nds.bfos.
def PseudoCCNDS_BFOS : Pseudo<(outs GPR:$dst),
636+
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
637+
GPR:$falsev, GPR:$rs1, uimmlog2xlen:$msb, uimmlog2xlen:$lsb), []>,
638+
Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
639+
ReadSFBALU]>;
640+
// Predicated form of nds.bfoz; same operand list and scheduling as above.
def PseudoCCNDS_BFOZ : Pseudo<(outs GPR:$dst),
641+
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
642+
GPR:$falsev, GPR:$rs1, uimmlog2xlen:$msb, uimmlog2xlen:$lsb), []>,
643+
Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
644+
ReadSFBALU]>;
645+
}

llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll

Lines changed: 155 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
; RUN: llc -mtriple=riscv64 -mattr=+c,+zbb -verify-machineinstrs < %s \
33
; RUN: | FileCheck -check-prefix=NOSFB %s
44
; RUN: llc -mtriple=riscv64 -mcpu=sifive-u74 -mattr=+zbb -verify-machineinstrs < %s \
5-
; RUN: | FileCheck -check-prefixes=SFB,NOZICOND,RV64SFB %s
5+
; RUN: | FileCheck -check-prefixes=SFB,NOZICOND,RV64SFB,RV64SFBSIFIVEU74 %s
6+
; RUN: llc -mtriple=riscv64 -mcpu=andes-ax45 -mattr=+zbb -verify-machineinstrs < %s \
7+
; RUN: | FileCheck -check-prefixes=SFB,NOZICOND,RV64SFB,RV64SFBANDESAX45 %s
68
; RUN: llc -mtriple=riscv64 -mcpu=sifive-u74 -mattr=+zicond,+zbb \
79
; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=SFB,ZICOND %s
810
; RUN: llc -mtriple=riscv32 -mcpu=sifive-e76 -mattr=+zbb -verify-machineinstrs < %s \
@@ -67,18 +69,31 @@ define signext i32 @test3(i32 signext %v, i32 signext %w, i32 signext %x, i32 si
6769
; NOSFB-NEXT: addw a0, a1, a2
6870
; NOSFB-NEXT: ret
6971
;
70-
; RV64SFB-LABEL: test3:
71-
; RV64SFB: # %bb.0:
72-
; RV64SFB-NEXT: beqz a4, .LBB2_2
73-
; RV64SFB-NEXT: # %bb.1:
74-
; RV64SFB-NEXT: mv a2, a3
75-
; RV64SFB-NEXT: .LBB2_2:
76-
; RV64SFB-NEXT: bnez a4, .LBB2_4
77-
; RV64SFB-NEXT: # %bb.3:
78-
; RV64SFB-NEXT: mv a0, a1
79-
; RV64SFB-NEXT: .LBB2_4:
80-
; RV64SFB-NEXT: addw a0, a0, a2
81-
; RV64SFB-NEXT: ret
72+
; RV64SFBSIFIVEU74-LABEL: test3:
73+
; RV64SFBSIFIVEU74: # %bb.0:
74+
; RV64SFBSIFIVEU74-NEXT: beqz a4, .LBB2_2
75+
; RV64SFBSIFIVEU74-NEXT: # %bb.1:
76+
; RV64SFBSIFIVEU74-NEXT: mv a2, a3
77+
; RV64SFBSIFIVEU74-NEXT: .LBB2_2:
78+
; RV64SFBSIFIVEU74-NEXT: bnez a4, .LBB2_4
79+
; RV64SFBSIFIVEU74-NEXT: # %bb.3:
80+
; RV64SFBSIFIVEU74-NEXT: mv a0, a1
81+
; RV64SFBSIFIVEU74-NEXT: .LBB2_4:
82+
; RV64SFBSIFIVEU74-NEXT: addw a0, a0, a2
83+
; RV64SFBSIFIVEU74-NEXT: ret
84+
;
85+
; RV64SFBANDESAX45-LABEL: test3:
86+
; RV64SFBANDESAX45: # %bb.0:
87+
; RV64SFBANDESAX45-NEXT: bnez a4, .LBB2_2
88+
; RV64SFBANDESAX45-NEXT: # %bb.1:
89+
; RV64SFBANDESAX45-NEXT: mv a0, a1
90+
; RV64SFBANDESAX45-NEXT: .LBB2_2:
91+
; RV64SFBANDESAX45-NEXT: beqz a4, .LBB2_4
92+
; RV64SFBANDESAX45-NEXT: # %bb.3:
93+
; RV64SFBANDESAX45-NEXT: mv a2, a3
94+
; RV64SFBANDESAX45-NEXT: .LBB2_4:
95+
; RV64SFBANDESAX45-NEXT: addw a0, a0, a2
96+
; RV64SFBANDESAX45-NEXT: ret
8297
;
8398
; ZICOND-LABEL: test3:
8499
; ZICOND: # %bb.0:
@@ -1692,3 +1707,130 @@ entry:
16921707
%2 = select i1 %cond, i64 %C, i64 %1
16931708
ret i64 %2
16941709
}
1710+
1711+
; select_bfoz: %1 = (%A >> 23) & 7, i.e. the 3-bit field %A[25:23] zero-extended.
; The select returns %B when %cond is true and that field otherwise.
; The RV64SFBANDESAX45 checks expect the extract to be a single
; "nds.bfoz a1, a0, 25, 23" placed under the branch; the other configurations
; checked here use an slli/srli pair instead.
define i64 @select_bfoz(i64 %A, i64 %B, i1 zeroext %cond) {
1712+
; NOSFB-LABEL: select_bfoz:
1713+
; NOSFB: # %bb.0: # %entry
1714+
; NOSFB-NEXT: bnez a2, .LBB39_2
1715+
; NOSFB-NEXT: # %bb.1: # %entry
1716+
; NOSFB-NEXT: slli a0, a0, 38
1717+
; NOSFB-NEXT: srli a1, a0, 61
1718+
; NOSFB-NEXT: .LBB39_2: # %entry
1719+
; NOSFB-NEXT: mv a0, a1
1720+
; NOSFB-NEXT: ret
1721+
;
1722+
; RV64SFBSIFIVEU74-LABEL: select_bfoz:
1723+
; RV64SFBSIFIVEU74: # %bb.0: # %entry
1724+
; RV64SFBSIFIVEU74-NEXT: slli a0, a0, 38
1725+
; RV64SFBSIFIVEU74-NEXT: bnez a2, .LBB39_2
1726+
; RV64SFBSIFIVEU74-NEXT: # %bb.1: # %entry
1727+
; RV64SFBSIFIVEU74-NEXT: srli a1, a0, 61
1728+
; RV64SFBSIFIVEU74-NEXT: .LBB39_2: # %entry
1729+
; RV64SFBSIFIVEU74-NEXT: mv a0, a1
1730+
; RV64SFBSIFIVEU74-NEXT: ret
1731+
;
1732+
; RV64SFBANDESAX45-LABEL: select_bfoz:
1733+
; RV64SFBANDESAX45: # %bb.0: # %entry
1734+
; RV64SFBANDESAX45-NEXT: bnez a2, .LBB39_2
1735+
; RV64SFBANDESAX45-NEXT: # %bb.1: # %entry
1736+
; RV64SFBANDESAX45-NEXT: nds.bfoz a1, a0, 25, 23
1737+
; RV64SFBANDESAX45-NEXT: .LBB39_2: # %entry
1738+
; RV64SFBANDESAX45-NEXT: mv a0, a1
1739+
; RV64SFBANDESAX45-NEXT: ret
1740+
;
1741+
; ZICOND-LABEL: select_bfoz:
1742+
; ZICOND: # %bb.0: # %entry
1743+
; ZICOND-NEXT: slli a0, a0, 38
1744+
; ZICOND-NEXT: bnez a2, .LBB39_2
1745+
; ZICOND-NEXT: # %bb.1: # %entry
1746+
; ZICOND-NEXT: srli a1, a0, 61
1747+
; ZICOND-NEXT: .LBB39_2: # %entry
1748+
; ZICOND-NEXT: mv a0, a1
1749+
; ZICOND-NEXT: ret
1750+
;
1751+
; RV32SFB-LABEL: select_bfoz:
1752+
; RV32SFB: # %bb.0: # %entry
1753+
; RV32SFB-NEXT: slli a0, a0, 6
1754+
; RV32SFB-NEXT: mv a1, a3
1755+
; RV32SFB-NEXT: bnez a4, .LBB39_2
1756+
; RV32SFB-NEXT: # %bb.1: # %entry
1757+
; RV32SFB-NEXT: srli a2, a0, 29
1758+
; RV32SFB-NEXT: .LBB39_2: # %entry
1759+
; RV32SFB-NEXT: bnez a4, .LBB39_4
1760+
; RV32SFB-NEXT: # %bb.3: # %entry
1761+
; RV32SFB-NEXT: li a1, 0
1762+
; RV32SFB-NEXT: .LBB39_4: # %entry
1763+
; RV32SFB-NEXT: mv a0, a2
1764+
; RV32SFB-NEXT: ret
1765+
entry:
1766+
%0 = lshr i64 %A, 23
1767+
%1 = and i64 %0, 7
1768+
%2 = select i1 %cond, i64 %B, i64 %1
1769+
ret i64 %2
1770+
}
1771+
1772+
; select_bfos: %1 = ashr (shl %A, 31), 17, which places %A[32:0] at result bits
; [46:14], sign-extended above bit 46 and zero below bit 14.
; The select returns %B when %cond is true and %1 otherwise.
; The RV64SFBANDESAX45 checks expect this to be a single
; "nds.bfos a1, a0, 14, 46" placed under the branch; the other RV64
; configurations checked here use an slli/srai pair instead.
define i64 @select_bfos(i64 %A, i64 %B, i1 zeroext %cond) {
1773+
; NOSFB-LABEL: select_bfos:
1774+
; NOSFB: # %bb.0: # %entry
1775+
; NOSFB-NEXT: bnez a2, .LBB40_2
1776+
; NOSFB-NEXT: # %bb.1: # %entry
1777+
; NOSFB-NEXT: slli a0, a0, 31
1778+
; NOSFB-NEXT: srai a1, a0, 17
1779+
; NOSFB-NEXT: .LBB40_2: # %entry
1780+
; NOSFB-NEXT: mv a0, a1
1781+
; NOSFB-NEXT: ret
1782+
;
1783+
; RV64SFBSIFIVEU74-LABEL: select_bfos:
1784+
; RV64SFBSIFIVEU74: # %bb.0: # %entry
1785+
; RV64SFBSIFIVEU74-NEXT: slli a0, a0, 31
1786+
; RV64SFBSIFIVEU74-NEXT: bnez a2, .LBB40_2
1787+
; RV64SFBSIFIVEU74-NEXT: # %bb.1: # %entry
1788+
; RV64SFBSIFIVEU74-NEXT: srai a1, a0, 17
1789+
; RV64SFBSIFIVEU74-NEXT: .LBB40_2: # %entry
1790+
; RV64SFBSIFIVEU74-NEXT: mv a0, a1
1791+
; RV64SFBSIFIVEU74-NEXT: ret
1792+
;
1793+
; RV64SFBANDESAX45-LABEL: select_bfos:
1794+
; RV64SFBANDESAX45: # %bb.0: # %entry
1795+
; RV64SFBANDESAX45-NEXT: bnez a2, .LBB40_2
1796+
; RV64SFBANDESAX45-NEXT: # %bb.1: # %entry
1797+
; RV64SFBANDESAX45-NEXT: nds.bfos a1, a0, 14, 46
1798+
; RV64SFBANDESAX45-NEXT: .LBB40_2: # %entry
1799+
; RV64SFBANDESAX45-NEXT: mv a0, a1
1800+
; RV64SFBANDESAX45-NEXT: ret
1801+
;
1802+
; ZICOND-LABEL: select_bfos:
1803+
; ZICOND: # %bb.0: # %entry
1804+
; ZICOND-NEXT: slli a0, a0, 31
1805+
; ZICOND-NEXT: bnez a2, .LBB40_2
1806+
; ZICOND-NEXT: # %bb.1: # %entry
1807+
; ZICOND-NEXT: srai a1, a0, 17
1808+
; ZICOND-NEXT: .LBB40_2: # %entry
1809+
; ZICOND-NEXT: mv a0, a1
1810+
; ZICOND-NEXT: ret
1811+
;
1812+
; RV32SFB-LABEL: select_bfos:
1813+
; RV32SFB: # %bb.0: # %entry
1814+
; RV32SFB-NEXT: srli a5, a0, 1
1815+
; RV32SFB-NEXT: slli a6, a1, 31
1816+
; RV32SFB-NEXT: slli a0, a0, 31
1817+
; RV32SFB-NEXT: slli a1, a5, 15
1818+
; RV32SFB-NEXT: srli a0, a0, 17
1819+
; RV32SFB-NEXT: or a5, a6, a5
1820+
; RV32SFB-NEXT: bnez a4, .LBB40_2
1821+
; RV32SFB-NEXT: # %bb.1: # %entry
1822+
; RV32SFB-NEXT: or a2, a0, a1
1823+
; RV32SFB-NEXT: .LBB40_2: # %entry
1824+
; RV32SFB-NEXT: bnez a4, .LBB40_4
1825+
; RV32SFB-NEXT: # %bb.3: # %entry
1826+
; RV32SFB-NEXT: srai a3, a5, 17
1827+
; RV32SFB-NEXT: .LBB40_4: # %entry
1828+
; RV32SFB-NEXT: mv a0, a2
1829+
; RV32SFB-NEXT: mv a1, a3
1830+
; RV32SFB-NEXT: ret
1831+
entry:
1832+
%0 = shl i64 %A, 31
1833+
%1 = ashr i64 %0, 17
1834+
%2 = select i1 %cond, i64 %B, i64 %1
1835+
ret i64 %2
1836+
}

0 commit comments

Comments
 (0)