Skip to content

Commit 83a98f4

Browse files
asbDisasm
authored and committed
[RISCV] Custom-legalise i32 SDIV/UDIV/UREM on RV64M
Follow the same custom legalisation strategy as used in D57085 for variable-length shifts (see that patch summary for more discussion). Although we may lose out on some late-stage DAG combines, I think this custom legalisation strategy is ultimately easier to reason about.

There are some codegen changes in rv64m-exhaustive-w-insts.ll but they are all neutral in terms of the number of instructions.

Differential Revision: https://reviews.llvm.org/D57096
llvm-svn: 352171
1 parent 0a77c0e commit 83a98f4

File tree

4 files changed

+71
-65
lines changed

4 files changed

+71
-65
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 28 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
8181
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
8282

8383
if (Subtarget.is64Bit()) {
84-
setTargetDAGCombine(ISD::ANY_EXTEND);
8584
setOperationAction(ISD::SHL, MVT::i32, Custom);
8685
setOperationAction(ISD::SRA, MVT::i32, Custom);
8786
setOperationAction(ISD::SRL, MVT::i32, Custom);
@@ -97,6 +96,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
9796
setOperationAction(ISD::UREM, XLenVT, Expand);
9897
}
9998

99+
if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
100+
setOperationAction(ISD::SDIV, MVT::i32, Custom);
101+
setOperationAction(ISD::UDIV, MVT::i32, Custom);
102+
setOperationAction(ISD::UREM, MVT::i32, Custom);
103+
}
104+
100105
setOperationAction(ISD::SDIVREM, XLenVT, Expand);
101106
setOperationAction(ISD::UDIVREM, XLenVT, Expand);
102107
setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
@@ -525,6 +530,12 @@ static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
525530
return RISCVISD::SRAW;
526531
case ISD::SRL:
527532
return RISCVISD::SRLW;
533+
case ISD::SDIV:
534+
return RISCVISD::DIVW;
535+
case ISD::UDIV:
536+
return RISCVISD::DIVUW;
537+
case ISD::UREM:
538+
return RISCVISD::REMUW;
528539
}
529540
}
530541

@@ -559,46 +570,24 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
559570
return;
560571
Results.push_back(customLegalizeToWOp(N, DAG));
561572
break;
562-
}
563-
}
564-
565-
// Returns true if the given node is an sdiv, udiv, or urem with non-constant
566-
// operands.
567-
static bool isVariableSDivUDivURem(SDValue Val) {
568-
switch (Val.getOpcode()) {
569-
default:
570-
return false;
571573
case ISD::SDIV:
572574
case ISD::UDIV:
573575
case ISD::UREM:
574-
return Val.getOperand(0).getOpcode() != ISD::Constant &&
575-
Val.getOperand(1).getOpcode() != ISD::Constant;
576+
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
577+
Subtarget.hasStdExtM() && "Unexpected custom legalisation");
578+
if (N->getOperand(0).getOpcode() == ISD::Constant ||
579+
N->getOperand(1).getOpcode() == ISD::Constant)
580+
return;
581+
Results.push_back(customLegalizeToWOp(N, DAG));
582+
break;
576583
}
577584
}
578585

579586
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
580587
DAGCombinerInfo &DCI) const {
581-
SelectionDAG &DAG = DCI.DAG;
582-
583588
switch (N->getOpcode()) {
584589
default:
585590
break;
586-
case ISD::ANY_EXTEND: {
587-
// If any-extending an i32 sdiv/udiv/urem to i64, then instead sign-extend
588-
// in order to increase the chance of being able to select the
589-
// divw/divuw/remuw instructions.
590-
SDValue Src = N->getOperand(0);
591-
if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32)
592-
break;
593-
if (!(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src)))
594-
break;
595-
SDLoc DL(N);
596-
// Don't add the new node to the DAGCombiner worklist, in order to avoid
597-
// an infinite cycle due to SimplifyDemandedBits converting the
598-
// SIGN_EXTEND back to ANY_EXTEND.
599-
return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src),
600-
false);
601-
}
602591
case RISCVISD::SplitF64: {
603592
// If the input to SplitF64 is just BuildPairF64 then the operation is
604593
// redundant. Instead, use BuildPairF64's operands directly.
@@ -634,6 +623,9 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
634623
case RISCVISD::SLLW:
635624
case RISCVISD::SRAW:
636625
case RISCVISD::SRLW:
626+
case RISCVISD::DIVW:
627+
case RISCVISD::DIVUW:
628+
case RISCVISD::REMUW:
637629
// TODO: As the result is sign-extended, this is conservatively correct. A
638630
// more precise answer could be calculated for SRAW depending on known
639631
// bits in the shift amount.
@@ -1737,6 +1729,12 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
17371729
return "RISCVISD::SRAW";
17381730
case RISCVISD::SRLW:
17391731
return "RISCVISD::SRLW";
1732+
case RISCVISD::DIVW:
1733+
return "RISCVISD::DIVW";
1734+
case RISCVISD::DIVUW:
1735+
return "RISCVISD::DIVUW";
1736+
case RISCVISD::REMUW:
1737+
return "RISCVISD::REMUW";
17401738
}
17411739
return nullptr;
17421740
}

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,12 @@ enum NodeType : unsigned {
3737
// instructions.
3838
SLLW,
3939
SRAW,
40-
SRLW
40+
SRLW,
41+
// 32-bit operations from RV64M that can't be simply matched with a pattern
42+
// at instruction selection time.
43+
DIVW,
44+
DIVUW,
45+
REMUW
4146
};
4247
}
4348

llvm/lib/Target/RISCV/RISCVInstrInfoM.td

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,14 @@
1212
//
1313
//===----------------------------------------------------------------------===//
1414

15+
//===----------------------------------------------------------------------===//
16+
// RISC-V specific DAG Nodes.
17+
//===----------------------------------------------------------------------===//
18+
19+
def riscv_divw : SDNode<"RISCVISD::DIVW", SDTIntBinOp>;
20+
def riscv_divuw : SDNode<"RISCVISD::DIVUW", SDTIntBinOp>;
21+
def riscv_remuw : SDNode<"RISCVISD::REMUW", SDTIntBinOp>;
22+
1523
//===----------------------------------------------------------------------===//
1624
// Instructions
1725
//===----------------------------------------------------------------------===//
@@ -53,18 +61,19 @@ def : PatGprGpr<urem, REMU>;
5361
let Predicates = [HasStdExtM, IsRV64] in {
5462
def : Pat<(sext_inreg (mul GPR:$rs1, GPR:$rs2), i32),
5563
(MULW GPR:$rs1, GPR:$rs2)>;
56-
def : Pat<(sext_inreg (sdiv (sexti32 GPR:$rs1),
57-
(sexti32 GPR:$rs2)), i32),
58-
(DIVW GPR:$rs1, GPR:$rs2)>;
59-
def : Pat<(zexti32 (sdiv (sexti32 GPR:$rs1),
60-
(sexti32 GPR:$rs2))),
61-
(SRLI (SLLI (DIVW GPR:$rs1, GPR:$rs2), 32), 32)>;
62-
def : Pat<(sext_inreg (udiv (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
63-
(DIVUW GPR:$rs1, GPR:$rs2)>;
64-
// It's cheaper to perform a divuw and zero-extend the result than to
65-
// zero-extend both inputs to a udiv.
66-
def : Pat<(udiv (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
67-
(SRLI (SLLI (DIVUW GPR:$rs1, GPR:$rs2), 32), 32)>;
64+
65+
def : PatGprGpr<riscv_divw, DIVW>;
66+
def : PatGprGpr<riscv_divuw, DIVUW>;
67+
def : PatGprGpr<riscv_remuw, REMUW>;
68+
69+
// Handle the specific cases where using DIVU/REMU would be correct and result
70+
// in fewer instructions than emitting DIVUW/REMUW then zero-extending the
71+
// result.
72+
def : Pat<(zexti32 (riscv_divuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))),
73+
(DIVU GPR:$rs1, GPR:$rs2)>;
74+
def : Pat<(zexti32 (riscv_remuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))),
75+
(REMU GPR:$rs1, GPR:$rs2)>;
76+
6877
// Although the sexti32 operands may not have originated from an i32 srem,
6978
// this pattern is safe as it is impossible for two sign extended inputs to
7079
// produce a result where res[63:32]=0 and res[31]=1.
@@ -73,10 +82,4 @@ def : Pat<(srem (sexti32 GPR:$rs1), (sexti32 GPR:$rs2)),
7382
def : Pat<(sext_inreg (srem (sexti32 GPR:$rs1),
7483
(sexti32 GPR:$rs2)), i32),
7584
(REMW GPR:$rs1, GPR:$rs2)>;
76-
def : Pat<(sext_inreg (urem (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
77-
(REMUW GPR:$rs1, GPR:$rs2)>;
78-
// It's cheaper to perform a remuw and zero-extend the result than to
79-
// zero-extend both inputs to a urem.
80-
def : Pat<(urem (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
81-
(SRLI (SLLI (REMUW GPR:$rs1, GPR:$rs2), 32), 32)>;
8285
} // Predicates = [HasStdExtM, IsRV64]

llvm/test/CodeGen/RISCV/rv64m-exhaustive-w-insts.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -454,9 +454,9 @@ define zeroext i32 @zext_divuw_aext_sext(i32 %a, i32 signext %b) nounwind {
454454
define zeroext i32 @zext_divuw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
455455
; RV64IM-LABEL: zext_divuw_aext_zext:
456456
; RV64IM: # %bb.0:
457+
; RV64IM-NEXT: divuw a0, a0, a1
457458
; RV64IM-NEXT: slli a0, a0, 32
458459
; RV64IM-NEXT: srli a0, a0, 32
459-
; RV64IM-NEXT: divu a0, a0, a1
460460
; RV64IM-NEXT: ret
461461
%1 = udiv i32 %a, %b
462462
ret i32 %1
@@ -487,9 +487,9 @@ define zeroext i32 @zext_divuw_sext_sext(i32 signext %a, i32 signext %b) nounwin
487487
define zeroext i32 @zext_divuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind {
488488
; RV64IM-LABEL: zext_divuw_sext_zext:
489489
; RV64IM: # %bb.0:
490+
; RV64IM-NEXT: divuw a0, a0, a1
490491
; RV64IM-NEXT: slli a0, a0, 32
491492
; RV64IM-NEXT: srli a0, a0, 32
492-
; RV64IM-NEXT: divu a0, a0, a1
493493
; RV64IM-NEXT: ret
494494
%1 = udiv i32 %a, %b
495495
ret i32 %1
@@ -498,9 +498,9 @@ define zeroext i32 @zext_divuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwin
498498
define zeroext i32 @zext_divuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
499499
; RV64IM-LABEL: zext_divuw_zext_aext:
500500
; RV64IM: # %bb.0:
501-
; RV64IM-NEXT: slli a1, a1, 32
502-
; RV64IM-NEXT: srli a1, a1, 32
503-
; RV64IM-NEXT: divu a0, a0, a1
501+
; RV64IM-NEXT: divuw a0, a0, a1
502+
; RV64IM-NEXT: slli a0, a0, 32
503+
; RV64IM-NEXT: srli a0, a0, 32
504504
; RV64IM-NEXT: ret
505505
%1 = udiv i32 %a, %b
506506
ret i32 %1
@@ -509,9 +509,9 @@ define zeroext i32 @zext_divuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
509509
define zeroext i32 @zext_divuw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind {
510510
; RV64IM-LABEL: zext_divuw_zext_sext:
511511
; RV64IM: # %bb.0:
512-
; RV64IM-NEXT: slli a1, a1, 32
513-
; RV64IM-NEXT: srli a1, a1, 32
514-
; RV64IM-NEXT: divu a0, a0, a1
512+
; RV64IM-NEXT: divuw a0, a0, a1
513+
; RV64IM-NEXT: slli a0, a0, 32
514+
; RV64IM-NEXT: srli a0, a0, 32
515515
; RV64IM-NEXT: ret
516516
%1 = udiv i32 %a, %b
517517
ret i32 %1
@@ -1235,9 +1235,9 @@ define zeroext i32 @zext_remuw_aext_sext(i32 %a, i32 signext %b) nounwind {
12351235
define zeroext i32 @zext_remuw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
12361236
; RV64IM-LABEL: zext_remuw_aext_zext:
12371237
; RV64IM: # %bb.0:
1238+
; RV64IM-NEXT: remuw a0, a0, a1
12381239
; RV64IM-NEXT: slli a0, a0, 32
12391240
; RV64IM-NEXT: srli a0, a0, 32
1240-
; RV64IM-NEXT: remu a0, a0, a1
12411241
; RV64IM-NEXT: ret
12421242
%1 = urem i32 %a, %b
12431243
ret i32 %1
@@ -1268,9 +1268,9 @@ define zeroext i32 @zext_remuw_sext_sext(i32 signext %a, i32 signext %b) nounwin
12681268
define zeroext i32 @zext_remuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind {
12691269
; RV64IM-LABEL: zext_remuw_sext_zext:
12701270
; RV64IM: # %bb.0:
1271+
; RV64IM-NEXT: remuw a0, a0, a1
12711272
; RV64IM-NEXT: slli a0, a0, 32
12721273
; RV64IM-NEXT: srli a0, a0, 32
1273-
; RV64IM-NEXT: remu a0, a0, a1
12741274
; RV64IM-NEXT: ret
12751275
%1 = urem i32 %a, %b
12761276
ret i32 %1
@@ -1279,9 +1279,9 @@ define zeroext i32 @zext_remuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwin
12791279
define zeroext i32 @zext_remuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
12801280
; RV64IM-LABEL: zext_remuw_zext_aext:
12811281
; RV64IM: # %bb.0:
1282-
; RV64IM-NEXT: slli a1, a1, 32
1283-
; RV64IM-NEXT: srli a1, a1, 32
1284-
; RV64IM-NEXT: remu a0, a0, a1
1282+
; RV64IM-NEXT: remuw a0, a0, a1
1283+
; RV64IM-NEXT: slli a0, a0, 32
1284+
; RV64IM-NEXT: srli a0, a0, 32
12851285
; RV64IM-NEXT: ret
12861286
%1 = urem i32 %a, %b
12871287
ret i32 %1
@@ -1290,9 +1290,9 @@ define zeroext i32 @zext_remuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
12901290
define zeroext i32 @zext_remuw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind {
12911291
; RV64IM-LABEL: zext_remuw_zext_sext:
12921292
; RV64IM: # %bb.0:
1293-
; RV64IM-NEXT: slli a1, a1, 32
1294-
; RV64IM-NEXT: srli a1, a1, 32
1295-
; RV64IM-NEXT: remu a0, a0, a1
1293+
; RV64IM-NEXT: remuw a0, a0, a1
1294+
; RV64IM-NEXT: slli a0, a0, 32
1295+
; RV64IM-NEXT: srli a0, a0, 32
12961296
; RV64IM-NEXT: ret
12971297
%1 = urem i32 %a, %b
12981298
ret i32 %1

0 commit comments

Comments
 (0)