Skip to content

Commit 16feacf

Browse files
committed
[AArch64][SelectionDAG] Lower multiplication by a constant to shl+sub+shl+sub
Change the cost model to lower a = b * C, where C = 1 - (1 - 2^m) * 2^n, to the two-instruction sequence `sub w8, w0, w0, lsl #m` followed by `sub w0, w0, w8, lsl #n`. Fixes #89430.
1 parent 431be86 commit 16feacf

File tree

2 files changed

+85
-2
lines changed

2 files changed

+85
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17603,6 +17603,23 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
1760317603
return false;
1760417604
};
1760517605

17606+
// Can the const C be decomposed into (1 - (1 - 2^M) * 2^N), e.g.:
17607+
// C = 29 is equal to 1 - (1 - 2^3) * 2^2.
17608+
auto isPowMinusMinusOneConst = [](APInt C, APInt &M, APInt &N) {
17609+
APInt CVMinus1 = C - 1;
17610+
if (CVMinus1.isNegative())
17611+
return false;
17612+
unsigned TrailingZeroes = CVMinus1.countr_zero();
17613+
APInt CVPlus1 = CVMinus1.ashr(TrailingZeroes) + 1;
17614+
if (CVPlus1.isPowerOf2()) {
17615+
unsigned BitWidth = CVPlus1.getBitWidth();
17616+
M = APInt(BitWidth, CVPlus1.logBase2());
17617+
N = APInt(BitWidth, TrailingZeroes);
17618+
return true;
17619+
}
17620+
return false;
17621+
};
17622+
1760617623
if (ConstValue.isNonNegative()) {
1760717624
// (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
1760817625
// (mul x, 2^N - 1) => (sub (shl x, N), x)
@@ -17611,6 +17628,8 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
1761117628
// => MV = (add (shl x, M), x); (add (shl MV, N), MV)
1761217629
// (mul x, (2^M + 1) * 2^N + 1)
1761317630
// => MV = (add (shl x, M), x); (add (shl MV, N), x)
17631+
// (mul x, 1 - (1 - 2^M) * 2^N)
17632+
// => MV = (sub x, (shl x, M)); (sub x, (shl MV, N))
1761417633
APInt SCVMinus1 = ShiftedConstValue - 1;
1761517634
APInt SCVPlus1 = ShiftedConstValue + 1;
1761617635
APInt CVPlus1 = ConstValue + 1;
@@ -17647,6 +17666,17 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
1764717666
return Add(Shl(MVal, CVN.getZExtValue()), N0);
1764817667
}
1764917668
}
17669+
17670+
if (Subtarget->hasALULSLFast() &&
17671+
isPowMinusMinusOneConst(ConstValue, CVM, CVN)) {
17672+
unsigned ShiftM = CVM.getZExtValue();
17673+
unsigned ShiftN = CVN.getZExtValue();
17674+
// ALULSLFast implies that shifts by <= 4 places are fast
17675+
if (ShiftM <= 4 && ShiftN <= 4) {
17676+
SDValue MVal = Sub(N0, Shl(N0, CVM.getZExtValue()));
17677+
return Sub(N0, Shl(MVal, CVN.getZExtValue()));
17678+
}
17679+
}
1765017680
} else {
1765117681
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
1765217682
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)

llvm/test/CodeGen/AArch64/mul_pow2.ll

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,23 @@ define i32 @test25_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
527527
ret i32 %mul
528528
}
529529

530+
define i32 @test29_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
531+
; CHECK-LABEL: test29_fast_shift:
532+
; CHECK: // %bb.0:
533+
; CHECK-NEXT: sub w8, w0, w0, lsl #3
534+
; CHECK-NEXT: sub w0, w0, w8, lsl #2
535+
; CHECK-NEXT: ret
536+
;
537+
; GISEL-LABEL: test29_fast_shift:
538+
; GISEL: // %bb.0:
539+
; GISEL-NEXT: mov w8, #29 // =0x1d
540+
; GISEL-NEXT: mul w0, w0, w8
541+
; GISEL-NEXT: ret
542+
543+
%mul = mul nsw i32 %x, 29 ; 29 = 1 - (1-8) * 4
544+
ret i32 %mul
545+
}
546+
530547
define i32 @test45_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
531548
; CHECK-LABEL: test45_fast_shift:
532549
; CHECK: // %bb.0:
@@ -615,6 +632,42 @@ define i32 @test97_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
615632
ret i32 %mul
616633
}
617634

635+
; Negative test: The shift amount M = 5 is out of bounds (larger than 4)
636+
define i32 @test125_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
637+
; CHECK-LABEL: test125_fast_shift:
638+
; CHECK: // %bb.0:
639+
; CHECK-NEXT: mov w8, #125 // =0x7d
640+
; CHECK-NEXT: mul w0, w0, w8
641+
; CHECK-NEXT: ret
642+
;
643+
; GISEL-LABEL: test125_fast_shift:
644+
; GISEL: // %bb.0:
645+
; GISEL-NEXT: mov w8, #125 // =0x7d
646+
; GISEL-NEXT: mul w0, w0, w8
647+
; GISEL-NEXT: ret
648+
649+
%mul = mul nsw i32 %x, 125 ; 125 = 1 - ((1-32) << 2)
650+
ret i32 %mul
651+
}
652+
653+
; Negative test: The shift amount N = 5 is out of bounds (larger than 4)
654+
define i32 @test225_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
655+
; CHECK-LABEL: test225_fast_shift:
656+
; CHECK: // %bb.0:
657+
; CHECK-NEXT: mov w8, #225 // =0xe1
658+
; CHECK-NEXT: mul w0, w0, w8
659+
; CHECK-NEXT: ret
660+
;
661+
; GISEL-LABEL: test225_fast_shift:
662+
; GISEL: // %bb.0:
663+
; GISEL-NEXT: mov w8, #225 // =0xe1
664+
; GISEL-NEXT: mul w0, w0, w8
665+
; GISEL-NEXT: ret
666+
667+
%mul = mul nsw i32 %x, 225 ; 225 = 1 - ((1-8) << 5)
668+
ret i32 %mul
669+
}
670+
618671
; Negative test: The shift amount 5 larger than 4
619672
define i32 @test297_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
620673
; CHECK-LABEL: test297_fast_shift:
@@ -910,9 +963,9 @@ define <4 x i32> @muladd_demand_commute(<4 x i32> %x, <4 x i32> %y) {
910963
;
911964
; GISEL-LABEL: muladd_demand_commute:
912965
; GISEL: // %bb.0:
913-
; GISEL-NEXT: adrp x8, .LCPI52_0
966+
; GISEL-NEXT: adrp x8, .LCPI55_0
914967
; GISEL-NEXT: movi v3.4s, #1, msl #16
915-
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI52_0]
968+
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI55_0]
916969
; GISEL-NEXT: mla v1.4s, v0.4s, v2.4s
917970
; GISEL-NEXT: and v0.16b, v1.16b, v3.16b
918971
; GISEL-NEXT: ret

0 commit comments

Comments
 (0)