Skip to content

[AArch64][SelectionDAG] Lower multiplication by a constant to shl+sub+shl+sub #90199

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17603,6 +17603,23 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
return false;
};

// Can the const C be decomposed into (1 - (1 - 2^M) * 2^N), eg:
// C = 29 is equal to 1 - (1 - 2^3) * 2^2.
auto isPowMinusMinusOneConst = [](APInt C, APInt &M, APInt &N) {
APInt CVMinus1 = C - 1;
if (CVMinus1.isNegative())
return false;
unsigned TrailingZeroes = CVMinus1.countr_zero();
APInt CVPlus1 = CVMinus1.ashr(TrailingZeroes) + 1;
if (CVPlus1.isPowerOf2()) {
unsigned BitWidth = CVPlus1.getBitWidth();
M = APInt(BitWidth, CVPlus1.logBase2());
N = APInt(BitWidth, TrailingZeroes);
return true;
}
return false;
};

if (ConstValue.isNonNegative()) {
// (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
// (mul x, 2^N - 1) => (sub (shl x, N), x)
Expand All @@ -17611,6 +17628,8 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
// => MV = (add (shl x, M), x); (add (shl MV, N), MV)
// (mul x, (2^M + 1) * 2^N + 1)
// => MV = (add (shl x, M), x); (add (shl MV, N), x)
// (mul x, 1 - (1 - 2^M) * 2^N)
// => MV = (sub x, (shl x, M)); (sub x, (shl MV, N))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Formula looks inconsistent with other formulas.

Copy link
Contributor Author

@vfdff vfdff Apr 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, it is MV = sub (x - (shl x, M)); sub (x - (shl MV, N)), thanks.

APInt SCVMinus1 = ShiftedConstValue - 1;
APInt SCVPlus1 = ShiftedConstValue + 1;
APInt CVPlus1 = ConstValue + 1;
Expand Down Expand Up @@ -17647,6 +17666,17 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
return Add(Shl(MVal, CVN.getZExtValue()), N0);
}
}

if (Subtarget->hasALULSLFast() &&
    isPowMinusMinusOneConst(ConstValue, CVM, CVN)) {
  // ALULSLFast implies that shifts by at most 4 places are fast, so the
  // sub+shl sequence is only emitted when both shift amounts qualify.
  const unsigned ShiftM = CVM.getZExtValue();
  const unsigned ShiftN = CVN.getZExtValue();
  const bool BothShiftsFast = ShiftM <= 4 && ShiftN <= 4;
  if (BothShiftsFast) {
    // x * C == x - ((x - (x << M)) << N)
    SDValue Inner = Sub(N0, Shl(N0, ShiftM));
    return Sub(N0, Shl(Inner, ShiftN));
  }
}
} else {
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)
Expand Down
75 changes: 73 additions & 2 deletions llvm/test/CodeGen/AArch64/mul_pow2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,23 @@ define i32 @test25_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
ret i32 %mul
}

; Positive test: 29 = 1 - (1 - 2^3) * 2^2, so both shift amounts (3 and 2) are
; at most 4. With +alu-lsl-fast the CHECK lines expect two subs with shifted
; operands (lsl #3, then lsl #2); the GISEL lines still expect mov + mul.
define i32 @test29_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
; CHECK-LABEL: test29_fast_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w0, w0, lsl #3
; CHECK-NEXT: sub w0, w0, w8, lsl #2
; CHECK-NEXT: ret
;
; GISEL-LABEL: test29_fast_shift:
; GISEL: // %bb.0:
; GISEL-NEXT: mov w8, #29 // =0x1d
; GISEL-NEXT: mul w0, w0, w8
; GISEL-NEXT: ret

%mul = mul nsw i32 %x, 29 ; 29 = 1 - (1-8) * 4
ret i32 %mul
}

define i32 @test45_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
; CHECK-LABEL: test45_fast_shift:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -615,6 +632,42 @@ define i32 @test97_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
ret i32 %mul
}

; Negative test: 125 = 1 - ((1 - 2^5) << 2) would need a shift by 5, which
; exceeds the limit of 4 for fast shifts, so the mul is expected to be kept.
define i32 @test125_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
; CHECK-LABEL: test125_fast_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #125 // =0x7d
; CHECK-NEXT: mul w0, w0, w8
; CHECK-NEXT: ret
;
; GISEL-LABEL: test125_fast_shift:
; GISEL: // %bb.0:
; GISEL-NEXT: mov w8, #125 // =0x7d
; GISEL-NEXT: mul w0, w0, w8
; GISEL-NEXT: ret

%mul = mul nsw i32 %x, 125 ; 125 = 1 - ((1-32) << 2)
ret i32 %mul
}

; TODO: Support constants of the form (1 - 2^M) * (1 - 2^N), e.g. 225 = (1 - 16) * (1 - 16).
define i32 @test225_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
; CHECK-LABEL: test225_fast_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #225 // =0xe1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can write x*225 as x*-15*-15.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, Yes. 225 = (1-16) * (1-16), which is not supported now.

; CHECK-NEXT: mul w0, w0, w8
; CHECK-NEXT: ret
;
; GISEL-LABEL: test225_fast_shift:
; GISEL: // %bb.0:
; GISEL-NEXT: mov w8, #225 // =0xe1
; GISEL-NEXT: mul w0, w0, w8
; GISEL-NEXT: ret

%mul = mul nsw i32 %x, 225 ; 225 = (1-16)*(1-16)
ret i32 %mul
}

; Negative test: The shift amount 5 is larger than 4
define i32 @test297_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
; CHECK-LABEL: test297_fast_shift:
Expand All @@ -633,6 +686,24 @@ define i32 @test297_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
ret i32 %mul
}

; Negative test: 481 = 1 - ((1 - 2^4) << 5) would need a shift by 5, which
; exceeds the limit of 4 for fast shifts, so the mul is expected to be kept.
define i32 @test481_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
; CHECK-LABEL: test481_fast_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #481 // =0x1e1
; CHECK-NEXT: mul w0, w0, w8
; CHECK-NEXT: ret
;
; GISEL-LABEL: test481_fast_shift:
; GISEL: // %bb.0:
; GISEL-NEXT: mov w8, #481 // =0x1e1
; GISEL-NEXT: mul w0, w0, w8
; GISEL-NEXT: ret

%mul = mul nsw i32 %x, 481 ; 481 = 1 - ((1-16) << 5)
ret i32 %mul
}

; Convert mul x, -pow2 to shift.
; Convert mul x, -(pow2 +/- 1) to shift + add/sub.
; Lowering other negative constants is not supported yet.
Expand Down Expand Up @@ -910,9 +981,9 @@ define <4 x i32> @muladd_demand_commute(<4 x i32> %x, <4 x i32> %y) {
;
; GISEL-LABEL: muladd_demand_commute:
; GISEL: // %bb.0:
; GISEL-NEXT: adrp x8, .LCPI52_0
; GISEL-NEXT: adrp x8, .LCPI56_0
; GISEL-NEXT: movi v3.4s, #1, msl #16
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI52_0]
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI56_0]
; GISEL-NEXT: mla v1.4s, v0.4s, v2.4s
; GISEL-NEXT: and v0.16b, v1.16b, v3.16b
; GISEL-NEXT: ret
Expand Down