diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 8e9782c1930c3..8aa1d2ba36915 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -17603,6 +17603,23 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, return false; }; + // Can the const C (> 1) be decomposed into (1 - (1 - 2^M) * 2^N)? E.g. + // C = 29 is equal to 1 - (1 - 2^3) * 2^2, giving M = 3 and N = 2. + auto isPowMinusMinusOneConst = [](APInt C, APInt &M, APInt &N) { + APInt CVMinus1 = C - 1; + if (!CVMinus1.isStrictlyPositive()) + return false; + unsigned TrailingZeroes = CVMinus1.countr_zero(); + APInt CVPlus1 = CVMinus1.ashr(TrailingZeroes) + 1; + if (CVPlus1.isPowerOf2()) { + unsigned BitWidth = CVPlus1.getBitWidth(); + M = APInt(BitWidth, CVPlus1.logBase2()); + N = APInt(BitWidth, TrailingZeroes); + return true; + } + return false; + }; + if (ConstValue.isNonNegative()) { // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M) // (mul x, 2^N - 1) => (sub (shl x, N), x) @@ -17611,6 +17628,8 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, // => MV = (add (shl x, M), x); (add (shl MV, N), MV) // (mul x, (2^M + 1) * 2^N + 1)) // => MV = add (shl x, M), x); add (shl MV, N), x) + // (mul x, 1 - (1 - 2^M) * 2^N) + // => MV = (sub x, (shl x, M)); (sub x, (shl MV, N)) APInt SCVMinus1 = ShiftedConstValue - 1; APInt SCVPlus1 = ShiftedConstValue + 1; APInt CVPlus1 = ConstValue + 1; @@ -17647,6 +17666,17 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, return Add(Shl(MVal, CVN.getZExtValue()), N0); } } + + if (Subtarget->hasALULSLFast() && + isPowMinusMinusOneConst(ConstValue, CVM, CVN)) { + unsigned ShiftM = CVM.getZExtValue(); + unsigned ShiftN = CVN.getZExtValue(); + // ALULSLFast implies that shifts of <= 4 places are fast + if (ShiftM <= 4 && ShiftN <= 4) { + SDValue MVal = Sub(N0, Shl(N0, ShiftM)); + return Sub(N0, Shl(MVal, ShiftN)); + } + 
} } else { // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) // (mul x, -(2^N + 1)) => - (add (shl x, N), x) diff --git a/llvm/test/CodeGen/AArch64/mul_pow2.ll b/llvm/test/CodeGen/AArch64/mul_pow2.ll index 0c9ea51ba367e..c4839175ded5a 100644 --- a/llvm/test/CodeGen/AArch64/mul_pow2.ll +++ b/llvm/test/CodeGen/AArch64/mul_pow2.ll @@ -527,6 +527,23 @@ define i32 @test25_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" { ret i32 %mul } +define i32 @test29_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" { +; CHECK-LABEL: test29_fast_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: sub w8, w0, w0, lsl #3 +; CHECK-NEXT: sub w0, w0, w8, lsl #2 +; CHECK-NEXT: ret +; +; GISEL-LABEL: test29_fast_shift: +; GISEL: // %bb.0: +; GISEL-NEXT: mov w8, #29 // =0x1d +; GISEL-NEXT: mul w0, w0, w8 +; GISEL-NEXT: ret + + %mul = mul nsw i32 %x, 29 ; 29 = 1 - (1-8) * 4 + ret i32 %mul +} + define i32 @test45_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" { ; CHECK-LABEL: test45_fast_shift: ; CHECK: // %bb.0: @@ -615,6 +632,42 @@ define i32 @test97_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" { ret i32 %mul } +; Negative test: The shift amount 5 (from 1 - 2^5) is larger than 4 +define i32 @test125_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" { +; CHECK-LABEL: test125_fast_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #125 // =0x7d +; CHECK-NEXT: mul w0, w0, w8 +; CHECK-NEXT: ret +; +; GISEL-LABEL: test125_fast_shift: +; GISEL: // %bb.0: +; GISEL-NEXT: mov w8, #125 // =0x7d +; GISEL-NEXT: mul w0, w0, w8 +; GISEL-NEXT: ret + + %mul = mul nsw i32 %x, 125 ; 125 = 1 - ((1-32) << 2) + ret i32 %mul +} + +; TODO: Support constants of the form (1 - 2^M) * (1 - 2^N) +define i32 @test225_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" { +; CHECK-LABEL: test225_fast_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #225 // =0xe1 +; CHECK-NEXT: mul w0, w0, w8 +; CHECK-NEXT: ret +; +; GISEL-LABEL: test225_fast_shift: +; GISEL: // %bb.0: +; GISEL-NEXT: mov w8, #225 // =0xe1 +; GISEL-NEXT: mul w0, w0, w8 +; GISEL-NEXT: ret 
+ + %mul = mul nsw i32 %x, 225 ; 225 = (1-16)*(1-16) + ret i32 %mul +} + ; Negative test: The shift amount 5 larger than 4 define i32 @test297_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" { ; CHECK-LABEL: test297_fast_shift: @@ -633,6 +686,24 @@ define i32 @test297_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" { ret i32 %mul } +; Negative test: The shift amount 5 (from << 5) is larger than 4 +define i32 @test481_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" { +; CHECK-LABEL: test481_fast_shift: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #481 // =0x1e1 +; CHECK-NEXT: mul w0, w0, w8 +; CHECK-NEXT: ret +; +; GISEL-LABEL: test481_fast_shift: +; GISEL: // %bb.0: +; GISEL-NEXT: mov w8, #481 // =0x1e1 +; GISEL-NEXT: mul w0, w0, w8 +; GISEL-NEXT: ret + + %mul = mul nsw i32 %x, 481 ; 481 = 1 - ((1-16) << 5) + ret i32 %mul +} + ; Convert mul x, -pow2 to shift. ; Convert mul x, -(pow2 +/- 1) to shift + add/sub. ; Lowering other negative constants are not supported yet. @@ -910,9 +981,9 @@ define <4 x i32> @muladd_demand_commute(<4 x i32> %x, <4 x i32> %y) { ; ; GISEL-LABEL: muladd_demand_commute: ; GISEL: // %bb.0: -; GISEL-NEXT: adrp x8, .LCPI52_0 +; GISEL-NEXT: adrp x8, .LCPI56_0 ; GISEL-NEXT: movi v3.4s, #1, msl #16 -; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI52_0] +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI56_0] ; GISEL-NEXT: mla v1.4s, v0.4s, v2.4s ; GISEL-NEXT: and v0.16b, v1.16b, v3.16b ; GISEL-NEXT: ret