From 9a7751d251a833c6abc82ae468342fb66ff41b03 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Tue, 16 Jul 2024 00:08:47 +0800
Subject: [PATCH] [RISCV] Don't expand zero stride vp.strided.load if SEW>XLEN

A splat of a <vscale x 1 x i64> on RV32 will get lowered as a zero
strided load anyway (and won't match any .vx splat patterns), so don't
expand it to a scalar load + splat to avoid writing it to the stack.
---
 llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp         | 5 +++++
 .../CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll | 8 --------
 llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll         | 8 --------
 3 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index b3f3dc6e2256c..0e84eda0c9d07 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -174,6 +174,11 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
                       m_Value(BasePtr), m_Zero(), m_AllOnes(), m_Value(VL))))
     return false;
 
+  // If SEW>XLEN then a splat will get lowered as a zero strided load anyway, so
+  // avoid expanding here.
+  if (II.getType()->getScalarSizeInBits() > ST->getXLen())
+    return false;
+
   if (!isKnownNonZero(VL, {*DL, DT, nullptr, &II}))
     return false;
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index 95f853b77f18b..b8c7037580c46 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -674,17 +674,9 @@ define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
 define <4 x i64> @zero_strided_vadd.vx(<4 x i64> %v, ptr %ptr) {
 ; CHECK-RV32-LABEL: zero_strided_vadd.vx:
 ; CHECK-RV32:       # %bb.0:
-; CHECK-RV32-NEXT:    addi sp, sp, -16
-; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-RV32-NEXT:    lw a1, 4(a0)
-; CHECK-RV32-NEXT:    lw a0, 0(a0)
-; CHECK-RV32-NEXT:    sw a1, 12(sp)
-; CHECK-RV32-NEXT:    sw a0, 8(sp)
-; CHECK-RV32-NEXT:    addi a0, sp, 8
 ; CHECK-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-RV32-NEXT:    vlse64.v v10, (a0), zero
 ; CHECK-RV32-NEXT:    vadd.vv v8, v8, v10
-; CHECK-RV32-NEXT:    addi sp, sp, 16
 ; CHECK-RV32-NEXT:    ret
 ;
 ; CHECK-RV64-LABEL: zero_strided_vadd.vx:
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index 563da270272c2..0010f64a93fd6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -826,17 +826,9 @@ define <vscale x 1 x half> @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) {
 define <vscale x 1 x i64> @zero_strided_vadd.vx(<vscale x 1 x i64> %v, ptr %ptr) {
 ; CHECK-RV32-LABEL: zero_strided_vadd.vx:
 ; CHECK-RV32:       # %bb.0:
-; CHECK-RV32-NEXT:    addi sp, sp, -16
-; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-RV32-NEXT:    lw a1, 4(a0)
-; CHECK-RV32-NEXT:    lw a0, 0(a0)
-; CHECK-RV32-NEXT:    sw a1, 12(sp)
-; CHECK-RV32-NEXT:    sw a0, 8(sp)
-; CHECK-RV32-NEXT:    addi a0, sp, 8
 ; CHECK-RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
 ; CHECK-RV32-NEXT:    vlse64.v v9, (a0), zero
 ; CHECK-RV32-NEXT:    vadd.vv v8, v8, v9
-; CHECK-RV32-NEXT:    addi sp, sp, 16
 ; CHECK-RV32-NEXT:    ret
 ;
 ; CHECK-RV64-LABEL: zero_strided_vadd.vx:
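
Note (illustration, not part of the patch): below is a reduced .ll sketch
of the kind of input the new guard now skips on RV32. The function name
and the constant EVL are made up for illustration; the intrinsic and the
operand shape (zero stride, all-ones mask) are the ones
expandVPStrideLoad matches. With SEW=64 > XLEN=32, the pass now keeps the
zero-strided form, which lowers directly to a vlse64.v with stride zero
instead of the lw/sw stack round-trip deleted from the tests above.

declare <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)

define <vscale x 1 x i64> @splat_via_zero_stride(ptr %ptr) {
  ; Stride 0 with an all-ones mask rereads the same i64 for every lane,
  ; i.e. a splat expressed as a zero-strided load. EVL is the constant 4
  ; so the pass's isKnownNonZero(VL) precondition would otherwise hold.
  %load = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 4)
  ret <vscale x 1 x i64> %load
}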