diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 712f6154732a2..1495d38dda7ea 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -13241,6 +13241,8 @@ SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op, SDValue RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const { + using namespace SDPatternMatch; + SDLoc DL(Op); SDValue Op1 = Op.getOperand(0); @@ -13285,6 +13287,42 @@ RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op, SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2); } + auto getVectorFirstEle = [](SDValue Vec) { + SDValue FirstEle; + if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero()))) + return FirstEle; + + if (Vec.getOpcode() == ISD::SPLAT_VECTOR || + Vec.getOpcode() == ISD::BUILD_VECTOR) + return Vec.getOperand(0); + + return SDValue(); + }; + + if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1)) + if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) { + MVT EltVT = ContainerVT.getVectorElementType(); + SDValue Result; + if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) || + EltVT == MVT::bf16) { + EltVT = EltVT.changeTypeToInteger(); + ContainerVT = ContainerVT.changeVectorElementType(EltVT); + Op2 = DAG.getBitcast(ContainerVT, Op2); + FirstEle = + DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT); + } + Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL + : RISCVISD::VSLIDE1UP_VL, + DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2, + FirstEle, Mask, EVL2); + Result = DAG.getBitcast( + ContainerVT.changeVectorElementType(VT.getVectorElementType()), + Result); + return VT.isFixedLengthVector() + ? 
convertFromScalableVector(VT, Result, DAG, Subtarget) + : Result; + } + int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue(); SDValue DownOffset, UpOffset; if (ImmValue >= 0) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll index 7bf22247093f7..d0562e2be346f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll @@ -299,3 +299,52 @@ define <8 x half> @test_vp_splice_v8f16_masked(<8 x half> %va, <8 x half> %vb, < %v = call <8 x half> @llvm.experimental.vp.splice.v8f16(<8 x half> %va, <8 x half> %vb, i32 5, <8 x i1> %mask, i32 %evla, i32 %evlb) ret <8 x half> %v } + +define <4 x i32> @test_vp_splice_v4i32_with_firstelt(i32 %first, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_splice_v4i32_with_firstelt: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %va = insertelement <4 x i32> poison, i32 %first, i32 0 + %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 0, <4 x i1> %mask, i32 1, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @test_vp_splice_v4i32_with_splat_firstelt(i32 %first, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_splice_v4i32_with_splat_firstelt: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ins = insertelement <4 x i32> poison, i32 %first, i32 0 + %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %splat, <4 x i32> %vb, i32 0, <4 x i1> %mask, i32 1, i32 %evl) + ret <4 x i32> %v +} + +define <4 x float> @test_vp_splice_nxv2f32_with_firstelt(float %first, <4 x float> %vb, <4 x i1> 
%mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_splice_nxv2f32_with_firstelt: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %va = insertelement <4 x float> poison, float %first, i32 0 + %v = call <4 x float> @llvm.experimental.vp.splice.nxv2f32(<4 x float> %va, <4 x float> %vb, i32 0, <4 x i1> %mask, i32 1, i32 %evl) + ret <4 x float> %v +} + +define <4 x half> @test_vp_splice_nxv2f16_with_firstelt(half %first, <4 x half> %vb, <4 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_splice_nxv2f16_with_firstelt: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %va = insertelement <4 x half> poison, half %first, i32 0 + %v = call <4 x half> @llvm.experimental.vp.splice.nxv2f16(<4 x half> %va, <4 x half> %vb, i32 0, <4 x i1> %mask, i32 1, i32 %evl) + ret <4 x half> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll index 6008ea43e9158..9c8c5da75ff7c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfh,+zvfbfmin -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfh,+zvfbfmin -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN define <vscale x 2 x i64> @test_vp_splice_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: 
test_vp_splice_nxv2i64: @@ -505,3 +505,73 @@ define <vscale x 2 x bfloat> @test_vp_splice_nxv2bf16_masked(<vscale x 2 x bfloa %v = call <vscale x 2 x bfloat> @llvm.experimental.vp.splice.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 5, <vscale x 2 x i1> %mask, i32 %evla, i32 %evlb) ret <vscale x 2 x bfloat> %v } + +define <vscale x 2 x i32> @test_vp_splice_nxv2i32_with_firstelt(i32 %first, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_splice_nxv2i32_with_firstelt: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %va = insertelement <vscale x 2 x i32> poison, i32 %first, i32 0 + %v = call <vscale x 2 x i32> @llvm.experimental.vp.splice.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, i32 0, <vscale x 2 x i1> %mask, i32 1, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @test_vp_splice_nxv2i32_with_splat_firstelt(i32 %first, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_splice_nxv2i32_with_splat_firstelt: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vslide1up.vx v9, v8, a0, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ins = insertelement <vscale x 2 x i32> poison, i32 %first, i32 0 + %splat = shufflevector <vscale x 2 x i32> %ins, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.experimental.vp.splice.nxv2i32(<vscale x 2 x i32> %splat, <vscale x 2 x i32> %vb, i32 0, <vscale x 2 x i1> %mask, i32 1, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x float> @test_vp_splice_nxv2f32_with_firstelt(float %first, <vscale x 2 x float> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_splice_nxv2f32_with_firstelt: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %va = insertelement <vscale x 2 x float> poison, float %first, i32 0 + %v = call <vscale x 2 x float> @llvm.experimental.vp.splice.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, i32 0, <vscale x 2 x i1> %mask, i32 1, i32 %evl) + ret <vscale x 2 x float> %v +} + +define <vscale x 2 x half> @test_vp_splice_nxv2f16_with_firstelt(half %first, <vscale x 2 x half> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) { +; ZVFH-LABEL: test_vp_splice_nxv2f16_with_firstelt: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFH-NEXT: vfslide1up.vf v9, v8, fa0, v0.t +; ZVFH-NEXT: vmv1r.v v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: test_vp_splice_nxv2f16_with_firstelt: 
+; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vslide1up.vx v9, v8, a1, v0.t +; ZVFHMIN-NEXT: vmv1r.v v8, v9 +; ZVFHMIN-NEXT: ret + %va = insertelement <vscale x 2 x half> poison, half %first, i32 0 + %v = call <vscale x 2 x half> @llvm.experimental.vp.splice.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, i32 0, <vscale x 2 x i1> %mask, i32 1, i32 %evl) + ret <vscale x 2 x half> %v +} + +define <vscale x 2 x bfloat> @test_vp_splice_nxv2bf16_with_firstelt(bfloat %first, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: test_vp_splice_nxv2bf16_with_firstelt: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.h a1, fa0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vslide1up.vx v9, v8, a1, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %va = insertelement <vscale x 2 x bfloat> poison, bfloat %first, i32 0 + %v = call <vscale x 2 x bfloat> @llvm.experimental.vp.splice.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 0, <vscale x 2 x i1> %mask, i32 1, i32 %evl) + ret <vscale x 2 x bfloat> %v +}