From 62b681b6d8d160f5e28897fdf145577820236434 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 16 Apr 2024 15:15:11 +0100 Subject: [PATCH 1/6] ISel/AArch64/SVE: custom lower vector ISD::[L]LRINT Since 98c90a1 (ISel: introduce vector ISD::LRINT, ISD::LLRINT; custom RISCV lowering), ISD::LRINT and ISD::LLRINT now have vector variants, that are custom lowered on RISCV, and scalarized on all other targets. Since 2302e4c (Reland "VectorUtils: mark xrint as trivially vectorizable"), lrint and llrint are trivially vectorizable, so all the vectorizers in-tree will produce vector variants when possible. Add a custom lowering for AArch64 to custom-lower the vector variants natively using a combination of frintx, fcvte, and fcvtzs, when SVE is present. --- .../Target/AArch64/AArch64ISelLowering.cpp | 66 +- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1 + .../AArch64/sve-fixed-vector-llrint.ll | 893 ++++++++++++++++++ .../CodeGen/AArch64/sve-fixed-vector-lrint.ll | 893 ++++++++++++++++++ llvm/test/CodeGen/AArch64/sve-llrint.ll | 492 ++++++++++ llvm/test/CodeGen/AArch64/sve-lrint.ll | 492 ++++++++++ llvm/test/CodeGen/AArch64/vector-llrint.ll | 335 +++++++ llvm/test/CodeGen/AArch64/vector-lrint.ll | 372 +++++++- 8 files changed, 3512 insertions(+), 32 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-llrint.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-lrint.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7344387ffe552..41372b5432a0e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1526,6 +1526,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FNEARBYINT, VT, Custom); setOperationAction(ISD::FRINT, VT, Custom); 
setOperationAction(ISD::FROUND, VT, Custom); + setOperationAction(ISD::LRINT, VT, Custom); + setOperationAction(ISD::LLRINT, VT, Custom); setOperationAction(ISD::FROUNDEVEN, VT, Custom); setOperationAction(ISD::FTRUNC, VT, Custom); setOperationAction(ISD::FSQRT, VT, Custom); @@ -1940,6 +1942,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { setOperationAction(ISD::FP_TO_UINT, VT, Default); setOperationAction(ISD::FRINT, VT, Default); setOperationAction(ISD::FROUND, VT, Default); + setOperationAction(ISD::LRINT, VT, Default); + setOperationAction(ISD::LLRINT, VT, Default); setOperationAction(ISD::FROUNDEVEN, VT, Default); setOperationAction(ISD::FSQRT, VT, Default); setOperationAction(ISD::FSUB, VT, Default); @@ -4362,6 +4366,59 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat); } +SDValue AArch64TargetLowering::LowerVectorXRINT(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDValue Src = Op.getOperand(0); + SDLoc DL(Op); + + assert(VT.isVector() && "Expected vector type"); + + // We can't custom-lower ISD::[L]LRINT without SVE, since it requires + // AArch64ISD::FCVTZS_MERGE_PASSTHRU. + if (!Subtarget->isSVEAvailable()) + return SDValue(); + + EVT ContainerVT = VT; + EVT SrcVT = Src.getValueType(); + EVT CastVT = + ContainerVT.changeVectorElementType(SrcVT.getVectorElementType()); + + if (VT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(DAG, VT); + CastVT = ContainerVT.changeVectorElementType(SrcVT.getVectorElementType()); + Src = convertToScalableVector(DAG, CastVT, Src); + } + + // First, round the floating-point value into a floating-point register with + // the current rounding mode. + SDValue FOp = DAG.getNode(ISD::FRINT, DL, CastVT, Src); + + // In the case of vector filled with f32, ftrunc will convert it to an i32, + // but a vector filled with i32 isn't legal. So, FP_EXTEND the f32 into the + // required size. 
+ size_t SrcSz = SrcVT.getScalarSizeInBits(); + size_t ContainerSz = ContainerVT.getScalarSizeInBits(); + if (ContainerSz > SrcSz) { + EVT SizedVT = MVT::getVectorVT(MVT::getFloatingPointVT(ContainerSz), + ContainerVT.getVectorElementCount()); + FOp = DAG.getNode(ISD::FP_EXTEND, DL, SizedVT, FOp.getOperand(0)); + } + + // Finally, truncate the rounded floating point to an integer, rounding to + // zero. + SDValue Pred = getPredicateForVector(DAG, DL, ContainerVT); + SDValue Undef = DAG.getUNDEF(ContainerVT); + SDValue Truncated = + DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, DL, ContainerVT, + {Pred, FOp.getOperand(0), Undef}, FOp->getFlags()); + + if (VT.isScalableVector()) + return Truncated; + + return convertFromScalableVector(DAG, VT, Truncated); +} + SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. @@ -6685,10 +6742,13 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerVECTOR_DEINTERLEAVE(Op, DAG); case ISD::VECTOR_INTERLEAVE: return LowerVECTOR_INTERLEAVE(Op, DAG); - case ISD::LROUND: - case ISD::LLROUND: case ISD::LRINT: - case ISD::LLRINT: { + case ISD::LLRINT: + if (Op.getValueType().isVector()) + return LowerVectorXRINT(Op, DAG); + [[fallthrough]]; + case ISD::LROUND: + case ISD::LLROUND: { assert((Op.getOperand(0).getValueType() == MVT::f16 || Op.getOperand(0).getValueType() == MVT::bf16) && "Expected custom lowering of rounding operations only for f16"); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index fbdc4de5617fe..b3e282a040603 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -1165,6 +1165,7 @@ class AArch64TargetLowering : public TargetLowering { SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue 
LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorXRINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll new file mode 100644 index 0000000000000..febfa785eaeff --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll @@ -0,0 +1,893 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s + +define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { +; CHECK-LABEL: llrint_v1i64_v1f16: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: fcvtzs x8, h0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %a = call <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>) + +define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { +; CHECK-LABEL: llrint_v1i64_v2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov h1, v0.h[1] +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: frintx h1, h1 +; CHECK-NEXT: fcvtzs x8, h0 +; CHECK-NEXT: fcvtzs x9, h1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: ret + %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>) + +define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { +; CHECK-LABEL: llrint_v4i64_v4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov h1, v0.h[2] +; CHECK-NEXT: mov h2, v0.h[1] +; CHECK-NEXT: mov h3, v0.h[3] +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: frintx h1, h1 +; CHECK-NEXT: frintx h2, h2 +; CHECK-NEXT: frintx h3, h3 +; CHECK-NEXT: fcvtzs 
x8, h0 +; CHECK-NEXT: fcvtzs x9, h1 +; CHECK-NEXT: fcvtzs x10, h2 +; CHECK-NEXT: fcvtzs x11, h3 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: ret + %a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>) + +define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { +; CHECK-LABEL: llrint_v8i64_v8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov h4, v0.h[2] +; CHECK-NEXT: mov h3, v0.h[1] +; CHECK-NEXT: mov h7, v0.h[3] +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: mov h2, v1.h[2] +; CHECK-NEXT: mov h5, v1.h[1] +; CHECK-NEXT: mov h6, v1.h[3] +; CHECK-NEXT: frintx h1, h1 +; CHECK-NEXT: frintx h4, h4 +; CHECK-NEXT: frintx h3, h3 +; CHECK-NEXT: frintx h7, h7 +; CHECK-NEXT: fcvtzs x9, h0 +; CHECK-NEXT: frintx h2, h2 +; CHECK-NEXT: frintx h5, h5 +; CHECK-NEXT: frintx h6, h6 +; CHECK-NEXT: fcvtzs x8, h1 +; CHECK-NEXT: fcvtzs x12, h4 +; CHECK-NEXT: fcvtzs x11, h3 +; CHECK-NEXT: fcvtzs x15, h7 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fcvtzs x10, h2 +; CHECK-NEXT: fcvtzs x13, h5 +; CHECK-NEXT: fcvtzs x14, h6 +; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: fmov d1, x12 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: fmov d3, x10 +; CHECK-NEXT: mov v2.d[1], x13 +; CHECK-NEXT: mov v1.d[1], x15 +; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: ret + %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>) + +define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { +; CHECK-LABEL: llrint_v16i64_v16f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: mov h4, v0.h[1] +; CHECK-NEXT: frintx h5, h0 +; CHECK-NEXT: mov h18, v0.h[2] +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: frintx h6, h2 +; CHECK-NEXT: mov h7, v2.h[1] +; CHECK-NEXT: mov 
h16, v2.h[2] +; CHECK-NEXT: mov h17, v3.h[2] +; CHECK-NEXT: frintx h19, h3 +; CHECK-NEXT: frintx h4, h4 +; CHECK-NEXT: fcvtzs x8, h5 +; CHECK-NEXT: mov h5, v1.h[1] +; CHECK-NEXT: mov h2, v2.h[3] +; CHECK-NEXT: frintx h18, h18 +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: fcvtzs x9, h6 +; CHECK-NEXT: frintx h6, h7 +; CHECK-NEXT: frintx h7, h16 +; CHECK-NEXT: mov h16, v1.h[2] +; CHECK-NEXT: frintx h17, h17 +; CHECK-NEXT: fcvtzs x10, h19 +; CHECK-NEXT: mov h19, v3.h[1] +; CHECK-NEXT: fcvtzs x11, h4 +; CHECK-NEXT: mov h4, v1.h[3] +; CHECK-NEXT: mov h3, v3.h[3] +; CHECK-NEXT: frintx h1, h1 +; CHECK-NEXT: frintx h5, h5 +; CHECK-NEXT: fcvtzs x13, h7 +; CHECK-NEXT: fcvtzs x12, h6 +; CHECK-NEXT: fcvtzs x15, h18 +; CHECK-NEXT: frintx h7, h16 +; CHECK-NEXT: fcvtzs x14, h17 +; CHECK-NEXT: frintx h16, h2 +; CHECK-NEXT: frintx h17, h19 +; CHECK-NEXT: frintx h4, h4 +; CHECK-NEXT: fmov d2, x9 +; CHECK-NEXT: frintx h19, h3 +; CHECK-NEXT: fcvtzs x9, h1 +; CHECK-NEXT: fmov d6, x10 +; CHECK-NEXT: fmov d3, x13 +; CHECK-NEXT: fcvtzs x13, h0 +; CHECK-NEXT: fcvtzs x16, h5 +; CHECK-NEXT: fcvtzs x10, h7 +; CHECK-NEXT: fmov d7, x14 +; CHECK-NEXT: fcvtzs x14, h16 +; CHECK-NEXT: fcvtzs x17, h17 +; CHECK-NEXT: fcvtzs x0, h4 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fcvtzs x18, h19 +; CHECK-NEXT: fmov d1, x15 +; CHECK-NEXT: fmov d4, x9 +; CHECK-NEXT: mov v2.d[1], x12 +; CHECK-NEXT: fmov d5, x10 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: mov v1.d[1], x13 +; CHECK-NEXT: mov v4.d[1], x16 +; CHECK-NEXT: mov v6.d[1], x17 +; CHECK-NEXT: mov v7.d[1], x18 +; CHECK-NEXT: mov v5.d[1], x0 +; CHECK-NEXT: ret + %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>) + +define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { +; CHECK-LABEL: llrint_v32i64_v32f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v5.16b, v2.16b, v2.16b, #8 +; 
CHECK-NEXT: ext v6.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: frintx h21, h1 +; CHECK-NEXT: frintx h22, h2 +; CHECK-NEXT: mov h26, v2.h[2] +; CHECK-NEXT: frintx h19, h0 +; CHECK-NEXT: mov h27, v3.h[2] +; CHECK-NEXT: mov h20, v2.h[1] +; CHECK-NEXT: mov h18, v1.h[1] +; CHECK-NEXT: mov h16, v4.h[2] +; CHECK-NEXT: mov h17, v5.h[2] +; CHECK-NEXT: frintx h23, h5 +; CHECK-NEXT: frintx h24, h6 +; CHECK-NEXT: mov h25, v6.h[2] +; CHECK-NEXT: fcvtzs x9, h21 +; CHECK-NEXT: fcvtzs x11, h22 +; CHECK-NEXT: frintx h22, h7 +; CHECK-NEXT: mov h21, v3.h[3] +; CHECK-NEXT: fcvtzs x10, h19 +; CHECK-NEXT: frintx h27, h27 +; CHECK-NEXT: frintx h20, h20 +; CHECK-NEXT: frintx h16, h16 +; CHECK-NEXT: frintx h17, h17 +; CHECK-NEXT: fcvtzs x12, h23 +; CHECK-NEXT: fcvtzs x13, h24 +; CHECK-NEXT: frintx h23, h25 +; CHECK-NEXT: frintx h25, h26 +; CHECK-NEXT: mov h26, v3.h[1] +; CHECK-NEXT: mov h24, v2.h[3] +; CHECK-NEXT: fmov d19, x9 +; CHECK-NEXT: fcvtzs x9, h22 +; CHECK-NEXT: frintx h22, h3 +; CHECK-NEXT: frintx h21, h21 +; CHECK-NEXT: fcvtzs x14, h16 +; CHECK-NEXT: fcvtzs x15, h17 +; CHECK-NEXT: fmov d2, x12 +; CHECK-NEXT: fmov d16, x13 +; CHECK-NEXT: fcvtzs x12, h23 +; CHECK-NEXT: fcvtzs x13, h25 +; CHECK-NEXT: mov h23, v1.h[2] +; CHECK-NEXT: frintx h25, h26 +; CHECK-NEXT: frintx h24, h24 +; CHECK-NEXT: mov h1, v1.h[3] +; CHECK-NEXT: fmov d26, x11 +; CHECK-NEXT: fcvtzs x11, h21 +; CHECK-NEXT: fmov d3, x14 +; CHECK-NEXT: fmov d17, x15 +; CHECK-NEXT: fcvtzs x14, h22 +; CHECK-NEXT: fcvtzs x15, h27 +; CHECK-NEXT: mov h22, v0.h[2] +; CHECK-NEXT: frintx h18, h18 +; CHECK-NEXT: frintx h21, h23 +; CHECK-NEXT: fmov d23, x13 +; CHECK-NEXT: fcvtzs x13, h25 +; CHECK-NEXT: frintx h1, h1 +; CHECK-NEXT: fmov d25, x14 +; CHECK-NEXT: fcvtzs x14, h24 +; CHECK-NEXT: fmov d24, x15 +; CHECK-NEXT: frintx h22, h22 +; CHECK-NEXT: fcvtzs x15, h18 +; CHECK-NEXT: mov h18, v7.h[1] +; CHECK-NEXT: mov v25.d[1], x13 +; CHECK-NEXT: fcvtzs x13, h21 +; CHECK-NEXT: mov h21, v7.h[2] +; 
CHECK-NEXT: mov v24.d[1], x11 +; CHECK-NEXT: fcvtzs x11, h20 +; CHECK-NEXT: mov h20, v0.h[1] +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: mov v23.d[1], x14 +; CHECK-NEXT: fcvtzs x14, h1 +; CHECK-NEXT: mov h1, v6.h[3] +; CHECK-NEXT: mov h6, v6.h[1] +; CHECK-NEXT: mov v19.d[1], x15 +; CHECK-NEXT: mov h7, v7.h[3] +; CHECK-NEXT: stp q25, q24, [x8, #192] +; CHECK-NEXT: fmov d24, x13 +; CHECK-NEXT: frintx h20, h20 +; CHECK-NEXT: mov v26.d[1], x11 +; CHECK-NEXT: fcvtzs x11, h22 +; CHECK-NEXT: mov h22, v5.h[1] +; CHECK-NEXT: mov h5, v5.h[3] +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: frintx h1, h1 +; CHECK-NEXT: mov v24.d[1], x14 +; CHECK-NEXT: mov h25, v4.h[3] +; CHECK-NEXT: frintx h6, h6 +; CHECK-NEXT: stp q26, q23, [x8, #128] +; CHECK-NEXT: fmov d23, x12 +; CHECK-NEXT: fcvtzs x12, h20 +; CHECK-NEXT: mov h20, v4.h[1] +; CHECK-NEXT: frintx h5, h5 +; CHECK-NEXT: fcvtzs x13, h0 +; CHECK-NEXT: stp q19, q24, [x8, #64] +; CHECK-NEXT: frintx h22, h22 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d19, x11 +; CHECK-NEXT: frintx h4, h4 +; CHECK-NEXT: fcvtzs x10, h1 +; CHECK-NEXT: frintx h1, h21 +; CHECK-NEXT: frintx h24, h25 +; CHECK-NEXT: fcvtzs x11, h6 +; CHECK-NEXT: frintx h20, h20 +; CHECK-NEXT: frintx h6, h7 +; CHECK-NEXT: fcvtzs x14, h5 +; CHECK-NEXT: mov v19.d[1], x13 +; CHECK-NEXT: frintx h5, h18 +; CHECK-NEXT: fcvtzs x13, h22 +; CHECK-NEXT: mov v0.d[1], x12 +; CHECK-NEXT: fcvtzs x12, h4 +; CHECK-NEXT: mov v23.d[1], x10 +; CHECK-NEXT: fcvtzs x10, h1 +; CHECK-NEXT: fcvtzs x15, h24 +; CHECK-NEXT: mov v16.d[1], x11 +; CHECK-NEXT: fcvtzs x11, h20 +; CHECK-NEXT: mov v17.d[1], x14 +; CHECK-NEXT: fcvtzs x14, h6 +; CHECK-NEXT: mov v2.d[1], x13 +; CHECK-NEXT: fcvtzs x13, h5 +; CHECK-NEXT: fmov d4, x9 +; CHECK-NEXT: stp q0, q19, [x8] +; CHECK-NEXT: fmov d0, x12 +; CHECK-NEXT: stp q16, q23, [x8, #224] +; CHECK-NEXT: fmov d1, x10 +; CHECK-NEXT: mov v3.d[1], x15 +; CHECK-NEXT: stp q2, q17, [x8, #160] +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v4.d[1], x13 +; CHECK-NEXT: mov 
v1.d[1], x14 +; CHECK-NEXT: stp q0, q3, [x8, #96] +; CHECK-NEXT: stp q4, q1, [x8, #32] +; CHECK-NEXT: ret + %a = call <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>) + +define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { +; CHECK-LABEL: llrint_v1i64_v1f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>) + +define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { +; CHECK-LABEL: llrint_v2i64_v2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: ret + %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>) + +define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { +; CHECK-LABEL: llrint_v4i64_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov s3, v0.s[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: mov s2, v1.s[1] +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: fcvtzs x9, s0 +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: fcvtzs x8, s1 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v1.d[1], x10 +; CHECK-NEXT: ret + %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>) + +define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { +; 
CHECK-LABEL: llrint_v8i64_v8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: mov s4, v0.s[1] +; CHECK-NEXT: mov s7, v1.s[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: mov s5, v2.s[1] +; CHECK-NEXT: mov s6, v3.s[1] +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: frintx s7, s7 +; CHECK-NEXT: fcvtzs x9, s0 +; CHECK-NEXT: fcvtzs x12, s1 +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvtzs x8, s2 +; CHECK-NEXT: fcvtzs x10, s3 +; CHECK-NEXT: fcvtzs x11, s4 +; CHECK-NEXT: fcvtzs x15, s7 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fmov d2, x12 +; CHECK-NEXT: fcvtzs x13, s5 +; CHECK-NEXT: fcvtzs x14, s6 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fmov d3, x10 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v2.d[1], x15 +; CHECK-NEXT: mov v1.d[1], x13 +; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: ret + %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) + +define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { +; CHECK-LABEL: llrint_v16i64_v16f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v5.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: frintx s7, s0 +; CHECK-NEXT: ext v16.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: frintx s17, s4 +; CHECK-NEXT: mov s4, v4.s[1] +; CHECK-NEXT: mov s18, v5.s[1] +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: frintx s19, s6 +; CHECK-NEXT: fcvtzs x8, s7 +; CHECK-NEXT: frintx s7, s16 +; CHECK-NEXT: mov s6, v6.s[1] +; CHECK-NEXT: mov s16, v16.s[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: fcvtzs x9, s17 +; CHECK-NEXT: frintx s17, s1 +; CHECK-NEXT: mov s1, v1.s[1] +; CHECK-NEXT: frintx s18, s18 +; CHECK-NEXT: fcvtzs x10, s5 +; 
CHECK-NEXT: mov s5, v2.s[1] +; CHECK-NEXT: fcvtzs x11, s19 +; CHECK-NEXT: mov s19, v3.s[1] +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: fcvtzs x12, s7 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvtzs x13, s4 +; CHECK-NEXT: frintx s4, s3 +; CHECK-NEXT: frintx s16, s16 +; CHECK-NEXT: fcvtzs x14, s18 +; CHECK-NEXT: frintx s18, s1 +; CHECK-NEXT: fcvtzs x15, s17 +; CHECK-NEXT: frintx s20, s5 +; CHECK-NEXT: frintx s17, s19 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fcvtzs x9, s2 +; CHECK-NEXT: fmov d5, x11 +; CHECK-NEXT: fmov d3, x10 +; CHECK-NEXT: fcvtzs x11, s4 +; CHECK-NEXT: fcvtzs x10, s0 +; CHECK-NEXT: fmov d7, x12 +; CHECK-NEXT: fcvtzs x12, s18 +; CHECK-NEXT: fcvtzs x17, s6 +; CHECK-NEXT: fcvtzs x18, s16 +; CHECK-NEXT: fcvtzs x16, s20 +; CHECK-NEXT: fcvtzs x0, s17 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d2, x15 +; CHECK-NEXT: fmov d4, x9 +; CHECK-NEXT: mov v1.d[1], x13 +; CHECK-NEXT: fmov d6, x11 +; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v5.d[1], x17 +; CHECK-NEXT: mov v7.d[1], x18 +; CHECK-NEXT: mov v2.d[1], x12 +; CHECK-NEXT: mov v4.d[1], x16 +; CHECK-NEXT: mov v6.d[1], x0 +; CHECK-NEXT: ret + %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) + +define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) { +; CHECK-LABEL: llrint_v32i64_v32f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v16.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: ext v20.16b, v5.16b, v5.16b, #8 +; CHECK-NEXT: ext v17.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: ext v18.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v19.16b, v4.16b, v4.16b, #8 +; CHECK-NEXT: ext v21.16b, v6.16b, v6.16b, #8 +; CHECK-NEXT: ext v22.16b, v7.16b, v7.16b, #8 +; CHECK-NEXT: frintx s24, s16 +; CHECK-NEXT: mov s28, v20.s[1] +; CHECK-NEXT: frintx s25, s17 +; CHECK-NEXT: frintx s26, s18 +; CHECK-NEXT: frintx s27, s19 +; CHECK-NEXT: frintx s29, s20 +; CHECK-NEXT: mov s30, v21.s[1] +; CHECK-NEXT: 
frintx s20, s21 +; CHECK-NEXT: frintx s21, s22 +; CHECK-NEXT: mov s23, v22.s[1] +; CHECK-NEXT: mov s19, v19.s[1] +; CHECK-NEXT: mov s17, v17.s[1] +; CHECK-NEXT: fcvtzs x12, s24 +; CHECK-NEXT: frintx s24, s28 +; CHECK-NEXT: fcvtzs x13, s25 +; CHECK-NEXT: mov s25, v7.s[1] +; CHECK-NEXT: fcvtzs x9, s26 +; CHECK-NEXT: fcvtzs x11, s27 +; CHECK-NEXT: fcvtzs x14, s20 +; CHECK-NEXT: fcvtzs x15, s21 +; CHECK-NEXT: frintx s26, s1 +; CHECK-NEXT: frintx s23, s23 +; CHECK-NEXT: frintx s27, s7 +; CHECK-NEXT: frintx s22, s30 +; CHECK-NEXT: fmov d20, x12 +; CHECK-NEXT: fcvtzs x12, s24 +; CHECK-NEXT: mov s24, v6.s[1] +; CHECK-NEXT: frintx s25, s25 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvtzs x10, s29 +; CHECK-NEXT: fmov d7, x11 +; CHECK-NEXT: fmov d21, x13 +; CHECK-NEXT: frintx s28, s5 +; CHECK-NEXT: fcvtzs x11, s23 +; CHECK-NEXT: fmov d23, x14 +; CHECK-NEXT: fcvtzs x14, s26 +; CHECK-NEXT: fmov d26, x15 +; CHECK-NEXT: fcvtzs x15, s27 +; CHECK-NEXT: frintx s24, s24 +; CHECK-NEXT: mov s27, v5.s[1] +; CHECK-NEXT: fcvtzs x13, s22 +; CHECK-NEXT: fcvtzs x17, s25 +; CHECK-NEXT: frintx s25, s4 +; CHECK-NEXT: fcvtzs x18, s6 +; CHECK-NEXT: fmov d6, x10 +; CHECK-NEXT: frintx s22, s2 +; CHECK-NEXT: mov v26.d[1], x11 +; CHECK-NEXT: fmov d5, x14 +; CHECK-NEXT: fcvtzs x10, s24 +; CHECK-NEXT: fmov d24, x15 +; CHECK-NEXT: fcvtzs x14, s28 +; CHECK-NEXT: frintx s27, s27 +; CHECK-NEXT: mov v23.d[1], x13 +; CHECK-NEXT: mov s4, v4.s[1] +; CHECK-NEXT: fcvtzs x13, s25 +; CHECK-NEXT: fmov d25, x18 +; CHECK-NEXT: mov s16, v16.s[1] +; CHECK-NEXT: mov v24.d[1], x17 +; CHECK-NEXT: fcvtzs x16, s22 +; CHECK-NEXT: frintx s22, s3 +; CHECK-NEXT: mov s3, v3.s[1] +; CHECK-NEXT: frintx s19, s19 +; CHECK-NEXT: mov s2, v2.s[1] +; CHECK-NEXT: mov v25.d[1], x10 +; CHECK-NEXT: fcvtzs x10, s27 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: mov v6.d[1], x12 +; CHECK-NEXT: frintx s17, s17 +; CHECK-NEXT: mov s18, v18.s[1] +; CHECK-NEXT: stp q24, q26, [x8, #224] +; CHECK-NEXT: fmov d24, x14 +; CHECK-NEXT: fcvtzs x11, s22 +; 
CHECK-NEXT: ext v22.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: mov s1, v1.s[1] +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: stp q25, q23, [x8, #192] +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: fcvtzs x12, s4 +; CHECK-NEXT: mov v24.d[1], x10 +; CHECK-NEXT: fcvtzs x10, s19 +; CHECK-NEXT: mov s19, v0.s[1] +; CHECK-NEXT: frintx s16, s16 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fmov d4, x11 +; CHECK-NEXT: mov s27, v22.s[1] +; CHECK-NEXT: frintx s22, s22 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fcvtzs x14, s2 +; CHECK-NEXT: frintx s2, s18 +; CHECK-NEXT: stp q24, q6, [x8, #160] +; CHECK-NEXT: fmov d6, x13 +; CHECK-NEXT: fcvtzs x13, s17 +; CHECK-NEXT: frintx s17, s19 +; CHECK-NEXT: fmov d23, x16 +; CHECK-NEXT: mov v7.d[1], x10 +; CHECK-NEXT: frintx s3, s27 +; CHECK-NEXT: fcvtzs x10, s22 +; CHECK-NEXT: fcvtzs x15, s1 +; CHECK-NEXT: mov v6.d[1], x12 +; CHECK-NEXT: fcvtzs x12, s16 +; CHECK-NEXT: mov v4.d[1], x11 +; CHECK-NEXT: mov v21.d[1], x13 +; CHECK-NEXT: fcvtzs x13, s0 +; CHECK-NEXT: mov v23.d[1], x14 +; CHECK-NEXT: fcvtzs x14, s17 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: mov v5.d[1], x15 +; CHECK-NEXT: stp q6, q7, [x8, #128] +; CHECK-NEXT: mov v20.d[1], x12 +; CHECK-NEXT: fcvtzs x12, s2 +; CHECK-NEXT: stp q4, q21, [x8, #96] +; CHECK-NEXT: fmov d1, x13 +; CHECK-NEXT: fmov d2, x9 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: stp q23, q20, [x8, #64] +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: mov v2.d[1], x12 +; CHECK-NEXT: stp q5, q0, [x8, #32] +; CHECK-NEXT: stp q1, q2, [x8] +; CHECK-NEXT: ret + %a = call <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>) + +define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { +; CHECK-LABEL: llrint_v1i64_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 
x double> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>) + +define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { +; CHECK-LABEL: llrint_v2i64_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: fcvtzs x9, d1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: ret + %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>) + +define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { +; CHECK-LABEL: llrint_v4i64_v4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov d2, v0.d[1] +; CHECK-NEXT: mov d3, v1.d[1] +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: frintx d2, d2 +; CHECK-NEXT: frintx d3, d3 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: fcvtzs x9, d1 +; CHECK-NEXT: fcvtzs x10, d2 +; CHECK-NEXT: fcvtzs x11, d3 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: ret + %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) + +define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { +; CHECK-LABEL: llrint_v8i64_v8f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov d4, v0.d[1] +; CHECK-NEXT: mov d5, v1.d[1] +; CHECK-NEXT: mov d6, v2.d[1] +; CHECK-NEXT: mov d7, v3.d[1] +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: frintx d2, d2 +; CHECK-NEXT: frintx d3, d3 +; CHECK-NEXT: frintx d4, d4 +; CHECK-NEXT: frintx d5, d5 +; CHECK-NEXT: frintx d6, d6 +; CHECK-NEXT: frintx d7, d7 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: fcvtzs x9, d1 +; CHECK-NEXT: fcvtzs x10, d2 +; CHECK-NEXT: fcvtzs x11, d3 +; CHECK-NEXT: fcvtzs x12, d4 +; CHECK-NEXT: fcvtzs x13, d5 +; CHECK-NEXT: fcvtzs x14, d6 +; CHECK-NEXT: fcvtzs x15, d7 +; CHECK-NEXT: fmov 
d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d2, x10 +; CHECK-NEXT: fmov d3, x11 +; CHECK-NEXT: mov v0.d[1], x12 +; CHECK-NEXT: mov v1.d[1], x13 +; CHECK-NEXT: mov v2.d[1], x14 +; CHECK-NEXT: mov v3.d[1], x15 +; CHECK-NEXT: ret + %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) + +define <16 x i64> @llrint_v16f64(<16 x double> %x) { +; CHECK-LABEL: llrint_v16f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov d16, v0.d[1] +; CHECK-NEXT: mov d17, v1.d[1] +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: frintx d18, d2 +; CHECK-NEXT: mov d2, v2.d[1] +; CHECK-NEXT: frintx d19, d3 +; CHECK-NEXT: mov d3, v3.d[1] +; CHECK-NEXT: frintx d16, d16 +; CHECK-NEXT: frintx d17, d17 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: frintx d0, d4 +; CHECK-NEXT: mov d4, v4.d[1] +; CHECK-NEXT: fcvtzs x9, d1 +; CHECK-NEXT: frintx d1, d5 +; CHECK-NEXT: mov d5, v5.d[1] +; CHECK-NEXT: fcvtzs x12, d18 +; CHECK-NEXT: frintx d2, d2 +; CHECK-NEXT: fcvtzs x13, d19 +; CHECK-NEXT: frintx d18, d3 +; CHECK-NEXT: fcvtzs x10, d16 +; CHECK-NEXT: mov d16, v6.d[1] +; CHECK-NEXT: fcvtzs x11, d17 +; CHECK-NEXT: mov d17, v7.d[1] +; CHECK-NEXT: frintx d6, d6 +; CHECK-NEXT: frintx d7, d7 +; CHECK-NEXT: frintx d4, d4 +; CHECK-NEXT: frintx d5, d5 +; CHECK-NEXT: fcvtzs x14, d0 +; CHECK-NEXT: fcvtzs x15, d1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: frintx d16, d16 +; CHECK-NEXT: fcvtzs x9, d2 +; CHECK-NEXT: fmov d2, x12 +; CHECK-NEXT: frintx d17, d17 +; CHECK-NEXT: fcvtzs x8, d6 +; CHECK-NEXT: fcvtzs x12, d7 +; CHECK-NEXT: fmov d3, x13 +; CHECK-NEXT: fcvtzs x13, d18 +; CHECK-NEXT: fcvtzs x16, d4 +; CHECK-NEXT: fcvtzs x17, d5 +; CHECK-NEXT: fmov d4, x14 +; CHECK-NEXT: fmov d5, x15 +; CHECK-NEXT: fcvtzs x18, d16 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: fcvtzs x0, d17 +; CHECK-NEXT: fmov d6, x8 +; CHECK-NEXT: fmov d7, x12 +; CHECK-NEXT: 
mov v2.d[1], x9 +; CHECK-NEXT: mov v3.d[1], x13 +; CHECK-NEXT: mov v4.d[1], x16 +; CHECK-NEXT: mov v5.d[1], x17 +; CHECK-NEXT: mov v6.d[1], x18 +; CHECK-NEXT: mov v7.d[1], x0 +; CHECK-NEXT: ret + %a = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>) + +define <32 x i64> @llrint_v32f64(<32 x double> %x) { +; CHECK-LABEL: llrint_v32f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d20, d0 +; CHECK-NEXT: frintx d22, d3 +; CHECK-NEXT: frintx d21, d4 +; CHECK-NEXT: ldp q19, q18, [sp, #64] +; CHECK-NEXT: frintx d23, d5 +; CHECK-NEXT: ldp q27, q26, [sp, #96] +; CHECK-NEXT: mov d4, v4.d[1] +; CHECK-NEXT: ldp q16, q17, [sp, #32] +; CHECK-NEXT: mov d5, v5.d[1] +; CHECK-NEXT: fcvtzs x9, d20 +; CHECK-NEXT: frintx d20, d6 +; CHECK-NEXT: fcvtzs x11, d22 +; CHECK-NEXT: frintx d22, d19 +; CHECK-NEXT: fcvtzs x12, d21 +; CHECK-NEXT: fcvtzs x10, d23 +; CHECK-NEXT: mov d21, v26.d[1] +; CHECK-NEXT: frintx d23, d27 +; CHECK-NEXT: mov d27, v27.d[1] +; CHECK-NEXT: frintx d24, d16 +; CHECK-NEXT: mov d19, v19.d[1] +; CHECK-NEXT: frintx d25, d17 +; CHECK-NEXT: fcvtzs x13, d20 +; CHECK-NEXT: mov d20, v18.d[1] +; CHECK-NEXT: frintx d18, d18 +; CHECK-NEXT: fcvtzs x16, d22 +; CHECK-NEXT: frintx d22, d26 +; CHECK-NEXT: mov d16, v16.d[1] +; CHECK-NEXT: frintx d21, d21 +; CHECK-NEXT: fcvtzs x17, d23 +; CHECK-NEXT: frintx d23, d27 +; CHECK-NEXT: fcvtzs x14, d24 +; CHECK-NEXT: frintx d26, d19 +; CHECK-NEXT: fmov d19, x11 +; CHECK-NEXT: frintx d20, d20 +; CHECK-NEXT: mov d27, v17.d[1] +; CHECK-NEXT: fcvtzs x15, d25 +; CHECK-NEXT: ldp q25, q24, [sp] +; CHECK-NEXT: fcvtzs x11, d22 +; CHECK-NEXT: fmov d17, x12 +; CHECK-NEXT: fcvtzs x12, d21 +; CHECK-NEXT: fcvtzs x0, d23 +; CHECK-NEXT: fmov d23, x14 +; CHECK-NEXT: fcvtzs x14, d18 +; CHECK-NEXT: fmov d18, x17 +; CHECK-NEXT: fcvtzs x17, d20 +; CHECK-NEXT: frintx d21, d7 +; CHECK-NEXT: fcvtzs x18, d26 +; CHECK-NEXT: fmov d20, x11 +; CHECK-NEXT: frintx d22, d25 +; 
CHECK-NEXT: frintx d26, d27 +; CHECK-NEXT: frintx d16, d16 +; CHECK-NEXT: mov v18.d[1], x0 +; CHECK-NEXT: mov d25, v25.d[1] +; CHECK-NEXT: mov d7, v7.d[1] +; CHECK-NEXT: mov d6, v6.d[1] +; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: mov v20.d[1], x12 +; CHECK-NEXT: fcvtzs x11, d21 +; CHECK-NEXT: fmov d21, x15 +; CHECK-NEXT: fcvtzs x12, d22 +; CHECK-NEXT: fmov d22, x16 +; CHECK-NEXT: fcvtzs x15, d26 +; CHECK-NEXT: fmov d26, x14 +; CHECK-NEXT: fcvtzs x14, d16 +; CHECK-NEXT: frintx d25, d25 +; CHECK-NEXT: frintx d7, d7 +; CHECK-NEXT: mov d16, v1.d[1] +; CHECK-NEXT: mov d3, v3.d[1] +; CHECK-NEXT: stp q18, q20, [x8, #224] +; CHECK-NEXT: mov d18, v24.d[1] +; CHECK-NEXT: mov v22.d[1], x18 +; CHECK-NEXT: mov v26.d[1], x17 +; CHECK-NEXT: frintx d24, d24 +; CHECK-NEXT: mov v21.d[1], x15 +; CHECK-NEXT: mov v23.d[1], x14 +; CHECK-NEXT: frintx d20, d2 +; CHECK-NEXT: mov d2, v2.d[1] +; CHECK-NEXT: frintx d6, d6 +; CHECK-NEXT: frintx d5, d5 +; CHECK-NEXT: frintx d4, d4 +; CHECK-NEXT: frintx d18, d18 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: frintx d3, d3 +; CHECK-NEXT: stp q22, q26, [x8, #192] +; CHECK-NEXT: fmov d22, x10 +; CHECK-NEXT: fcvtzs x10, d24 +; CHECK-NEXT: stp q23, q21, [x8, #160] +; CHECK-NEXT: fmov d21, x11 +; CHECK-NEXT: fmov d24, x13 +; CHECK-NEXT: frintx d2, d2 +; CHECK-NEXT: fcvtzs x13, d6 +; CHECK-NEXT: frintx d6, d16 +; CHECK-NEXT: fcvtzs x11, d18 +; CHECK-NEXT: fmov d18, x12 +; CHECK-NEXT: fcvtzs x12, d25 +; CHECK-NEXT: fmov d23, x10 +; CHECK-NEXT: fcvtzs x10, d7 +; CHECK-NEXT: fcvtzs x14, d5 +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs x15, d3 +; CHECK-NEXT: mov v24.d[1], x13 +; CHECK-NEXT: fcvtzs x13, d2 +; CHECK-NEXT: fmov d2, x9 +; CHECK-NEXT: mov v23.d[1], x11 +; CHECK-NEXT: fcvtzs x11, d4 +; CHECK-NEXT: mov v18.d[1], x12 +; CHECK-NEXT: fcvtzs x12, d20 +; CHECK-NEXT: mov v21.d[1], x10 +; CHECK-NEXT: fcvtzs x10, d1 +; CHECK-NEXT: mov v22.d[1], x14 +; CHECK-NEXT: fcvtzs x14, d6 +; CHECK-NEXT: mov v19.d[1], x15 +; CHECK-NEXT: stp q18, q23, [x8, #128] 
+; CHECK-NEXT: mov v17.d[1], x11 +; CHECK-NEXT: fcvtzs x11, d0 +; CHECK-NEXT: stp q24, q21, [x8, #96] +; CHECK-NEXT: fmov d0, x12 +; CHECK-NEXT: fmov d1, x10 +; CHECK-NEXT: stp q17, q22, [x8, #64] +; CHECK-NEXT: mov v0.d[1], x13 +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: mov v2.d[1], x11 +; CHECK-NEXT: stp q0, q19, [x8, #32] +; CHECK-NEXT: stp q2, q1, [x8] +; CHECK-NEXT: ret + %a = call <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll new file mode 100644 index 0000000000000..e9c5fd9b769b6 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll @@ -0,0 +1,893 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s + +define <1 x i64> @lrint_v1f16(<1 x half> %x) { +; CHECK-LABEL: lrint_v1f16: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: fcvtzs x8, h0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %a = call <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half>) + +define <2 x i64> @lrint_v2f16(<2 x half> %x) { +; CHECK-LABEL: lrint_v2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov h1, v0.h[1] +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: frintx h1, h1 +; CHECK-NEXT: fcvtzs x8, h0 +; CHECK-NEXT: fcvtzs x9, h1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: ret + %a = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half>) + +define <4 x i64> @lrint_v4f16(<4 x half> %x) { +; CHECK-LABEL: lrint_v4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov h1, v0.h[2] +; CHECK-NEXT: mov h2, 
v0.h[1] +; CHECK-NEXT: mov h3, v0.h[3] +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: frintx h1, h1 +; CHECK-NEXT: frintx h2, h2 +; CHECK-NEXT: frintx h3, h3 +; CHECK-NEXT: fcvtzs x8, h0 +; CHECK-NEXT: fcvtzs x9, h1 +; CHECK-NEXT: fcvtzs x10, h2 +; CHECK-NEXT: fcvtzs x11, h3 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: ret + %a = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half>) + +define <8 x i64> @lrint_v8f16(<8 x half> %x) { +; CHECK-LABEL: lrint_v8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov h4, v0.h[2] +; CHECK-NEXT: mov h3, v0.h[1] +; CHECK-NEXT: mov h7, v0.h[3] +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: mov h2, v1.h[2] +; CHECK-NEXT: mov h5, v1.h[1] +; CHECK-NEXT: mov h6, v1.h[3] +; CHECK-NEXT: frintx h1, h1 +; CHECK-NEXT: frintx h4, h4 +; CHECK-NEXT: frintx h3, h3 +; CHECK-NEXT: frintx h7, h7 +; CHECK-NEXT: fcvtzs x9, h0 +; CHECK-NEXT: frintx h2, h2 +; CHECK-NEXT: frintx h5, h5 +; CHECK-NEXT: frintx h6, h6 +; CHECK-NEXT: fcvtzs x8, h1 +; CHECK-NEXT: fcvtzs x12, h4 +; CHECK-NEXT: fcvtzs x11, h3 +; CHECK-NEXT: fcvtzs x15, h7 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fcvtzs x10, h2 +; CHECK-NEXT: fcvtzs x13, h5 +; CHECK-NEXT: fcvtzs x14, h6 +; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: fmov d1, x12 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: fmov d3, x10 +; CHECK-NEXT: mov v2.d[1], x13 +; CHECK-NEXT: mov v1.d[1], x15 +; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: ret + %a = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half>) + +define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { +; CHECK-LABEL: lrint_v16i64_v16f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: mov h4, v0.h[1] +; CHECK-NEXT: 
frintx h5, h0 +; CHECK-NEXT: mov h18, v0.h[2] +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: frintx h6, h2 +; CHECK-NEXT: mov h7, v2.h[1] +; CHECK-NEXT: mov h16, v2.h[2] +; CHECK-NEXT: mov h17, v3.h[2] +; CHECK-NEXT: frintx h19, h3 +; CHECK-NEXT: frintx h4, h4 +; CHECK-NEXT: fcvtzs x8, h5 +; CHECK-NEXT: mov h5, v1.h[1] +; CHECK-NEXT: mov h2, v2.h[3] +; CHECK-NEXT: frintx h18, h18 +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: fcvtzs x9, h6 +; CHECK-NEXT: frintx h6, h7 +; CHECK-NEXT: frintx h7, h16 +; CHECK-NEXT: mov h16, v1.h[2] +; CHECK-NEXT: frintx h17, h17 +; CHECK-NEXT: fcvtzs x10, h19 +; CHECK-NEXT: mov h19, v3.h[1] +; CHECK-NEXT: fcvtzs x11, h4 +; CHECK-NEXT: mov h4, v1.h[3] +; CHECK-NEXT: mov h3, v3.h[3] +; CHECK-NEXT: frintx h1, h1 +; CHECK-NEXT: frintx h5, h5 +; CHECK-NEXT: fcvtzs x13, h7 +; CHECK-NEXT: fcvtzs x12, h6 +; CHECK-NEXT: fcvtzs x15, h18 +; CHECK-NEXT: frintx h7, h16 +; CHECK-NEXT: fcvtzs x14, h17 +; CHECK-NEXT: frintx h16, h2 +; CHECK-NEXT: frintx h17, h19 +; CHECK-NEXT: frintx h4, h4 +; CHECK-NEXT: fmov d2, x9 +; CHECK-NEXT: frintx h19, h3 +; CHECK-NEXT: fcvtzs x9, h1 +; CHECK-NEXT: fmov d6, x10 +; CHECK-NEXT: fmov d3, x13 +; CHECK-NEXT: fcvtzs x13, h0 +; CHECK-NEXT: fcvtzs x16, h5 +; CHECK-NEXT: fcvtzs x10, h7 +; CHECK-NEXT: fmov d7, x14 +; CHECK-NEXT: fcvtzs x14, h16 +; CHECK-NEXT: fcvtzs x17, h17 +; CHECK-NEXT: fcvtzs x0, h4 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fcvtzs x18, h19 +; CHECK-NEXT: fmov d1, x15 +; CHECK-NEXT: fmov d4, x9 +; CHECK-NEXT: mov v2.d[1], x12 +; CHECK-NEXT: fmov d5, x10 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: mov v1.d[1], x13 +; CHECK-NEXT: mov v4.d[1], x16 +; CHECK-NEXT: mov v6.d[1], x17 +; CHECK-NEXT: mov v7.d[1], x18 +; CHECK-NEXT: mov v5.d[1], x0 +; CHECK-NEXT: ret + %a = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half>) + +define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { +; 
CHECK-LABEL: lrint_v32i64_v32f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v5.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: ext v6.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: frintx h21, h1 +; CHECK-NEXT: frintx h22, h2 +; CHECK-NEXT: mov h26, v2.h[2] +; CHECK-NEXT: frintx h19, h0 +; CHECK-NEXT: mov h27, v3.h[2] +; CHECK-NEXT: mov h20, v2.h[1] +; CHECK-NEXT: mov h18, v1.h[1] +; CHECK-NEXT: mov h16, v4.h[2] +; CHECK-NEXT: mov h17, v5.h[2] +; CHECK-NEXT: frintx h23, h5 +; CHECK-NEXT: frintx h24, h6 +; CHECK-NEXT: mov h25, v6.h[2] +; CHECK-NEXT: fcvtzs x9, h21 +; CHECK-NEXT: fcvtzs x11, h22 +; CHECK-NEXT: frintx h22, h7 +; CHECK-NEXT: mov h21, v3.h[3] +; CHECK-NEXT: fcvtzs x10, h19 +; CHECK-NEXT: frintx h27, h27 +; CHECK-NEXT: frintx h20, h20 +; CHECK-NEXT: frintx h16, h16 +; CHECK-NEXT: frintx h17, h17 +; CHECK-NEXT: fcvtzs x12, h23 +; CHECK-NEXT: fcvtzs x13, h24 +; CHECK-NEXT: frintx h23, h25 +; CHECK-NEXT: frintx h25, h26 +; CHECK-NEXT: mov h26, v3.h[1] +; CHECK-NEXT: mov h24, v2.h[3] +; CHECK-NEXT: fmov d19, x9 +; CHECK-NEXT: fcvtzs x9, h22 +; CHECK-NEXT: frintx h22, h3 +; CHECK-NEXT: frintx h21, h21 +; CHECK-NEXT: fcvtzs x14, h16 +; CHECK-NEXT: fcvtzs x15, h17 +; CHECK-NEXT: fmov d2, x12 +; CHECK-NEXT: fmov d16, x13 +; CHECK-NEXT: fcvtzs x12, h23 +; CHECK-NEXT: fcvtzs x13, h25 +; CHECK-NEXT: mov h23, v1.h[2] +; CHECK-NEXT: frintx h25, h26 +; CHECK-NEXT: frintx h24, h24 +; CHECK-NEXT: mov h1, v1.h[3] +; CHECK-NEXT: fmov d26, x11 +; CHECK-NEXT: fcvtzs x11, h21 +; CHECK-NEXT: fmov d3, x14 +; CHECK-NEXT: fmov d17, x15 +; CHECK-NEXT: fcvtzs x14, h22 +; CHECK-NEXT: fcvtzs x15, h27 +; CHECK-NEXT: mov h22, v0.h[2] +; CHECK-NEXT: frintx h18, h18 +; CHECK-NEXT: frintx h21, h23 +; CHECK-NEXT: fmov d23, x13 +; CHECK-NEXT: fcvtzs x13, h25 +; CHECK-NEXT: frintx h1, h1 +; CHECK-NEXT: fmov d25, x14 +; CHECK-NEXT: fcvtzs x14, h24 +; CHECK-NEXT: fmov d24, x15 +; CHECK-NEXT: frintx h22, h22 +; CHECK-NEXT: 
fcvtzs x15, h18 +; CHECK-NEXT: mov h18, v7.h[1] +; CHECK-NEXT: mov v25.d[1], x13 +; CHECK-NEXT: fcvtzs x13, h21 +; CHECK-NEXT: mov h21, v7.h[2] +; CHECK-NEXT: mov v24.d[1], x11 +; CHECK-NEXT: fcvtzs x11, h20 +; CHECK-NEXT: mov h20, v0.h[1] +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: mov v23.d[1], x14 +; CHECK-NEXT: fcvtzs x14, h1 +; CHECK-NEXT: mov h1, v6.h[3] +; CHECK-NEXT: mov h6, v6.h[1] +; CHECK-NEXT: mov v19.d[1], x15 +; CHECK-NEXT: mov h7, v7.h[3] +; CHECK-NEXT: stp q25, q24, [x8, #192] +; CHECK-NEXT: fmov d24, x13 +; CHECK-NEXT: frintx h20, h20 +; CHECK-NEXT: mov v26.d[1], x11 +; CHECK-NEXT: fcvtzs x11, h22 +; CHECK-NEXT: mov h22, v5.h[1] +; CHECK-NEXT: mov h5, v5.h[3] +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: frintx h1, h1 +; CHECK-NEXT: mov v24.d[1], x14 +; CHECK-NEXT: mov h25, v4.h[3] +; CHECK-NEXT: frintx h6, h6 +; CHECK-NEXT: stp q26, q23, [x8, #128] +; CHECK-NEXT: fmov d23, x12 +; CHECK-NEXT: fcvtzs x12, h20 +; CHECK-NEXT: mov h20, v4.h[1] +; CHECK-NEXT: frintx h5, h5 +; CHECK-NEXT: fcvtzs x13, h0 +; CHECK-NEXT: stp q19, q24, [x8, #64] +; CHECK-NEXT: frintx h22, h22 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d19, x11 +; CHECK-NEXT: frintx h4, h4 +; CHECK-NEXT: fcvtzs x10, h1 +; CHECK-NEXT: frintx h1, h21 +; CHECK-NEXT: frintx h24, h25 +; CHECK-NEXT: fcvtzs x11, h6 +; CHECK-NEXT: frintx h20, h20 +; CHECK-NEXT: frintx h6, h7 +; CHECK-NEXT: fcvtzs x14, h5 +; CHECK-NEXT: mov v19.d[1], x13 +; CHECK-NEXT: frintx h5, h18 +; CHECK-NEXT: fcvtzs x13, h22 +; CHECK-NEXT: mov v0.d[1], x12 +; CHECK-NEXT: fcvtzs x12, h4 +; CHECK-NEXT: mov v23.d[1], x10 +; CHECK-NEXT: fcvtzs x10, h1 +; CHECK-NEXT: fcvtzs x15, h24 +; CHECK-NEXT: mov v16.d[1], x11 +; CHECK-NEXT: fcvtzs x11, h20 +; CHECK-NEXT: mov v17.d[1], x14 +; CHECK-NEXT: fcvtzs x14, h6 +; CHECK-NEXT: mov v2.d[1], x13 +; CHECK-NEXT: fcvtzs x13, h5 +; CHECK-NEXT: fmov d4, x9 +; CHECK-NEXT: stp q0, q19, [x8] +; CHECK-NEXT: fmov d0, x12 +; CHECK-NEXT: stp q16, q23, [x8, #224] +; CHECK-NEXT: fmov d1, x10 +; 
CHECK-NEXT: mov v3.d[1], x15 +; CHECK-NEXT: stp q2, q17, [x8, #160] +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v4.d[1], x13 +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: stp q0, q3, [x8, #96] +; CHECK-NEXT: stp q4, q1, [x8, #32] +; CHECK-NEXT: ret + %a = call <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half>) + +define <1 x i64> @lrint_v1f32(<1 x float> %x) { +; CHECK-LABEL: lrint_v1f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + %a = call <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float>) + +define <2 x i64> @lrint_v2f32(<2 x float> %x) { +; CHECK-LABEL: lrint_v2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: ret + %a = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float>) + +define <4 x i64> @lrint_v4f32(<4 x float> %x) { +; CHECK-LABEL: lrint_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov s3, v0.s[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: mov s2, v1.s[1] +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: fcvtzs x9, s0 +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: fcvtzs x8, s1 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v1.d[1], x10 +; CHECK-NEXT: ret + %a = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> %x) + ret <4 x i64> %a +} +declare <4 x i64> 
@llvm.lrint.v4i64.v4f32(<4 x float>) + +define <8 x i64> @lrint_v8f32(<8 x float> %x) { +; CHECK-LABEL: lrint_v8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: mov s4, v0.s[1] +; CHECK-NEXT: mov s7, v1.s[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: mov s5, v2.s[1] +; CHECK-NEXT: mov s6, v3.s[1] +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: frintx s7, s7 +; CHECK-NEXT: fcvtzs x9, s0 +; CHECK-NEXT: fcvtzs x12, s1 +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvtzs x8, s2 +; CHECK-NEXT: fcvtzs x10, s3 +; CHECK-NEXT: fcvtzs x11, s4 +; CHECK-NEXT: fcvtzs x15, s7 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fmov d2, x12 +; CHECK-NEXT: fcvtzs x13, s5 +; CHECK-NEXT: fcvtzs x14, s6 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fmov d3, x10 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v2.d[1], x15 +; CHECK-NEXT: mov v1.d[1], x13 +; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: ret + %a = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float>) + +define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) { +; CHECK-LABEL: lrint_v16i64_v16f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v5.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: frintx s7, s0 +; CHECK-NEXT: ext v16.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: frintx s17, s4 +; CHECK-NEXT: mov s4, v4.s[1] +; CHECK-NEXT: mov s18, v5.s[1] +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: frintx s19, s6 +; CHECK-NEXT: fcvtzs x8, s7 +; CHECK-NEXT: frintx s7, s16 +; CHECK-NEXT: mov s6, v6.s[1] +; CHECK-NEXT: mov s16, v16.s[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: fcvtzs x9, s17 +; CHECK-NEXT: frintx s17, s1 +; CHECK-NEXT: mov s1, 
v1.s[1] +; CHECK-NEXT: frintx s18, s18 +; CHECK-NEXT: fcvtzs x10, s5 +; CHECK-NEXT: mov s5, v2.s[1] +; CHECK-NEXT: fcvtzs x11, s19 +; CHECK-NEXT: mov s19, v3.s[1] +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: fcvtzs x12, s7 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvtzs x13, s4 +; CHECK-NEXT: frintx s4, s3 +; CHECK-NEXT: frintx s16, s16 +; CHECK-NEXT: fcvtzs x14, s18 +; CHECK-NEXT: frintx s18, s1 +; CHECK-NEXT: fcvtzs x15, s17 +; CHECK-NEXT: frintx s20, s5 +; CHECK-NEXT: frintx s17, s19 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fcvtzs x9, s2 +; CHECK-NEXT: fmov d5, x11 +; CHECK-NEXT: fmov d3, x10 +; CHECK-NEXT: fcvtzs x11, s4 +; CHECK-NEXT: fcvtzs x10, s0 +; CHECK-NEXT: fmov d7, x12 +; CHECK-NEXT: fcvtzs x12, s18 +; CHECK-NEXT: fcvtzs x17, s6 +; CHECK-NEXT: fcvtzs x18, s16 +; CHECK-NEXT: fcvtzs x16, s20 +; CHECK-NEXT: fcvtzs x0, s17 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d2, x15 +; CHECK-NEXT: fmov d4, x9 +; CHECK-NEXT: mov v1.d[1], x13 +; CHECK-NEXT: fmov d6, x11 +; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v5.d[1], x17 +; CHECK-NEXT: mov v7.d[1], x18 +; CHECK-NEXT: mov v2.d[1], x12 +; CHECK-NEXT: mov v4.d[1], x16 +; CHECK-NEXT: mov v6.d[1], x0 +; CHECK-NEXT: ret + %a = call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float>) + +define <32 x i64> @lrint_v32i64_v32f32(<32 x float> %x) { +; CHECK-LABEL: lrint_v32i64_v32f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v16.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: ext v20.16b, v5.16b, v5.16b, #8 +; CHECK-NEXT: ext v17.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: ext v18.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v19.16b, v4.16b, v4.16b, #8 +; CHECK-NEXT: ext v21.16b, v6.16b, v6.16b, #8 +; CHECK-NEXT: ext v22.16b, v7.16b, v7.16b, #8 +; CHECK-NEXT: frintx s24, s16 +; CHECK-NEXT: mov s28, v20.s[1] +; CHECK-NEXT: frintx s25, s17 +; CHECK-NEXT: frintx s26, s18 +; CHECK-NEXT: frintx s27, s19 +; CHECK-NEXT: 
frintx s29, s20 +; CHECK-NEXT: mov s30, v21.s[1] +; CHECK-NEXT: frintx s20, s21 +; CHECK-NEXT: frintx s21, s22 +; CHECK-NEXT: mov s23, v22.s[1] +; CHECK-NEXT: mov s19, v19.s[1] +; CHECK-NEXT: mov s17, v17.s[1] +; CHECK-NEXT: fcvtzs x12, s24 +; CHECK-NEXT: frintx s24, s28 +; CHECK-NEXT: fcvtzs x13, s25 +; CHECK-NEXT: mov s25, v7.s[1] +; CHECK-NEXT: fcvtzs x9, s26 +; CHECK-NEXT: fcvtzs x11, s27 +; CHECK-NEXT: fcvtzs x14, s20 +; CHECK-NEXT: fcvtzs x15, s21 +; CHECK-NEXT: frintx s26, s1 +; CHECK-NEXT: frintx s23, s23 +; CHECK-NEXT: frintx s27, s7 +; CHECK-NEXT: frintx s22, s30 +; CHECK-NEXT: fmov d20, x12 +; CHECK-NEXT: fcvtzs x12, s24 +; CHECK-NEXT: mov s24, v6.s[1] +; CHECK-NEXT: frintx s25, s25 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvtzs x10, s29 +; CHECK-NEXT: fmov d7, x11 +; CHECK-NEXT: fmov d21, x13 +; CHECK-NEXT: frintx s28, s5 +; CHECK-NEXT: fcvtzs x11, s23 +; CHECK-NEXT: fmov d23, x14 +; CHECK-NEXT: fcvtzs x14, s26 +; CHECK-NEXT: fmov d26, x15 +; CHECK-NEXT: fcvtzs x15, s27 +; CHECK-NEXT: frintx s24, s24 +; CHECK-NEXT: mov s27, v5.s[1] +; CHECK-NEXT: fcvtzs x13, s22 +; CHECK-NEXT: fcvtzs x17, s25 +; CHECK-NEXT: frintx s25, s4 +; CHECK-NEXT: fcvtzs x18, s6 +; CHECK-NEXT: fmov d6, x10 +; CHECK-NEXT: frintx s22, s2 +; CHECK-NEXT: mov v26.d[1], x11 +; CHECK-NEXT: fmov d5, x14 +; CHECK-NEXT: fcvtzs x10, s24 +; CHECK-NEXT: fmov d24, x15 +; CHECK-NEXT: fcvtzs x14, s28 +; CHECK-NEXT: frintx s27, s27 +; CHECK-NEXT: mov v23.d[1], x13 +; CHECK-NEXT: mov s4, v4.s[1] +; CHECK-NEXT: fcvtzs x13, s25 +; CHECK-NEXT: fmov d25, x18 +; CHECK-NEXT: mov s16, v16.s[1] +; CHECK-NEXT: mov v24.d[1], x17 +; CHECK-NEXT: fcvtzs x16, s22 +; CHECK-NEXT: frintx s22, s3 +; CHECK-NEXT: mov s3, v3.s[1] +; CHECK-NEXT: frintx s19, s19 +; CHECK-NEXT: mov s2, v2.s[1] +; CHECK-NEXT: mov v25.d[1], x10 +; CHECK-NEXT: fcvtzs x10, s27 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: mov v6.d[1], x12 +; CHECK-NEXT: frintx s17, s17 +; CHECK-NEXT: mov s18, v18.s[1] +; CHECK-NEXT: stp q24, q26, [x8, #224] 
+; CHECK-NEXT: fmov d24, x14 +; CHECK-NEXT: fcvtzs x11, s22 +; CHECK-NEXT: ext v22.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: mov s1, v1.s[1] +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: stp q25, q23, [x8, #192] +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: fcvtzs x12, s4 +; CHECK-NEXT: mov v24.d[1], x10 +; CHECK-NEXT: fcvtzs x10, s19 +; CHECK-NEXT: mov s19, v0.s[1] +; CHECK-NEXT: frintx s16, s16 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fmov d4, x11 +; CHECK-NEXT: mov s27, v22.s[1] +; CHECK-NEXT: frintx s22, s22 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fcvtzs x14, s2 +; CHECK-NEXT: frintx s2, s18 +; CHECK-NEXT: stp q24, q6, [x8, #160] +; CHECK-NEXT: fmov d6, x13 +; CHECK-NEXT: fcvtzs x13, s17 +; CHECK-NEXT: frintx s17, s19 +; CHECK-NEXT: fmov d23, x16 +; CHECK-NEXT: mov v7.d[1], x10 +; CHECK-NEXT: frintx s3, s27 +; CHECK-NEXT: fcvtzs x10, s22 +; CHECK-NEXT: fcvtzs x15, s1 +; CHECK-NEXT: mov v6.d[1], x12 +; CHECK-NEXT: fcvtzs x12, s16 +; CHECK-NEXT: mov v4.d[1], x11 +; CHECK-NEXT: mov v21.d[1], x13 +; CHECK-NEXT: fcvtzs x13, s0 +; CHECK-NEXT: mov v23.d[1], x14 +; CHECK-NEXT: fcvtzs x14, s17 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: mov v5.d[1], x15 +; CHECK-NEXT: stp q6, q7, [x8, #128] +; CHECK-NEXT: mov v20.d[1], x12 +; CHECK-NEXT: fcvtzs x12, s2 +; CHECK-NEXT: stp q4, q21, [x8, #96] +; CHECK-NEXT: fmov d1, x13 +; CHECK-NEXT: fmov d2, x9 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: stp q23, q20, [x8, #64] +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: mov v2.d[1], x12 +; CHECK-NEXT: stp q5, q0, [x8, #32] +; CHECK-NEXT: stp q1, q2, [x8] +; CHECK-NEXT: ret + %a = call <32 x i64> @llvm.lrint.v32i64.v32f32(<32 x float> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.lrint.v32i64.v32f32(<32 x float>) + +define <1 x i64> @lrint_v1f64(<1 x double> %x) { +; CHECK-LABEL: lrint_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret + 
%a = call <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>) + +define <2 x i64> @lrint_v2f64(<2 x double> %x) { +; CHECK-LABEL: lrint_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: fcvtzs x9, d1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: ret + %a = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>) + +define <4 x i64> @lrint_v4f64(<4 x double> %x) { +; CHECK-LABEL: lrint_v4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov d2, v0.d[1] +; CHECK-NEXT: mov d3, v1.d[1] +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: frintx d2, d2 +; CHECK-NEXT: frintx d3, d3 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: fcvtzs x9, d1 +; CHECK-NEXT: fcvtzs x10, d2 +; CHECK-NEXT: fcvtzs x11, d3 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: ret + %a = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>) + +define <8 x i64> @lrint_v8f64(<8 x double> %x) { +; CHECK-LABEL: lrint_v8f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov d4, v0.d[1] +; CHECK-NEXT: mov d5, v1.d[1] +; CHECK-NEXT: mov d6, v2.d[1] +; CHECK-NEXT: mov d7, v3.d[1] +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: frintx d2, d2 +; CHECK-NEXT: frintx d3, d3 +; CHECK-NEXT: frintx d4, d4 +; CHECK-NEXT: frintx d5, d5 +; CHECK-NEXT: frintx d6, d6 +; CHECK-NEXT: frintx d7, d7 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: fcvtzs x9, d1 +; CHECK-NEXT: fcvtzs x10, d2 +; CHECK-NEXT: fcvtzs x11, d3 +; CHECK-NEXT: fcvtzs x12, d4 +; CHECK-NEXT: fcvtzs x13, d5 +; CHECK-NEXT: fcvtzs x14, d6 +; CHECK-NEXT: fcvtzs x15, d7 +; CHECK-NEXT: fmov 
d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d2, x10 +; CHECK-NEXT: fmov d3, x11 +; CHECK-NEXT: mov v0.d[1], x12 +; CHECK-NEXT: mov v1.d[1], x13 +; CHECK-NEXT: mov v2.d[1], x14 +; CHECK-NEXT: mov v3.d[1], x15 +; CHECK-NEXT: ret + %a = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>) + +define <16 x i64> @lrint_v16f64(<16 x double> %x) { +; CHECK-LABEL: lrint_v16f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov d16, v0.d[1] +; CHECK-NEXT: mov d17, v1.d[1] +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: frintx d18, d2 +; CHECK-NEXT: mov d2, v2.d[1] +; CHECK-NEXT: frintx d19, d3 +; CHECK-NEXT: mov d3, v3.d[1] +; CHECK-NEXT: frintx d16, d16 +; CHECK-NEXT: frintx d17, d17 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: frintx d0, d4 +; CHECK-NEXT: mov d4, v4.d[1] +; CHECK-NEXT: fcvtzs x9, d1 +; CHECK-NEXT: frintx d1, d5 +; CHECK-NEXT: mov d5, v5.d[1] +; CHECK-NEXT: fcvtzs x12, d18 +; CHECK-NEXT: frintx d2, d2 +; CHECK-NEXT: fcvtzs x13, d19 +; CHECK-NEXT: frintx d18, d3 +; CHECK-NEXT: fcvtzs x10, d16 +; CHECK-NEXT: mov d16, v6.d[1] +; CHECK-NEXT: fcvtzs x11, d17 +; CHECK-NEXT: mov d17, v7.d[1] +; CHECK-NEXT: frintx d6, d6 +; CHECK-NEXT: frintx d7, d7 +; CHECK-NEXT: frintx d4, d4 +; CHECK-NEXT: frintx d5, d5 +; CHECK-NEXT: fcvtzs x14, d0 +; CHECK-NEXT: fcvtzs x15, d1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: frintx d16, d16 +; CHECK-NEXT: fcvtzs x9, d2 +; CHECK-NEXT: fmov d2, x12 +; CHECK-NEXT: frintx d17, d17 +; CHECK-NEXT: fcvtzs x8, d6 +; CHECK-NEXT: fcvtzs x12, d7 +; CHECK-NEXT: fmov d3, x13 +; CHECK-NEXT: fcvtzs x13, d18 +; CHECK-NEXT: fcvtzs x16, d4 +; CHECK-NEXT: fcvtzs x17, d5 +; CHECK-NEXT: fmov d4, x14 +; CHECK-NEXT: fmov d5, x15 +; CHECK-NEXT: fcvtzs x18, d16 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: fcvtzs x0, d17 +; CHECK-NEXT: fmov d6, x8 +; CHECK-NEXT: fmov d7, x12 +; CHECK-NEXT: mov 
v2.d[1], x9 +; CHECK-NEXT: mov v3.d[1], x13 +; CHECK-NEXT: mov v4.d[1], x16 +; CHECK-NEXT: mov v5.d[1], x17 +; CHECK-NEXT: mov v6.d[1], x18 +; CHECK-NEXT: mov v7.d[1], x0 +; CHECK-NEXT: ret + %a = call <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double>) + +define <32 x i64> @lrint_v32f64(<32 x double> %x) { +; CHECK-LABEL: lrint_v32f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d20, d0 +; CHECK-NEXT: frintx d22, d3 +; CHECK-NEXT: frintx d21, d4 +; CHECK-NEXT: ldp q19, q18, [sp, #64] +; CHECK-NEXT: frintx d23, d5 +; CHECK-NEXT: ldp q27, q26, [sp, #96] +; CHECK-NEXT: mov d4, v4.d[1] +; CHECK-NEXT: ldp q16, q17, [sp, #32] +; CHECK-NEXT: mov d5, v5.d[1] +; CHECK-NEXT: fcvtzs x9, d20 +; CHECK-NEXT: frintx d20, d6 +; CHECK-NEXT: fcvtzs x11, d22 +; CHECK-NEXT: frintx d22, d19 +; CHECK-NEXT: fcvtzs x12, d21 +; CHECK-NEXT: fcvtzs x10, d23 +; CHECK-NEXT: mov d21, v26.d[1] +; CHECK-NEXT: frintx d23, d27 +; CHECK-NEXT: mov d27, v27.d[1] +; CHECK-NEXT: frintx d24, d16 +; CHECK-NEXT: mov d19, v19.d[1] +; CHECK-NEXT: frintx d25, d17 +; CHECK-NEXT: fcvtzs x13, d20 +; CHECK-NEXT: mov d20, v18.d[1] +; CHECK-NEXT: frintx d18, d18 +; CHECK-NEXT: fcvtzs x16, d22 +; CHECK-NEXT: frintx d22, d26 +; CHECK-NEXT: mov d16, v16.d[1] +; CHECK-NEXT: frintx d21, d21 +; CHECK-NEXT: fcvtzs x17, d23 +; CHECK-NEXT: frintx d23, d27 +; CHECK-NEXT: fcvtzs x14, d24 +; CHECK-NEXT: frintx d26, d19 +; CHECK-NEXT: fmov d19, x11 +; CHECK-NEXT: frintx d20, d20 +; CHECK-NEXT: mov d27, v17.d[1] +; CHECK-NEXT: fcvtzs x15, d25 +; CHECK-NEXT: ldp q25, q24, [sp] +; CHECK-NEXT: fcvtzs x11, d22 +; CHECK-NEXT: fmov d17, x12 +; CHECK-NEXT: fcvtzs x12, d21 +; CHECK-NEXT: fcvtzs x0, d23 +; CHECK-NEXT: fmov d23, x14 +; CHECK-NEXT: fcvtzs x14, d18 +; CHECK-NEXT: fmov d18, x17 +; CHECK-NEXT: fcvtzs x17, d20 +; CHECK-NEXT: frintx d21, d7 +; CHECK-NEXT: fcvtzs x18, d26 +; CHECK-NEXT: fmov d20, x11 +; CHECK-NEXT: frintx d22, d25 +; 
CHECK-NEXT: frintx d26, d27 +; CHECK-NEXT: frintx d16, d16 +; CHECK-NEXT: mov v18.d[1], x0 +; CHECK-NEXT: mov d25, v25.d[1] +; CHECK-NEXT: mov d7, v7.d[1] +; CHECK-NEXT: mov d6, v6.d[1] +; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: mov v20.d[1], x12 +; CHECK-NEXT: fcvtzs x11, d21 +; CHECK-NEXT: fmov d21, x15 +; CHECK-NEXT: fcvtzs x12, d22 +; CHECK-NEXT: fmov d22, x16 +; CHECK-NEXT: fcvtzs x15, d26 +; CHECK-NEXT: fmov d26, x14 +; CHECK-NEXT: fcvtzs x14, d16 +; CHECK-NEXT: frintx d25, d25 +; CHECK-NEXT: frintx d7, d7 +; CHECK-NEXT: mov d16, v1.d[1] +; CHECK-NEXT: mov d3, v3.d[1] +; CHECK-NEXT: stp q18, q20, [x8, #224] +; CHECK-NEXT: mov d18, v24.d[1] +; CHECK-NEXT: mov v22.d[1], x18 +; CHECK-NEXT: mov v26.d[1], x17 +; CHECK-NEXT: frintx d24, d24 +; CHECK-NEXT: mov v21.d[1], x15 +; CHECK-NEXT: mov v23.d[1], x14 +; CHECK-NEXT: frintx d20, d2 +; CHECK-NEXT: mov d2, v2.d[1] +; CHECK-NEXT: frintx d6, d6 +; CHECK-NEXT: frintx d5, d5 +; CHECK-NEXT: frintx d4, d4 +; CHECK-NEXT: frintx d18, d18 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: frintx d3, d3 +; CHECK-NEXT: stp q22, q26, [x8, #192] +; CHECK-NEXT: fmov d22, x10 +; CHECK-NEXT: fcvtzs x10, d24 +; CHECK-NEXT: stp q23, q21, [x8, #160] +; CHECK-NEXT: fmov d21, x11 +; CHECK-NEXT: fmov d24, x13 +; CHECK-NEXT: frintx d2, d2 +; CHECK-NEXT: fcvtzs x13, d6 +; CHECK-NEXT: frintx d6, d16 +; CHECK-NEXT: fcvtzs x11, d18 +; CHECK-NEXT: fmov d18, x12 +; CHECK-NEXT: fcvtzs x12, d25 +; CHECK-NEXT: fmov d23, x10 +; CHECK-NEXT: fcvtzs x10, d7 +; CHECK-NEXT: fcvtzs x14, d5 +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs x15, d3 +; CHECK-NEXT: mov v24.d[1], x13 +; CHECK-NEXT: fcvtzs x13, d2 +; CHECK-NEXT: fmov d2, x9 +; CHECK-NEXT: mov v23.d[1], x11 +; CHECK-NEXT: fcvtzs x11, d4 +; CHECK-NEXT: mov v18.d[1], x12 +; CHECK-NEXT: fcvtzs x12, d20 +; CHECK-NEXT: mov v21.d[1], x10 +; CHECK-NEXT: fcvtzs x10, d1 +; CHECK-NEXT: mov v22.d[1], x14 +; CHECK-NEXT: fcvtzs x14, d6 +; CHECK-NEXT: mov v19.d[1], x15 +; CHECK-NEXT: stp q18, q23, [x8, #128] 
+; CHECK-NEXT: mov v17.d[1], x11 +; CHECK-NEXT: fcvtzs x11, d0 +; CHECK-NEXT: stp q24, q21, [x8, #96] +; CHECK-NEXT: fmov d0, x12 +; CHECK-NEXT: fmov d1, x10 +; CHECK-NEXT: stp q17, q22, [x8, #64] +; CHECK-NEXT: mov v0.d[1], x13 +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: mov v2.d[1], x11 +; CHECK-NEXT: stp q0, q19, [x8, #32] +; CHECK-NEXT: stp q2, q1, [x8] +; CHECK-NEXT: ret + %a = call <32 x i64> @llvm.lrint.v32i64.v32f64(<32 x double> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.lrint.v32i64.v32f64(<32 x double>) diff --git a/llvm/test/CodeGen/AArch64/sve-llrint.ll b/llvm/test/CodeGen/AArch64/sve-llrint.ll new file mode 100644 index 0000000000000..11d45b3a43521 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-llrint.ll @@ -0,0 +1,492 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s + +define <vscale x 1 x i64> @llrint_v1i64_v1f16(<vscale x 1 x half> %x) { +; CHECK-LABEL: llrint_v1i64_v1f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: ret + %a = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f16(<vscale x 1 x half> %x) + ret <vscale x 1 x i64> %a +} +declare <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f16(<vscale x 1 x half>) + +define <vscale x 2 x i64> @llrint_v1i64_v2f16(<vscale x 2 x half> %x) { +; CHECK-LABEL: llrint_v1i64_v2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: ret + %a = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f16(<vscale x 2 x half> %x) + ret <vscale x 2 x i64> %a +} +declare <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f16(<vscale x 2 x half>) + +define <vscale x 4 x i64> @llrint_v4i64_v4f16(<vscale x 4 x half> %x) { +; CHECK-LABEL: llrint_v4i64_v4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.h +; CHECK-NEXT: movprfx z1, z2 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h +; CHECK-NEXT: ret + %a = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f16(<vscale x 4 x half> %x) + ret <vscale x 4 x i64> %a +} +declare <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f16(<vscale x 4 x half>) + +define <vscale x 8 x i64> @llrint_v8i64_v8f16(<vscale x 8 x half> %x) { +; CHECK-LABEL: llrint_v8i64_v8f16: +; CHECK: // %bb.0: +; 
CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z2.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: uunpklo z3.d, z0.s +; CHECK-NEXT: uunpkhi z4.d, z0.s +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.h +; CHECK-NEXT: movprfx z2, z3 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z3.h +; CHECK-NEXT: movprfx z3, z4 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z4.h +; CHECK-NEXT: ret + %a = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f16(<vscale x 8 x half> %x) + ret <vscale x 8 x i64> %a +} +declare <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f16(<vscale x 8 x half>) + +define <vscale x 16 x i64> @llrint_v16i64_v16f16(<vscale x 16 x half> %x) { +; CHECK-LABEL: llrint_v16i64_v16f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z2.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: uunpklo z3.s, z1.h +; CHECK-NEXT: uunpkhi z1.s, z1.h +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z4.d, z2.s +; CHECK-NEXT: uunpkhi z2.d, z2.s +; CHECK-NEXT: uunpklo z5.d, z0.s +; CHECK-NEXT: uunpkhi z6.d, z0.s +; CHECK-NEXT: uunpklo z7.d, z3.s +; CHECK-NEXT: uunpkhi z24.d, z3.s +; CHECK-NEXT: uunpklo z25.d, z1.s +; CHECK-NEXT: uunpkhi z26.d, z1.s +; CHECK-NEXT: movprfx z0, z4 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.h +; CHECK-NEXT: movprfx z1, z2 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h +; CHECK-NEXT: movprfx z2, z5 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z5.h +; CHECK-NEXT: movprfx z3, z6 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z6.h +; CHECK-NEXT: movprfx z4, z7 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.h +; CHECK-NEXT: movprfx z5, z24 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z24.h +; CHECK-NEXT: movprfx z6, z25 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z25.h +; CHECK-NEXT: movprfx z7, z26 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z26.h +; CHECK-NEXT: ret + %a = call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f16(<vscale x 16 x half> %x) + ret <vscale x 16 x i64> %a +} +declare <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f16(<vscale x 16 x half>) + +define <vscale x 32 x i64> @llrint_v32i64_v32f16(<vscale x 32 x half> %x) { +; CHECK-LABEL: llrint_v32i64_v32f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z4.s, z3.h +; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: rdvl x9, #15 +; 
CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z7.s, z2.h +; CHECK-NEXT: uunpklo z2.s, z2.h +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: uunpklo z24.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: uunpkhi z5.d, z4.s +; CHECK-NEXT: uunpklo z4.d, z4.s +; CHECK-NEXT: uunpkhi z6.d, z3.s +; CHECK-NEXT: uunpklo z3.d, z3.s +; CHECK-NEXT: uunpkhi z25.d, z2.s +; CHECK-NEXT: uunpklo z2.d, z2.s +; CHECK-NEXT: uunpklo z26.d, z0.s +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h +; CHECK-NEXT: fcvtzs z25.d, p0/m, z25.h +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #14 +; CHECK-NEXT: movprfx z5, z6 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z6.h +; CHECK-NEXT: uunpkhi z6.d, z7.s +; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9] +; CHECK-NEXT: uunpkhi z4.s, z1.h +; CHECK-NEXT: uunpklo z7.d, z7.s +; CHECK-NEXT: rdvl x9, #13 +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #12 +; CHECK-NEXT: movprfx z5, z6 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z6.h +; CHECK-NEXT: st1b { z3.b }, p1, [x8, x9] +; CHECK-NEXT: uunpkhi z3.d, z4.s +; CHECK-NEXT: uunpklo z4.d, z4.s +; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.h +; CHECK-NEXT: rdvl x9, #11 +; CHECK-NEXT: uunpkhi z6.d, z24.s +; CHECK-NEXT: uunpkhi z27.d, z1.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #10 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h +; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h +; CHECK-NEXT: uunpklo z7.d, z24.s +; CHECK-NEXT: rdvl x9, #9 +; CHECK-NEXT: movprfx z5, z27 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z27.h +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h +; CHECK-NEXT: st1b { z25.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #8 +; CHECK-NEXT: st1b { z2.b }, p1, [x8, x9] +; CHECK-NEXT: movprfx z2, z26 +; CHECK-NEXT: fcvtzs 
z2.d, p0/m, z26.h +; CHECK-NEXT: st1d { z3.d }, p0, [x8, #7, mul vl] +; CHECK-NEXT: movprfx z3, z6 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z6.h +; CHECK-NEXT: st1d { z4.d }, p0, [x8, #6, mul vl] +; CHECK-NEXT: movprfx z4, z7 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.h +; CHECK-NEXT: st1d { z5.d }, p0, [x8, #5, mul vl] +; CHECK-NEXT: st1d { z1.d }, p0, [x8, #4, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: st1d { z2.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z3.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z4.d }, p0, [x8] +; CHECK-NEXT: ret + %a = call @llvm.llrint.nxv32i64.nxv32f16( %x) + ret %a +} +declare @llvm.llrint.nxv32i64.nxv32f16() + +define @llrint_v1i64_v1f32( %x) { +; CHECK-LABEL: llrint_v1i64_v1f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: ret + %a = call @llvm.llrint.nxv1i64.nxv1f32( %x) + ret %a +} +declare @llvm.llrint.nxv1i64.nxv1f32() + +define @llrint_v2i64_v2f32( %x) { +; CHECK-LABEL: llrint_v2i64_v2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: ret + %a = call @llvm.llrint.nxv2i64.nxv2f32( %x) + ret %a +} +declare @llvm.llrint.nxv2i64.nxv2f32() + +define @llrint_v4i64_v4f32( %x) { +; CHECK-LABEL: llrint_v4i64_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s +; CHECK-NEXT: movprfx z1, z2 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.s +; CHECK-NEXT: ret + %a = call @llvm.llrint.nxv4i64.nxv4f32( %x) + ret %a +} +declare @llvm.llrint.nxv4i64.nxv4f32() + +define @llrint_v8i64_v8f32( %x) { +; CHECK-LABEL: llrint_v8i64_v8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z2.d, z0.s +; CHECK-NEXT: uunpkhi z3.d, z0.s +; CHECK-NEXT: uunpklo z4.d, z1.s +; CHECK-NEXT: uunpkhi z5.d, z1.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.s +; 
CHECK-NEXT: movprfx z1, z3 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z3.s +; CHECK-NEXT: movprfx z2, z4 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z4.s +; CHECK-NEXT: movprfx z3, z5 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z5.s +; CHECK-NEXT: ret + %a = call @llvm.llrint.nxv8i64.nxv8f32( %x) + ret %a +} +declare @llvm.llrint.nxv8i64.nxv8f32() + +define @llrint_v16i64_v16f32( %x) { +; CHECK-LABEL: llrint_v16i64_v16f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z4.d, z0.s +; CHECK-NEXT: uunpkhi z5.d, z0.s +; CHECK-NEXT: uunpklo z6.d, z1.s +; CHECK-NEXT: uunpkhi z7.d, z1.s +; CHECK-NEXT: uunpklo z24.d, z2.s +; CHECK-NEXT: uunpkhi z25.d, z2.s +; CHECK-NEXT: uunpklo z26.d, z3.s +; CHECK-NEXT: uunpkhi z27.d, z3.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z4 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.s +; CHECK-NEXT: movprfx z1, z5 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.s +; CHECK-NEXT: movprfx z2, z6 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.s +; CHECK-NEXT: movprfx z3, z7 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.s +; CHECK-NEXT: movprfx z4, z24 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z24.s +; CHECK-NEXT: movprfx z5, z25 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z25.s +; CHECK-NEXT: movprfx z6, z26 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z26.s +; CHECK-NEXT: movprfx z7, z27 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z27.s +; CHECK-NEXT: ret + %a = call @llvm.llrint.nxv16i64.nxv16f32( %x) + ret %a +} +declare @llvm.llrint.nxv16i64.nxv16f32() + +define @llrint_v32i64_v32f32( %x) { +; CHECK-LABEL: llrint_v32i64_v32f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z24.d, z7.s +; CHECK-NEXT: uunpklo z7.d, z7.s +; CHECK-NEXT: rdvl x9, #15 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z27.d, z6.s +; CHECK-NEXT: uunpklo z6.d, z6.s +; CHECK-NEXT: uunpkhi z30.d, z5.s +; CHECK-NEXT: uunpklo z5.d, z5.s +; CHECK-NEXT: uunpkhi z31.d, z4.s +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: uunpklo z29.d, z3.s +; CHECK-NEXT: uunpkhi z3.d, z3.s +; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.s +; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.s +; CHECK-NEXT: uunpklo z4.d, 
z4.s +; CHECK-NEXT: fcvtzs z27.d, p0/m, z27.s +; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s +; CHECK-NEXT: uunpkhi z25.d, z0.s +; CHECK-NEXT: fcvtzs z30.d, p0/m, z30.s +; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.s +; CHECK-NEXT: uunpklo z26.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: uunpklo z28.d, z2.s +; CHECK-NEXT: uunpkhi z2.d, z2.s +; CHECK-NEXT: st1b { z24.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #14 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s +; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #13 +; CHECK-NEXT: movprfx z7, z31 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z31.s +; CHECK-NEXT: st1b { z27.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #12 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s +; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #11 +; CHECK-NEXT: movprfx z6, z29 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z29.s +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: st1b { z30.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #10 +; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #9 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s +; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #8 +; CHECK-NEXT: movprfx z5, z28 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z28.s +; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9] +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s +; CHECK-NEXT: movprfx z4, z25 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z25.s +; CHECK-NEXT: st1d { z3.d }, p0, [x8, #7, mul vl] +; CHECK-NEXT: movprfx z3, z26 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z26.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: st1d { z6.d }, p0, [x8, #6, mul vl] +; CHECK-NEXT: st1d { z2.d }, p0, [x8, #5, mul vl] +; CHECK-NEXT: st1d { z5.d }, p0, [x8, #4, mul vl] +; CHECK-NEXT: st1d { z1.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: st1d { z3.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z4.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [x8] +; CHECK-NEXT: ret + %a = call @llvm.llrint.nxv32i64.nxv32f32( %x) + ret %a +} +declare @llvm.llrint.nxv32i64.nxv32f32() + +define 
@llrint_v1i64_v1f64( %x) { +; CHECK-LABEL: llrint_v1i64_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %a = call @llvm.llrint.nxv1i64.nxv1f64( %x) + ret %a +} +declare @llvm.llrint.nxv1i64.nxv1f64() + +define @llrint_v2i64_v2f64( %x) { +; CHECK-LABEL: llrint_v2i64_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %a = call @llvm.llrint.nxv2i64.nxv2f64( %x) + ret %a +} +declare @llvm.llrint.nxv2i64.nxv2f64() + +define @llrint_v4i64_v4f64( %x) { +; CHECK-LABEL: llrint_v4i64_v4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d +; CHECK-NEXT: ret + %a = call @llvm.llrint.nxv4i64.nxv4f64( %x) + ret %a +} +declare @llvm.llrint.nxv4i64.nxv4f64() + +define @llrint_v8i64_v8f64( %x) { +; CHECK-LABEL: llrint_v8i64_v8f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d +; CHECK-NEXT: ret + %a = call @llvm.llrint.nxv8i64.nxv8f64( %x) + ret %a +} +declare @llvm.llrint.nxv8i64.nxv8f64() + +define @llrint_v16f64( %x) { +; CHECK-LABEL: llrint_v16f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d +; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d +; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d +; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.d +; CHECK-NEXT: ret + %a = call @llvm.llrint.nxv16i64.nxv16f64( %x) + ret %a +} +declare @llvm.llrint.nxv16i64.nxv16f64() + +define @llrint_v32f64( %x) { +; CHECK-LABEL: llrint_v32f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: rdvl x9, #15 +; CHECK-NEXT: rdvl x10, #14 +; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: rdvl 
x11, #13 +; CHECK-NEXT: rdvl x12, #12 +; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x9] +; CHECK-NEXT: rdvl x13, #11 +; CHECK-NEXT: rdvl x14, #10 +; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0, x10] +; CHECK-NEXT: ld1b { z2.b }, p0/z, [x0, x11] +; CHECK-NEXT: ld1b { z3.b }, p0/z, [x0, x12] +; CHECK-NEXT: ld1b { z4.b }, p0/z, [x0, x13] +; CHECK-NEXT: ld1b { z5.b }, p0/z, [x0, x14] +; CHECK-NEXT: rdvl x15, #9 +; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d +; CHECK-NEXT: rdvl x16, #8 +; CHECK-NEXT: ld1b { z6.b }, p0/z, [x0, x15] +; CHECK-NEXT: ld1b { z7.b }, p0/z, [x0, x16] +; CHECK-NEXT: ld1d { z24.d }, p1/z, [x0, #7, mul vl] +; CHECK-NEXT: ld1d { z25.d }, p1/z, [x0, #6, mul vl] +; CHECK-NEXT: ld1d { z26.d }, p1/z, [x0, #5, mul vl] +; CHECK-NEXT: fcvtzs z1.d, p1/m, z1.d +; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0, #4, mul vl] +; CHECK-NEXT: ld1d { z28.d }, p1/z, [x0, #3, mul vl] +; CHECK-NEXT: fcvtzs z2.d, p1/m, z2.d +; CHECK-NEXT: ld1d { z29.d }, p1/z, [x0, #2, mul vl] +; CHECK-NEXT: ld1d { z30.d }, p1/z, [x0, #1, mul vl] +; CHECK-NEXT: fcvtzs z3.d, p1/m, z3.d +; CHECK-NEXT: ld1d { z31.d }, p1/z, [x0] +; CHECK-NEXT: fcvtzs z4.d, p1/m, z4.d +; CHECK-NEXT: st1b { z0.b }, p0, [x8, x9] +; CHECK-NEXT: movprfx z0, z5 +; CHECK-NEXT: fcvtzs z0.d, p1/m, z5.d +; CHECK-NEXT: st1b { z1.b }, p0, [x8, x10] +; CHECK-NEXT: movprfx z1, z6 +; CHECK-NEXT: fcvtzs z1.d, p1/m, z6.d +; CHECK-NEXT: st1b { z2.b }, p0, [x8, x11] +; CHECK-NEXT: movprfx z2, z7 +; CHECK-NEXT: fcvtzs z2.d, p1/m, z7.d +; CHECK-NEXT: st1b { z3.b }, p0, [x8, x12] +; CHECK-NEXT: movprfx z3, z24 +; CHECK-NEXT: fcvtzs z3.d, p1/m, z24.d +; CHECK-NEXT: st1b { z4.b }, p0, [x8, x13] +; CHECK-NEXT: movprfx z4, z25 +; CHECK-NEXT: fcvtzs z4.d, p1/m, z25.d +; CHECK-NEXT: st1b { z0.b }, p0, [x8, x14] +; CHECK-NEXT: movprfx z0, z26 +; CHECK-NEXT: fcvtzs z0.d, p1/m, z26.d +; CHECK-NEXT: st1b { z1.b }, p0, [x8, x15] +; CHECK-NEXT: movprfx z1, z27 +; CHECK-NEXT: fcvtzs z1.d, p1/m, z27.d +; CHECK-NEXT: st1b { z2.b }, p0, [x8, x16] +; CHECK-NEXT: movprfx 
z2, z28 +; CHECK-NEXT: fcvtzs z2.d, p1/m, z28.d +; CHECK-NEXT: st1d { z3.d }, p1, [x8, #7, mul vl] +; CHECK-NEXT: movprfx z3, z29 +; CHECK-NEXT: fcvtzs z3.d, p1/m, z29.d +; CHECK-NEXT: st1d { z4.d }, p1, [x8, #6, mul vl] +; CHECK-NEXT: movprfx z4, z30 +; CHECK-NEXT: fcvtzs z4.d, p1/m, z30.d +; CHECK-NEXT: st1d { z0.d }, p1, [x8, #5, mul vl] +; CHECK-NEXT: movprfx z0, z31 +; CHECK-NEXT: fcvtzs z0.d, p1/m, z31.d +; CHECK-NEXT: st1d { z1.d }, p1, [x8, #4, mul vl] +; CHECK-NEXT: st1d { z2.d }, p1, [x8, #3, mul vl] +; CHECK-NEXT: st1d { z3.d }, p1, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z4.d }, p1, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z0.d }, p1, [x8] +; CHECK-NEXT: ret + %a = call @llvm.llrint.nxv32i64.nxv16f64( %x) + ret %a +} +declare @llvm.llrint.nxv32i64.nxv32f64() diff --git a/llvm/test/CodeGen/AArch64/sve-lrint.ll b/llvm/test/CodeGen/AArch64/sve-lrint.ll new file mode 100644 index 0000000000000..1e7bf2e280ce8 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-lrint.ll @@ -0,0 +1,492 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s + +define @lrint_v1f16( %x) { +; CHECK-LABEL: lrint_v1f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: ret + %a = call @llvm.lrint.nxv1i64.nxv1f16( %x) + ret %a +} +declare @llvm.lrint.nxv1i64.nxv1f16() + +define @lrint_v2f16( %x) { +; CHECK-LABEL: lrint_v2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: ret + %a = call @llvm.lrint.nxv2i64.nxv2f16( %x) + ret %a +} +declare @llvm.lrint.nxv2i64.nxv2f16() + +define @lrint_v4f16( %x) { +; CHECK-LABEL: lrint_v4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.h +; CHECK-NEXT: movprfx z1, z2 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h +; CHECK-NEXT: ret + %a = 
call @llvm.lrint.nxv4i64.nxv4f16( %x) + ret %a +} +declare @llvm.lrint.nxv4i64.nxv4f16() + +define @lrint_v8f16( %x) { +; CHECK-LABEL: lrint_v8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z2.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: uunpklo z3.d, z0.s +; CHECK-NEXT: uunpkhi z4.d, z0.s +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.h +; CHECK-NEXT: movprfx z2, z3 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z3.h +; CHECK-NEXT: movprfx z3, z4 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z4.h +; CHECK-NEXT: ret + %a = call @llvm.lrint.nxv8i64.nxv8f16( %x) + ret %a +} +declare @llvm.lrint.nxv8i64.nxv8f16() + +define @lrint_v16i64_v16f16( %x) { +; CHECK-LABEL: lrint_v16i64_v16f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z2.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: uunpklo z3.s, z1.h +; CHECK-NEXT: uunpkhi z1.s, z1.h +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z4.d, z2.s +; CHECK-NEXT: uunpkhi z2.d, z2.s +; CHECK-NEXT: uunpklo z5.d, z0.s +; CHECK-NEXT: uunpkhi z6.d, z0.s +; CHECK-NEXT: uunpklo z7.d, z3.s +; CHECK-NEXT: uunpkhi z24.d, z3.s +; CHECK-NEXT: uunpklo z25.d, z1.s +; CHECK-NEXT: uunpkhi z26.d, z1.s +; CHECK-NEXT: movprfx z0, z4 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.h +; CHECK-NEXT: movprfx z1, z2 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h +; CHECK-NEXT: movprfx z2, z5 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z5.h +; CHECK-NEXT: movprfx z3, z6 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z6.h +; CHECK-NEXT: movprfx z4, z7 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.h +; CHECK-NEXT: movprfx z5, z24 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z24.h +; CHECK-NEXT: movprfx z6, z25 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z25.h +; CHECK-NEXT: movprfx z7, z26 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z26.h +; CHECK-NEXT: ret + %a = call @llvm.lrint.nxv16i64.nxv16f16( %x) + ret %a +} +declare @llvm.lrint.nxv16i64.nxv16f16() + +define @lrint_v32i64_v32f16( 
%x) { +; CHECK-LABEL: lrint_v32i64_v32f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z4.s, z3.h +; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: rdvl x9, #15 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z7.s, z2.h +; CHECK-NEXT: uunpklo z2.s, z2.h +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: uunpklo z24.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: uunpkhi z5.d, z4.s +; CHECK-NEXT: uunpklo z4.d, z4.s +; CHECK-NEXT: uunpkhi z6.d, z3.s +; CHECK-NEXT: uunpklo z3.d, z3.s +; CHECK-NEXT: uunpkhi z25.d, z2.s +; CHECK-NEXT: uunpklo z2.d, z2.s +; CHECK-NEXT: uunpklo z26.d, z0.s +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h +; CHECK-NEXT: fcvtzs z25.d, p0/m, z25.h +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #14 +; CHECK-NEXT: movprfx z5, z6 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z6.h +; CHECK-NEXT: uunpkhi z6.d, z7.s +; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9] +; CHECK-NEXT: uunpkhi z4.s, z1.h +; CHECK-NEXT: uunpklo z7.d, z7.s +; CHECK-NEXT: rdvl x9, #13 +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #12 +; CHECK-NEXT: movprfx z5, z6 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z6.h +; CHECK-NEXT: st1b { z3.b }, p1, [x8, x9] +; CHECK-NEXT: uunpkhi z3.d, z4.s +; CHECK-NEXT: uunpklo z4.d, z4.s +; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.h +; CHECK-NEXT: rdvl x9, #11 +; CHECK-NEXT: uunpkhi z6.d, z24.s +; CHECK-NEXT: uunpkhi z27.d, z1.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #10 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h +; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h +; CHECK-NEXT: uunpklo z7.d, z24.s +; CHECK-NEXT: rdvl x9, #9 +; CHECK-NEXT: movprfx z5, z27 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z27.h +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h +; 
CHECK-NEXT: st1b { z25.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #8 +; CHECK-NEXT: st1b { z2.b }, p1, [x8, x9] +; CHECK-NEXT: movprfx z2, z26 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z26.h +; CHECK-NEXT: st1d { z3.d }, p0, [x8, #7, mul vl] +; CHECK-NEXT: movprfx z3, z6 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z6.h +; CHECK-NEXT: st1d { z4.d }, p0, [x8, #6, mul vl] +; CHECK-NEXT: movprfx z4, z7 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.h +; CHECK-NEXT: st1d { z5.d }, p0, [x8, #5, mul vl] +; CHECK-NEXT: st1d { z1.d }, p0, [x8, #4, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: st1d { z2.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z3.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z4.d }, p0, [x8] +; CHECK-NEXT: ret + %a = call @llvm.lrint.nxv32i64.nxv32f16( %x) + ret %a +} +declare @llvm.lrint.nxv32i64.nxv32f16() + +define @lrint_v1f32( %x) { +; CHECK-LABEL: lrint_v1f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: ret + %a = call @llvm.lrint.nxv1i64.nxv1f32( %x) + ret %a +} +declare @llvm.lrint.nxv1i64.nxv1f32() + +define @lrint_v2f32( %x) { +; CHECK-LABEL: lrint_v2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: ret + %a = call @llvm.lrint.nxv2i64.nxv2f32( %x) + ret %a +} +declare @llvm.lrint.nxv2i64.nxv2f32() + +define @lrint_v4f32( %x) { +; CHECK-LABEL: lrint_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s +; CHECK-NEXT: movprfx z1, z2 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.s +; CHECK-NEXT: ret + %a = call @llvm.lrint.nxv4i64.nxv4f32( %x) + ret %a +} +declare @llvm.lrint.nxv4i64.nxv4f32() + +define @lrint_v8f32( %x) { +; CHECK-LABEL: lrint_v8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z2.d, z0.s +; CHECK-NEXT: uunpkhi z3.d, z0.s +; CHECK-NEXT: uunpklo z4.d, z1.s +; CHECK-NEXT: uunpkhi z5.d, z1.s +; 
CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.s +; CHECK-NEXT: movprfx z1, z3 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z3.s +; CHECK-NEXT: movprfx z2, z4 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z4.s +; CHECK-NEXT: movprfx z3, z5 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z5.s +; CHECK-NEXT: ret + %a = call @llvm.lrint.nxv8i64.nxv8f32( %x) + ret %a +} +declare @llvm.lrint.nxv8i64.nxv8f32() + +define @lrint_v16i64_v16f32( %x) { +; CHECK-LABEL: lrint_v16i64_v16f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z4.d, z0.s +; CHECK-NEXT: uunpkhi z5.d, z0.s +; CHECK-NEXT: uunpklo z6.d, z1.s +; CHECK-NEXT: uunpkhi z7.d, z1.s +; CHECK-NEXT: uunpklo z24.d, z2.s +; CHECK-NEXT: uunpkhi z25.d, z2.s +; CHECK-NEXT: uunpklo z26.d, z3.s +; CHECK-NEXT: uunpkhi z27.d, z3.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z4 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.s +; CHECK-NEXT: movprfx z1, z5 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.s +; CHECK-NEXT: movprfx z2, z6 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.s +; CHECK-NEXT: movprfx z3, z7 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.s +; CHECK-NEXT: movprfx z4, z24 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z24.s +; CHECK-NEXT: movprfx z5, z25 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z25.s +; CHECK-NEXT: movprfx z6, z26 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z26.s +; CHECK-NEXT: movprfx z7, z27 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z27.s +; CHECK-NEXT: ret + %a = call @llvm.lrint.nxv16i64.nxv16f32( %x) + ret %a +} +declare @llvm.lrint.nxv16i64.nxv16f32() + +define @lrint_v32i64_v32f32( %x) { +; CHECK-LABEL: lrint_v32i64_v32f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z24.d, z7.s +; CHECK-NEXT: uunpklo z7.d, z7.s +; CHECK-NEXT: rdvl x9, #15 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z27.d, z6.s +; CHECK-NEXT: uunpklo z6.d, z6.s +; CHECK-NEXT: uunpkhi z30.d, z5.s +; CHECK-NEXT: uunpklo z5.d, z5.s +; CHECK-NEXT: uunpkhi z31.d, z4.s +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: uunpklo z29.d, z3.s +; CHECK-NEXT: uunpkhi z3.d, z3.s +; CHECK-NEXT: fcvtzs 
z24.d, p0/m, z24.s +; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.s +; CHECK-NEXT: uunpklo z4.d, z4.s +; CHECK-NEXT: fcvtzs z27.d, p0/m, z27.s +; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s +; CHECK-NEXT: uunpkhi z25.d, z0.s +; CHECK-NEXT: fcvtzs z30.d, p0/m, z30.s +; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.s +; CHECK-NEXT: uunpklo z26.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: uunpklo z28.d, z2.s +; CHECK-NEXT: uunpkhi z2.d, z2.s +; CHECK-NEXT: st1b { z24.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #14 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s +; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #13 +; CHECK-NEXT: movprfx z7, z31 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z31.s +; CHECK-NEXT: st1b { z27.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #12 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s +; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #11 +; CHECK-NEXT: movprfx z6, z29 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z29.s +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: st1b { z30.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #10 +; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #9 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s +; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #8 +; CHECK-NEXT: movprfx z5, z28 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z28.s +; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9] +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s +; CHECK-NEXT: movprfx z4, z25 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z25.s +; CHECK-NEXT: st1d { z3.d }, p0, [x8, #7, mul vl] +; CHECK-NEXT: movprfx z3, z26 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z26.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: st1d { z6.d }, p0, [x8, #6, mul vl] +; CHECK-NEXT: st1d { z2.d }, p0, [x8, #5, mul vl] +; CHECK-NEXT: st1d { z5.d }, p0, [x8, #4, mul vl] +; CHECK-NEXT: st1d { z1.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: st1d { z3.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z4.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [x8] +; CHECK-NEXT: ret + %a = call 
@llvm.lrint.nxv32i64.nxv32f32( %x) + ret %a +} +declare @llvm.lrint.nxv32i64.nxv32f32() + +define @lrint_v1f64( %x) { +; CHECK-LABEL: lrint_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %a = call @llvm.lrint.nxv1i64.nxv1f64( %x) + ret %a +} +declare @llvm.lrint.nxv1i64.nxv1f64() + +define @lrint_v2f64( %x) { +; CHECK-LABEL: lrint_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: ret + %a = call @llvm.lrint.nxv2i64.nxv2f64( %x) + ret %a +} +declare @llvm.lrint.nxv2i64.nxv2f64() + +define @lrint_v4f64( %x) { +; CHECK-LABEL: lrint_v4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d +; CHECK-NEXT: ret + %a = call @llvm.lrint.nxv4i64.nxv4f64( %x) + ret %a +} +declare @llvm.lrint.nxv4i64.nxv4f64() + +define @lrint_v8f64( %x) { +; CHECK-LABEL: lrint_v8f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d +; CHECK-NEXT: ret + %a = call @llvm.lrint.nxv8i64.nxv8f64( %x) + ret %a +} +declare @llvm.lrint.nxv8i64.nxv8f64() + +define @lrint_v16f64( %x) { +; CHECK-LABEL: lrint_v16f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d +; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d +; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d +; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.d +; CHECK-NEXT: ret + %a = call @llvm.lrint.nxv16i64.nxv16f64( %x) + ret %a +} +declare @llvm.lrint.nxv16i64.nxv16f64() + +define @lrint_v32f64( %x) { +; CHECK-LABEL: lrint_v32f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: rdvl x9, #15 +; CHECK-NEXT: rdvl x10, #14 +; CHECK-NEXT: 
ptrue p1.d +; CHECK-NEXT: rdvl x11, #13 +; CHECK-NEXT: rdvl x12, #12 +; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x9] +; CHECK-NEXT: rdvl x13, #11 +; CHECK-NEXT: rdvl x14, #10 +; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0, x10] +; CHECK-NEXT: ld1b { z2.b }, p0/z, [x0, x11] +; CHECK-NEXT: ld1b { z3.b }, p0/z, [x0, x12] +; CHECK-NEXT: ld1b { z4.b }, p0/z, [x0, x13] +; CHECK-NEXT: ld1b { z5.b }, p0/z, [x0, x14] +; CHECK-NEXT: rdvl x15, #9 +; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d +; CHECK-NEXT: rdvl x16, #8 +; CHECK-NEXT: ld1b { z6.b }, p0/z, [x0, x15] +; CHECK-NEXT: ld1b { z7.b }, p0/z, [x0, x16] +; CHECK-NEXT: ld1d { z24.d }, p1/z, [x0, #7, mul vl] +; CHECK-NEXT: ld1d { z25.d }, p1/z, [x0, #6, mul vl] +; CHECK-NEXT: ld1d { z26.d }, p1/z, [x0, #5, mul vl] +; CHECK-NEXT: fcvtzs z1.d, p1/m, z1.d +; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0, #4, mul vl] +; CHECK-NEXT: ld1d { z28.d }, p1/z, [x0, #3, mul vl] +; CHECK-NEXT: fcvtzs z2.d, p1/m, z2.d +; CHECK-NEXT: ld1d { z29.d }, p1/z, [x0, #2, mul vl] +; CHECK-NEXT: ld1d { z30.d }, p1/z, [x0, #1, mul vl] +; CHECK-NEXT: fcvtzs z3.d, p1/m, z3.d +; CHECK-NEXT: ld1d { z31.d }, p1/z, [x0] +; CHECK-NEXT: fcvtzs z4.d, p1/m, z4.d +; CHECK-NEXT: st1b { z0.b }, p0, [x8, x9] +; CHECK-NEXT: movprfx z0, z5 +; CHECK-NEXT: fcvtzs z0.d, p1/m, z5.d +; CHECK-NEXT: st1b { z1.b }, p0, [x8, x10] +; CHECK-NEXT: movprfx z1, z6 +; CHECK-NEXT: fcvtzs z1.d, p1/m, z6.d +; CHECK-NEXT: st1b { z2.b }, p0, [x8, x11] +; CHECK-NEXT: movprfx z2, z7 +; CHECK-NEXT: fcvtzs z2.d, p1/m, z7.d +; CHECK-NEXT: st1b { z3.b }, p0, [x8, x12] +; CHECK-NEXT: movprfx z3, z24 +; CHECK-NEXT: fcvtzs z3.d, p1/m, z24.d +; CHECK-NEXT: st1b { z4.b }, p0, [x8, x13] +; CHECK-NEXT: movprfx z4, z25 +; CHECK-NEXT: fcvtzs z4.d, p1/m, z25.d +; CHECK-NEXT: st1b { z0.b }, p0, [x8, x14] +; CHECK-NEXT: movprfx z0, z26 +; CHECK-NEXT: fcvtzs z0.d, p1/m, z26.d +; CHECK-NEXT: st1b { z1.b }, p0, [x8, x15] +; CHECK-NEXT: movprfx z1, z27 +; CHECK-NEXT: fcvtzs z1.d, p1/m, z27.d +; CHECK-NEXT: st1b { z2.b }, p0, 
[x8, x16] +; CHECK-NEXT: movprfx z2, z28 +; CHECK-NEXT: fcvtzs z2.d, p1/m, z28.d +; CHECK-NEXT: st1d { z3.d }, p1, [x8, #7, mul vl] +; CHECK-NEXT: movprfx z3, z29 +; CHECK-NEXT: fcvtzs z3.d, p1/m, z29.d +; CHECK-NEXT: st1d { z4.d }, p1, [x8, #6, mul vl] +; CHECK-NEXT: movprfx z4, z30 +; CHECK-NEXT: fcvtzs z4.d, p1/m, z30.d +; CHECK-NEXT: st1d { z0.d }, p1, [x8, #5, mul vl] +; CHECK-NEXT: movprfx z0, z31 +; CHECK-NEXT: fcvtzs z0.d, p1/m, z31.d +; CHECK-NEXT: st1d { z1.d }, p1, [x8, #4, mul vl] +; CHECK-NEXT: st1d { z2.d }, p1, [x8, #3, mul vl] +; CHECK-NEXT: st1d { z3.d }, p1, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z4.d }, p1, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z0.d }, p1, [x8] +; CHECK-NEXT: ret + %a = call @llvm.lrint.nxv32i64.nxv16f64( %x) + ret %a +} +declare @llvm.lrint.nxv32i64.nxv32f64() diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll index beb2b6a134600..d4d3fbb0e96b5 100644 --- a/llvm/test/CodeGen/AArch64/vector-llrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll @@ -532,6 +532,143 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { } declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) +define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) { +; CHECK-LABEL: llrint_v32i64_v32f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v16.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: ext v20.16b, v5.16b, v5.16b, #8 +; CHECK-NEXT: ext v17.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: ext v18.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v19.16b, v4.16b, v4.16b, #8 +; CHECK-NEXT: ext v21.16b, v6.16b, v6.16b, #8 +; CHECK-NEXT: ext v22.16b, v7.16b, v7.16b, #8 +; CHECK-NEXT: frintx s24, s16 +; CHECK-NEXT: mov s28, v20.s[1] +; CHECK-NEXT: frintx s25, s17 +; CHECK-NEXT: frintx s26, s18 +; CHECK-NEXT: frintx s27, s19 +; CHECK-NEXT: frintx s29, s20 +; CHECK-NEXT: mov s30, v21.s[1] +; CHECK-NEXT: frintx s20, s21 +; CHECK-NEXT: frintx s21, s22 +; CHECK-NEXT: mov s23, v22.s[1] +; CHECK-NEXT: mov s19, v19.s[1] +; 
CHECK-NEXT: mov s17, v17.s[1] +; CHECK-NEXT: fcvtzs x12, s24 +; CHECK-NEXT: frintx s24, s28 +; CHECK-NEXT: fcvtzs x13, s25 +; CHECK-NEXT: mov s25, v7.s[1] +; CHECK-NEXT: fcvtzs x9, s26 +; CHECK-NEXT: fcvtzs x11, s27 +; CHECK-NEXT: fcvtzs x14, s20 +; CHECK-NEXT: fcvtzs x15, s21 +; CHECK-NEXT: frintx s26, s1 +; CHECK-NEXT: frintx s23, s23 +; CHECK-NEXT: frintx s27, s7 +; CHECK-NEXT: frintx s22, s30 +; CHECK-NEXT: fmov d20, x12 +; CHECK-NEXT: fcvtzs x12, s24 +; CHECK-NEXT: mov s24, v6.s[1] +; CHECK-NEXT: frintx s25, s25 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvtzs x10, s29 +; CHECK-NEXT: fmov d7, x11 +; CHECK-NEXT: fmov d21, x13 +; CHECK-NEXT: frintx s28, s5 +; CHECK-NEXT: fcvtzs x11, s23 +; CHECK-NEXT: fmov d23, x14 +; CHECK-NEXT: fcvtzs x14, s26 +; CHECK-NEXT: fmov d26, x15 +; CHECK-NEXT: fcvtzs x15, s27 +; CHECK-NEXT: frintx s24, s24 +; CHECK-NEXT: mov s27, v5.s[1] +; CHECK-NEXT: fcvtzs x13, s22 +; CHECK-NEXT: fcvtzs x17, s25 +; CHECK-NEXT: frintx s25, s4 +; CHECK-NEXT: fcvtzs x18, s6 +; CHECK-NEXT: fmov d6, x10 +; CHECK-NEXT: frintx s22, s2 +; CHECK-NEXT: mov v26.d[1], x11 +; CHECK-NEXT: fmov d5, x14 +; CHECK-NEXT: fcvtzs x10, s24 +; CHECK-NEXT: fmov d24, x15 +; CHECK-NEXT: fcvtzs x14, s28 +; CHECK-NEXT: frintx s27, s27 +; CHECK-NEXT: mov v23.d[1], x13 +; CHECK-NEXT: mov s4, v4.s[1] +; CHECK-NEXT: fcvtzs x13, s25 +; CHECK-NEXT: fmov d25, x18 +; CHECK-NEXT: mov s16, v16.s[1] +; CHECK-NEXT: mov v24.d[1], x17 +; CHECK-NEXT: fcvtzs x16, s22 +; CHECK-NEXT: frintx s22, s3 +; CHECK-NEXT: mov s3, v3.s[1] +; CHECK-NEXT: frintx s19, s19 +; CHECK-NEXT: mov s2, v2.s[1] +; CHECK-NEXT: mov v25.d[1], x10 +; CHECK-NEXT: fcvtzs x10, s27 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: mov v6.d[1], x12 +; CHECK-NEXT: frintx s17, s17 +; CHECK-NEXT: mov s18, v18.s[1] +; CHECK-NEXT: stp q24, q26, [x8, #224] +; CHECK-NEXT: fmov d24, x14 +; CHECK-NEXT: fcvtzs x11, s22 +; CHECK-NEXT: ext v22.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: mov s1, v1.s[1] +; CHECK-NEXT: frintx s3, s3 +; 
CHECK-NEXT: stp q25, q23, [x8, #192] +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: fcvtzs x12, s4 +; CHECK-NEXT: mov v24.d[1], x10 +; CHECK-NEXT: fcvtzs x10, s19 +; CHECK-NEXT: mov s19, v0.s[1] +; CHECK-NEXT: frintx s16, s16 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fmov d4, x11 +; CHECK-NEXT: mov s27, v22.s[1] +; CHECK-NEXT: frintx s22, s22 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fcvtzs x14, s2 +; CHECK-NEXT: frintx s2, s18 +; CHECK-NEXT: stp q24, q6, [x8, #160] +; CHECK-NEXT: fmov d6, x13 +; CHECK-NEXT: fcvtzs x13, s17 +; CHECK-NEXT: frintx s17, s19 +; CHECK-NEXT: fmov d23, x16 +; CHECK-NEXT: mov v7.d[1], x10 +; CHECK-NEXT: frintx s3, s27 +; CHECK-NEXT: fcvtzs x10, s22 +; CHECK-NEXT: fcvtzs x15, s1 +; CHECK-NEXT: mov v6.d[1], x12 +; CHECK-NEXT: fcvtzs x12, s16 +; CHECK-NEXT: mov v4.d[1], x11 +; CHECK-NEXT: mov v21.d[1], x13 +; CHECK-NEXT: fcvtzs x13, s0 +; CHECK-NEXT: mov v23.d[1], x14 +; CHECK-NEXT: fcvtzs x14, s17 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: mov v5.d[1], x15 +; CHECK-NEXT: stp q6, q7, [x8, #128] +; CHECK-NEXT: mov v20.d[1], x12 +; CHECK-NEXT: fcvtzs x12, s2 +; CHECK-NEXT: stp q4, q21, [x8, #96] +; CHECK-NEXT: fmov d1, x13 +; CHECK-NEXT: fmov d2, x9 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: stp q23, q20, [x8, #64] +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: mov v2.d[1], x12 +; CHECK-NEXT: stp q5, q0, [x8, #32] +; CHECK-NEXT: stp q1, q2, [x8] +; CHECK-NEXT: ret + %a = call <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>) + define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { ; CHECK-LABEL: llrint_v1i64_v1f64: ; CHECK: // %bb.0: @@ -619,3 +756,201 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ret <8 x i64> %a } declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) + +define <16 x i64> @llrint_v16f64(<16 x double> %x) { +; CHECK-LABEL: llrint_v16f64: +; CHECK: // %bb.0: +; 
CHECK-NEXT: mov d16, v0.d[1] +; CHECK-NEXT: mov d17, v1.d[1] +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: frintx d18, d2 +; CHECK-NEXT: mov d2, v2.d[1] +; CHECK-NEXT: frintx d19, d3 +; CHECK-NEXT: mov d3, v3.d[1] +; CHECK-NEXT: frintx d16, d16 +; CHECK-NEXT: frintx d17, d17 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: frintx d0, d4 +; CHECK-NEXT: mov d4, v4.d[1] +; CHECK-NEXT: fcvtzs x9, d1 +; CHECK-NEXT: frintx d1, d5 +; CHECK-NEXT: mov d5, v5.d[1] +; CHECK-NEXT: fcvtzs x12, d18 +; CHECK-NEXT: frintx d2, d2 +; CHECK-NEXT: fcvtzs x13, d19 +; CHECK-NEXT: frintx d18, d3 +; CHECK-NEXT: fcvtzs x10, d16 +; CHECK-NEXT: mov d16, v6.d[1] +; CHECK-NEXT: fcvtzs x11, d17 +; CHECK-NEXT: mov d17, v7.d[1] +; CHECK-NEXT: frintx d6, d6 +; CHECK-NEXT: frintx d7, d7 +; CHECK-NEXT: frintx d4, d4 +; CHECK-NEXT: frintx d5, d5 +; CHECK-NEXT: fcvtzs x14, d0 +; CHECK-NEXT: fcvtzs x15, d1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: frintx d16, d16 +; CHECK-NEXT: fcvtzs x9, d2 +; CHECK-NEXT: fmov d2, x12 +; CHECK-NEXT: frintx d17, d17 +; CHECK-NEXT: fcvtzs x8, d6 +; CHECK-NEXT: fcvtzs x12, d7 +; CHECK-NEXT: fmov d3, x13 +; CHECK-NEXT: fcvtzs x13, d18 +; CHECK-NEXT: fcvtzs x16, d4 +; CHECK-NEXT: fcvtzs x17, d5 +; CHECK-NEXT: fmov d4, x14 +; CHECK-NEXT: fmov d5, x15 +; CHECK-NEXT: fcvtzs x18, d16 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: fcvtzs x0, d17 +; CHECK-NEXT: fmov d6, x8 +; CHECK-NEXT: fmov d7, x12 +; CHECK-NEXT: mov v2.d[1], x9 +; CHECK-NEXT: mov v3.d[1], x13 +; CHECK-NEXT: mov v4.d[1], x16 +; CHECK-NEXT: mov v5.d[1], x17 +; CHECK-NEXT: mov v6.d[1], x18 +; CHECK-NEXT: mov v7.d[1], x0 +; CHECK-NEXT: ret + %a = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>) + +define <32 x i64> @llrint_v32f64(<32 x double> %x) { +; CHECK-LABEL: llrint_v32f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d20, d0 +; 
CHECK-NEXT: frintx d22, d3 +; CHECK-NEXT: frintx d21, d4 +; CHECK-NEXT: ldp q19, q18, [sp, #64] +; CHECK-NEXT: frintx d23, d5 +; CHECK-NEXT: ldp q27, q26, [sp, #96] +; CHECK-NEXT: mov d4, v4.d[1] +; CHECK-NEXT: ldp q16, q17, [sp, #32] +; CHECK-NEXT: mov d5, v5.d[1] +; CHECK-NEXT: fcvtzs x9, d20 +; CHECK-NEXT: frintx d20, d6 +; CHECK-NEXT: fcvtzs x11, d22 +; CHECK-NEXT: frintx d22, d19 +; CHECK-NEXT: fcvtzs x12, d21 +; CHECK-NEXT: fcvtzs x10, d23 +; CHECK-NEXT: mov d21, v26.d[1] +; CHECK-NEXT: frintx d23, d27 +; CHECK-NEXT: mov d27, v27.d[1] +; CHECK-NEXT: frintx d24, d16 +; CHECK-NEXT: mov d19, v19.d[1] +; CHECK-NEXT: frintx d25, d17 +; CHECK-NEXT: fcvtzs x13, d20 +; CHECK-NEXT: mov d20, v18.d[1] +; CHECK-NEXT: frintx d18, d18 +; CHECK-NEXT: fcvtzs x16, d22 +; CHECK-NEXT: frintx d22, d26 +; CHECK-NEXT: mov d16, v16.d[1] +; CHECK-NEXT: frintx d21, d21 +; CHECK-NEXT: fcvtzs x17, d23 +; CHECK-NEXT: frintx d23, d27 +; CHECK-NEXT: fcvtzs x14, d24 +; CHECK-NEXT: frintx d26, d19 +; CHECK-NEXT: fmov d19, x11 +; CHECK-NEXT: frintx d20, d20 +; CHECK-NEXT: mov d27, v17.d[1] +; CHECK-NEXT: fcvtzs x15, d25 +; CHECK-NEXT: ldp q25, q24, [sp] +; CHECK-NEXT: fcvtzs x11, d22 +; CHECK-NEXT: fmov d17, x12 +; CHECK-NEXT: fcvtzs x12, d21 +; CHECK-NEXT: fcvtzs x0, d23 +; CHECK-NEXT: fmov d23, x14 +; CHECK-NEXT: fcvtzs x14, d18 +; CHECK-NEXT: fmov d18, x17 +; CHECK-NEXT: fcvtzs x17, d20 +; CHECK-NEXT: frintx d21, d7 +; CHECK-NEXT: fcvtzs x18, d26 +; CHECK-NEXT: fmov d20, x11 +; CHECK-NEXT: frintx d22, d25 +; CHECK-NEXT: frintx d26, d27 +; CHECK-NEXT: frintx d16, d16 +; CHECK-NEXT: mov v18.d[1], x0 +; CHECK-NEXT: mov d25, v25.d[1] +; CHECK-NEXT: mov d7, v7.d[1] +; CHECK-NEXT: mov d6, v6.d[1] +; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: mov v20.d[1], x12 +; CHECK-NEXT: fcvtzs x11, d21 +; CHECK-NEXT: fmov d21, x15 +; CHECK-NEXT: fcvtzs x12, d22 +; CHECK-NEXT: fmov d22, x16 +; CHECK-NEXT: fcvtzs x15, d26 +; CHECK-NEXT: fmov d26, x14 +; CHECK-NEXT: fcvtzs x14, d16 +; CHECK-NEXT: frintx d25, 
d25 +; CHECK-NEXT: frintx d7, d7 +; CHECK-NEXT: mov d16, v1.d[1] +; CHECK-NEXT: mov d3, v3.d[1] +; CHECK-NEXT: stp q18, q20, [x8, #224] +; CHECK-NEXT: mov d18, v24.d[1] +; CHECK-NEXT: mov v22.d[1], x18 +; CHECK-NEXT: mov v26.d[1], x17 +; CHECK-NEXT: frintx d24, d24 +; CHECK-NEXT: mov v21.d[1], x15 +; CHECK-NEXT: mov v23.d[1], x14 +; CHECK-NEXT: frintx d20, d2 +; CHECK-NEXT: mov d2, v2.d[1] +; CHECK-NEXT: frintx d6, d6 +; CHECK-NEXT: frintx d5, d5 +; CHECK-NEXT: frintx d4, d4 +; CHECK-NEXT: frintx d18, d18 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: frintx d3, d3 +; CHECK-NEXT: stp q22, q26, [x8, #192] +; CHECK-NEXT: fmov d22, x10 +; CHECK-NEXT: fcvtzs x10, d24 +; CHECK-NEXT: stp q23, q21, [x8, #160] +; CHECK-NEXT: fmov d21, x11 +; CHECK-NEXT: fmov d24, x13 +; CHECK-NEXT: frintx d2, d2 +; CHECK-NEXT: fcvtzs x13, d6 +; CHECK-NEXT: frintx d6, d16 +; CHECK-NEXT: fcvtzs x11, d18 +; CHECK-NEXT: fmov d18, x12 +; CHECK-NEXT: fcvtzs x12, d25 +; CHECK-NEXT: fmov d23, x10 +; CHECK-NEXT: fcvtzs x10, d7 +; CHECK-NEXT: fcvtzs x14, d5 +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs x15, d3 +; CHECK-NEXT: mov v24.d[1], x13 +; CHECK-NEXT: fcvtzs x13, d2 +; CHECK-NEXT: fmov d2, x9 +; CHECK-NEXT: mov v23.d[1], x11 +; CHECK-NEXT: fcvtzs x11, d4 +; CHECK-NEXT: mov v18.d[1], x12 +; CHECK-NEXT: fcvtzs x12, d20 +; CHECK-NEXT: mov v21.d[1], x10 +; CHECK-NEXT: fcvtzs x10, d1 +; CHECK-NEXT: mov v22.d[1], x14 +; CHECK-NEXT: fcvtzs x14, d6 +; CHECK-NEXT: mov v19.d[1], x15 +; CHECK-NEXT: stp q18, q23, [x8, #128] +; CHECK-NEXT: mov v17.d[1], x11 +; CHECK-NEXT: fcvtzs x11, d0 +; CHECK-NEXT: stp q24, q21, [x8, #96] +; CHECK-NEXT: fmov d0, x12 +; CHECK-NEXT: fmov d1, x10 +; CHECK-NEXT: stp q17, q22, [x8, #64] +; CHECK-NEXT: mov v0.d[1], x13 +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: mov v2.d[1], x11 +; CHECK-NEXT: stp q0, q19, [x8, #32] +; CHECK-NEXT: stp q2, q1, [x8] +; CHECK-NEXT: ret + %a = call <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double> %x) + ret <32 x i64> %a +} +declare <32 x 
i64> @llvm.llrint.v32i64.v32f64(<32 x double>) diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll index db85b23428216..a58be8dcb7455 100644 --- a/llvm/test/CodeGen/AArch64/vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll @@ -1,19 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for lrint_v2f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32i64_v32f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f64 +; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s define <1 x i64> @lrint_v1f16(<1 x half> %x) { ; CHECK-LABEL: lrint_v1f16: @@ -385,20 +371,13 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { declare <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half>) define <1 x i64> @lrint_v1f32(<1 x float> %x) { -; CHECK-SD-LABEL: lrint_v1f32: -; CHECK-SD: // %bb.0: -; 
CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: frintx s0, s0 -; CHECK-SD-NEXT: fcvtzs x8, s0 -; CHECK-SD-NEXT: fmov d0, x8 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: lrint_v1f32: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: frintx s0, s0 -; CHECK-GI-NEXT: fcvtzs x8, s0 -; CHECK-GI-NEXT: fmov d0, x8 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: lrint_v1f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ret %a = call <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float> %x) ret <1 x i64> %a } @@ -553,6 +532,143 @@ define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) { } declare <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float>) +define <32 x i64> @lrint_v32i64_v32f32(<32 x float> %x) { +; CHECK-LABEL: lrint_v32i64_v32f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ext v16.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: ext v20.16b, v5.16b, v5.16b, #8 +; CHECK-NEXT: ext v17.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: ext v18.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v19.16b, v4.16b, v4.16b, #8 +; CHECK-NEXT: ext v21.16b, v6.16b, v6.16b, #8 +; CHECK-NEXT: ext v22.16b, v7.16b, v7.16b, #8 +; CHECK-NEXT: frintx s24, s16 +; CHECK-NEXT: mov s28, v20.s[1] +; CHECK-NEXT: frintx s25, s17 +; CHECK-NEXT: frintx s26, s18 +; CHECK-NEXT: frintx s27, s19 +; CHECK-NEXT: frintx s29, s20 +; CHECK-NEXT: mov s30, v21.s[1] +; CHECK-NEXT: frintx s20, s21 +; CHECK-NEXT: frintx s21, s22 +; CHECK-NEXT: mov s23, v22.s[1] +; CHECK-NEXT: mov s19, v19.s[1] +; CHECK-NEXT: mov s17, v17.s[1] +; CHECK-NEXT: fcvtzs x12, s24 +; CHECK-NEXT: frintx s24, s28 +; CHECK-NEXT: fcvtzs x13, s25 +; CHECK-NEXT: mov s25, v7.s[1] +; CHECK-NEXT: fcvtzs x9, s26 +; CHECK-NEXT: fcvtzs x11, s27 +; CHECK-NEXT: fcvtzs x14, s20 +; CHECK-NEXT: fcvtzs x15, s21 +; CHECK-NEXT: frintx s26, s1 +; CHECK-NEXT: frintx s23, s23 +; CHECK-NEXT: frintx s27, s7 +; CHECK-NEXT: frintx s22, s30 +; CHECK-NEXT: fmov d20, x12 +; 
CHECK-NEXT: fcvtzs x12, s24 +; CHECK-NEXT: mov s24, v6.s[1] +; CHECK-NEXT: frintx s25, s25 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvtzs x10, s29 +; CHECK-NEXT: fmov d7, x11 +; CHECK-NEXT: fmov d21, x13 +; CHECK-NEXT: frintx s28, s5 +; CHECK-NEXT: fcvtzs x11, s23 +; CHECK-NEXT: fmov d23, x14 +; CHECK-NEXT: fcvtzs x14, s26 +; CHECK-NEXT: fmov d26, x15 +; CHECK-NEXT: fcvtzs x15, s27 +; CHECK-NEXT: frintx s24, s24 +; CHECK-NEXT: mov s27, v5.s[1] +; CHECK-NEXT: fcvtzs x13, s22 +; CHECK-NEXT: fcvtzs x17, s25 +; CHECK-NEXT: frintx s25, s4 +; CHECK-NEXT: fcvtzs x18, s6 +; CHECK-NEXT: fmov d6, x10 +; CHECK-NEXT: frintx s22, s2 +; CHECK-NEXT: mov v26.d[1], x11 +; CHECK-NEXT: fmov d5, x14 +; CHECK-NEXT: fcvtzs x10, s24 +; CHECK-NEXT: fmov d24, x15 +; CHECK-NEXT: fcvtzs x14, s28 +; CHECK-NEXT: frintx s27, s27 +; CHECK-NEXT: mov v23.d[1], x13 +; CHECK-NEXT: mov s4, v4.s[1] +; CHECK-NEXT: fcvtzs x13, s25 +; CHECK-NEXT: fmov d25, x18 +; CHECK-NEXT: mov s16, v16.s[1] +; CHECK-NEXT: mov v24.d[1], x17 +; CHECK-NEXT: fcvtzs x16, s22 +; CHECK-NEXT: frintx s22, s3 +; CHECK-NEXT: mov s3, v3.s[1] +; CHECK-NEXT: frintx s19, s19 +; CHECK-NEXT: mov s2, v2.s[1] +; CHECK-NEXT: mov v25.d[1], x10 +; CHECK-NEXT: fcvtzs x10, s27 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: mov v6.d[1], x12 +; CHECK-NEXT: frintx s17, s17 +; CHECK-NEXT: mov s18, v18.s[1] +; CHECK-NEXT: stp q24, q26, [x8, #224] +; CHECK-NEXT: fmov d24, x14 +; CHECK-NEXT: fcvtzs x11, s22 +; CHECK-NEXT: ext v22.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: mov s1, v1.s[1] +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: stp q25, q23, [x8, #192] +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: fcvtzs x12, s4 +; CHECK-NEXT: mov v24.d[1], x10 +; CHECK-NEXT: fcvtzs x10, s19 +; CHECK-NEXT: mov s19, v0.s[1] +; CHECK-NEXT: frintx s16, s16 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fmov d4, x11 +; CHECK-NEXT: mov s27, v22.s[1] +; CHECK-NEXT: frintx s22, s22 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fcvtzs x14, s2 +; 
CHECK-NEXT: frintx s2, s18 +; CHECK-NEXT: stp q24, q6, [x8, #160] +; CHECK-NEXT: fmov d6, x13 +; CHECK-NEXT: fcvtzs x13, s17 +; CHECK-NEXT: frintx s17, s19 +; CHECK-NEXT: fmov d23, x16 +; CHECK-NEXT: mov v7.d[1], x10 +; CHECK-NEXT: frintx s3, s27 +; CHECK-NEXT: fcvtzs x10, s22 +; CHECK-NEXT: fcvtzs x15, s1 +; CHECK-NEXT: mov v6.d[1], x12 +; CHECK-NEXT: fcvtzs x12, s16 +; CHECK-NEXT: mov v4.d[1], x11 +; CHECK-NEXT: mov v21.d[1], x13 +; CHECK-NEXT: fcvtzs x13, s0 +; CHECK-NEXT: mov v23.d[1], x14 +; CHECK-NEXT: fcvtzs x14, s17 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: mov v5.d[1], x15 +; CHECK-NEXT: stp q6, q7, [x8, #128] +; CHECK-NEXT: mov v20.d[1], x12 +; CHECK-NEXT: fcvtzs x12, s2 +; CHECK-NEXT: stp q4, q21, [x8, #96] +; CHECK-NEXT: fmov d1, x13 +; CHECK-NEXT: fmov d2, x9 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: stp q23, q20, [x8, #64] +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: mov v2.d[1], x12 +; CHECK-NEXT: stp q5, q0, [x8, #32] +; CHECK-NEXT: stp q1, q2, [x8] +; CHECK-NEXT: ret + %a = call <32 x i64> @llvm.lrint.v32i64.v32f32(<32 x float> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.lrint.v32i64.v32f32(<32 x float>) + define <1 x i64> @lrint_v1f64(<1 x double> %x) { ; CHECK-LABEL: lrint_v1f64: ; CHECK: // %bb.0: @@ -640,3 +756,201 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) { ret <8 x i64> %a } declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>) + +define <16 x i64> @lrint_v16f64(<16 x double> %x) { +; CHECK-LABEL: lrint_v16f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov d16, v0.d[1] +; CHECK-NEXT: mov d17, v1.d[1] +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: frintx d18, d2 +; CHECK-NEXT: mov d2, v2.d[1] +; CHECK-NEXT: frintx d19, d3 +; CHECK-NEXT: mov d3, v3.d[1] +; CHECK-NEXT: frintx d16, d16 +; CHECK-NEXT: frintx d17, d17 +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: frintx d0, d4 +; CHECK-NEXT: mov d4, v4.d[1] +; CHECK-NEXT: fcvtzs x9, d1 +; CHECK-NEXT: frintx d1, d5 +; 
CHECK-NEXT: mov d5, v5.d[1] +; CHECK-NEXT: fcvtzs x12, d18 +; CHECK-NEXT: frintx d2, d2 +; CHECK-NEXT: fcvtzs x13, d19 +; CHECK-NEXT: frintx d18, d3 +; CHECK-NEXT: fcvtzs x10, d16 +; CHECK-NEXT: mov d16, v6.d[1] +; CHECK-NEXT: fcvtzs x11, d17 +; CHECK-NEXT: mov d17, v7.d[1] +; CHECK-NEXT: frintx d6, d6 +; CHECK-NEXT: frintx d7, d7 +; CHECK-NEXT: frintx d4, d4 +; CHECK-NEXT: frintx d5, d5 +; CHECK-NEXT: fcvtzs x14, d0 +; CHECK-NEXT: fcvtzs x15, d1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: frintx d16, d16 +; CHECK-NEXT: fcvtzs x9, d2 +; CHECK-NEXT: fmov d2, x12 +; CHECK-NEXT: frintx d17, d17 +; CHECK-NEXT: fcvtzs x8, d6 +; CHECK-NEXT: fcvtzs x12, d7 +; CHECK-NEXT: fmov d3, x13 +; CHECK-NEXT: fcvtzs x13, d18 +; CHECK-NEXT: fcvtzs x16, d4 +; CHECK-NEXT: fcvtzs x17, d5 +; CHECK-NEXT: fmov d4, x14 +; CHECK-NEXT: fmov d5, x15 +; CHECK-NEXT: fcvtzs x18, d16 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: fcvtzs x0, d17 +; CHECK-NEXT: fmov d6, x8 +; CHECK-NEXT: fmov d7, x12 +; CHECK-NEXT: mov v2.d[1], x9 +; CHECK-NEXT: mov v3.d[1], x13 +; CHECK-NEXT: mov v4.d[1], x16 +; CHECK-NEXT: mov v5.d[1], x17 +; CHECK-NEXT: mov v6.d[1], x18 +; CHECK-NEXT: mov v7.d[1], x0 +; CHECK-NEXT: ret + %a = call <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double>) + +define <32 x i64> @lrint_v32f64(<32 x double> %x) { +; CHECK-LABEL: lrint_v32f64: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d20, d0 +; CHECK-NEXT: frintx d22, d3 +; CHECK-NEXT: frintx d21, d4 +; CHECK-NEXT: ldp q19, q18, [sp, #64] +; CHECK-NEXT: frintx d23, d5 +; CHECK-NEXT: ldp q27, q26, [sp, #96] +; CHECK-NEXT: mov d4, v4.d[1] +; CHECK-NEXT: ldp q16, q17, [sp, #32] +; CHECK-NEXT: mov d5, v5.d[1] +; CHECK-NEXT: fcvtzs x9, d20 +; CHECK-NEXT: frintx d20, d6 +; CHECK-NEXT: fcvtzs x11, d22 +; CHECK-NEXT: frintx d22, d19 +; CHECK-NEXT: fcvtzs x12, d21 +; CHECK-NEXT: fcvtzs x10, d23 +; 
CHECK-NEXT: mov d21, v26.d[1] +; CHECK-NEXT: frintx d23, d27 +; CHECK-NEXT: mov d27, v27.d[1] +; CHECK-NEXT: frintx d24, d16 +; CHECK-NEXT: mov d19, v19.d[1] +; CHECK-NEXT: frintx d25, d17 +; CHECK-NEXT: fcvtzs x13, d20 +; CHECK-NEXT: mov d20, v18.d[1] +; CHECK-NEXT: frintx d18, d18 +; CHECK-NEXT: fcvtzs x16, d22 +; CHECK-NEXT: frintx d22, d26 +; CHECK-NEXT: mov d16, v16.d[1] +; CHECK-NEXT: frintx d21, d21 +; CHECK-NEXT: fcvtzs x17, d23 +; CHECK-NEXT: frintx d23, d27 +; CHECK-NEXT: fcvtzs x14, d24 +; CHECK-NEXT: frintx d26, d19 +; CHECK-NEXT: fmov d19, x11 +; CHECK-NEXT: frintx d20, d20 +; CHECK-NEXT: mov d27, v17.d[1] +; CHECK-NEXT: fcvtzs x15, d25 +; CHECK-NEXT: ldp q25, q24, [sp] +; CHECK-NEXT: fcvtzs x11, d22 +; CHECK-NEXT: fmov d17, x12 +; CHECK-NEXT: fcvtzs x12, d21 +; CHECK-NEXT: fcvtzs x0, d23 +; CHECK-NEXT: fmov d23, x14 +; CHECK-NEXT: fcvtzs x14, d18 +; CHECK-NEXT: fmov d18, x17 +; CHECK-NEXT: fcvtzs x17, d20 +; CHECK-NEXT: frintx d21, d7 +; CHECK-NEXT: fcvtzs x18, d26 +; CHECK-NEXT: fmov d20, x11 +; CHECK-NEXT: frintx d22, d25 +; CHECK-NEXT: frintx d26, d27 +; CHECK-NEXT: frintx d16, d16 +; CHECK-NEXT: mov v18.d[1], x0 +; CHECK-NEXT: mov d25, v25.d[1] +; CHECK-NEXT: mov d7, v7.d[1] +; CHECK-NEXT: mov d6, v6.d[1] +; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: mov v20.d[1], x12 +; CHECK-NEXT: fcvtzs x11, d21 +; CHECK-NEXT: fmov d21, x15 +; CHECK-NEXT: fcvtzs x12, d22 +; CHECK-NEXT: fmov d22, x16 +; CHECK-NEXT: fcvtzs x15, d26 +; CHECK-NEXT: fmov d26, x14 +; CHECK-NEXT: fcvtzs x14, d16 +; CHECK-NEXT: frintx d25, d25 +; CHECK-NEXT: frintx d7, d7 +; CHECK-NEXT: mov d16, v1.d[1] +; CHECK-NEXT: mov d3, v3.d[1] +; CHECK-NEXT: stp q18, q20, [x8, #224] +; CHECK-NEXT: mov d18, v24.d[1] +; CHECK-NEXT: mov v22.d[1], x18 +; CHECK-NEXT: mov v26.d[1], x17 +; CHECK-NEXT: frintx d24, d24 +; CHECK-NEXT: mov v21.d[1], x15 +; CHECK-NEXT: mov v23.d[1], x14 +; CHECK-NEXT: frintx d20, d2 +; CHECK-NEXT: mov d2, v2.d[1] +; CHECK-NEXT: frintx d6, d6 +; CHECK-NEXT: frintx d5, d5 +; 
CHECK-NEXT: frintx d4, d4 +; CHECK-NEXT: frintx d18, d18 +; CHECK-NEXT: frintx d1, d1 +; CHECK-NEXT: frintx d3, d3 +; CHECK-NEXT: stp q22, q26, [x8, #192] +; CHECK-NEXT: fmov d22, x10 +; CHECK-NEXT: fcvtzs x10, d24 +; CHECK-NEXT: stp q23, q21, [x8, #160] +; CHECK-NEXT: fmov d21, x11 +; CHECK-NEXT: fmov d24, x13 +; CHECK-NEXT: frintx d2, d2 +; CHECK-NEXT: fcvtzs x13, d6 +; CHECK-NEXT: frintx d6, d16 +; CHECK-NEXT: fcvtzs x11, d18 +; CHECK-NEXT: fmov d18, x12 +; CHECK-NEXT: fcvtzs x12, d25 +; CHECK-NEXT: fmov d23, x10 +; CHECK-NEXT: fcvtzs x10, d7 +; CHECK-NEXT: fcvtzs x14, d5 +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs x15, d3 +; CHECK-NEXT: mov v24.d[1], x13 +; CHECK-NEXT: fcvtzs x13, d2 +; CHECK-NEXT: fmov d2, x9 +; CHECK-NEXT: mov v23.d[1], x11 +; CHECK-NEXT: fcvtzs x11, d4 +; CHECK-NEXT: mov v18.d[1], x12 +; CHECK-NEXT: fcvtzs x12, d20 +; CHECK-NEXT: mov v21.d[1], x10 +; CHECK-NEXT: fcvtzs x10, d1 +; CHECK-NEXT: mov v22.d[1], x14 +; CHECK-NEXT: fcvtzs x14, d6 +; CHECK-NEXT: mov v19.d[1], x15 +; CHECK-NEXT: stp q18, q23, [x8, #128] +; CHECK-NEXT: mov v17.d[1], x11 +; CHECK-NEXT: fcvtzs x11, d0 +; CHECK-NEXT: stp q24, q21, [x8, #96] +; CHECK-NEXT: fmov d0, x12 +; CHECK-NEXT: fmov d1, x10 +; CHECK-NEXT: stp q17, q22, [x8, #64] +; CHECK-NEXT: mov v0.d[1], x13 +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: mov v2.d[1], x11 +; CHECK-NEXT: stp q0, q19, [x8, #32] +; CHECK-NEXT: stp q2, q1, [x8] +; CHECK-NEXT: ret + %a = call <32 x i64> @llvm.lrint.v32i64.v32f64(<32 x double> %x) + ret <32 x i64> %a +} +declare <32 x i64> @llvm.lrint.v32i64.v32f64(<32 x double>) From 106f35521dc27d4ba9e2780339157d129fdee996 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 8 May 2024 18:28:47 +0100 Subject: [PATCH 2/6] Use ISD::FP_TO_SINT --- .../Target/AArch64/AArch64ISelLowering.cpp | 22 +------------------ 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
index 41372b5432a0e..3d3d45840f4e0 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4374,11 +4374,6 @@ SDValue AArch64TargetLowering::LowerVectorXRINT(SDValue Op, assert(VT.isVector() && "Expected vector type"); - // We can't custom-lower ISD::[L]LRINT without SVE, since it requires - // AArch64ISD::FCVTZS_MERGE_PASSTHRU. - if (!Subtarget->isSVEAvailable()) - return SDValue(); - EVT ContainerVT = VT; EVT SrcVT = Src.getValueType(); EVT CastVT = @@ -4394,24 +4389,9 @@ SDValue AArch64TargetLowering::LowerVectorXRINT(SDValue Op, // the current rounding mode. SDValue FOp = DAG.getNode(ISD::FRINT, DL, CastVT, Src); - // In the case of vector filled with f32, ftrunc will convert it to an i32, - // but a vector filled with i32 isn't legal. So, FP_EXTEND the f32 into the - // required size. - size_t SrcSz = SrcVT.getScalarSizeInBits(); - size_t ContainerSz = ContainerVT.getScalarSizeInBits(); - if (ContainerSz > SrcSz) { - EVT SizedVT = MVT::getVectorVT(MVT::getFloatingPointVT(ContainerSz), - ContainerVT.getVectorElementCount()); - FOp = DAG.getNode(ISD::FP_EXTEND, DL, SizedVT, FOp.getOperand(0)); - } - // Finally, truncate the rounded floating point to an integer, rounding to // zero. 
- SDValue Pred = getPredicateForVector(DAG, DL, ContainerVT); - SDValue Undef = DAG.getUNDEF(ContainerVT); - SDValue Truncated = - DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, DL, ContainerVT, - {Pred, FOp.getOperand(0), Undef}, FOp->getFlags()); + SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, ContainerVT, FOp.getOperand(0)); if (VT.isScalableVector()) return Truncated; From ab50733b0216f6a433a0d3a1db0cc30cc8201efc Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 8 May 2024 18:34:23 +0100 Subject: [PATCH 3/6] clang-format --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 3d3d45840f4e0..8e530846881e6 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4391,7 +4391,8 @@ SDValue AArch64TargetLowering::LowerVectorXRINT(SDValue Op, // Finally, truncate the rounded floating point to an integer, rounding to // zero. 
- SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, ContainerVT, FOp.getOperand(0)); + SDValue Truncated = + DAG.getNode(ISD::FP_TO_SINT, DL, ContainerVT, FOp.getOperand(0)); if (VT.isScalableVector()) return Truncated; From 9a92adf8fc3e64ff5bca9525e300b4ca85c544af Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 9 May 2024 12:40:31 +0100 Subject: [PATCH 4/6] ISel/AArch64: fix all issues --- .../Target/AArch64/AArch64ISelLowering.cpp | 46 +- .../AArch64/sve-fixed-vector-llrint.ll | 1077 +++++--------- .../CodeGen/AArch64/sve-fixed-vector-lrint.ll | 1077 +++++--------- llvm/test/CodeGen/AArch64/sve-llrint.ll | 434 +++--- llvm/test/CodeGen/AArch64/sve-lrint.ll | 434 +++--- llvm/test/CodeGen/AArch64/vector-llrint.ll | 1211 +++++++--------- llvm/test/CodeGen/AArch64/vector-lrint.ll | 1243 +++++++---------- 7 files changed, 2267 insertions(+), 3255 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 8e530846881e6..dcfe07dc330d6 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1304,6 +1304,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(Op, Ty, Legal); } + // LRINT and LLRINT. + for (auto VT : MVT::fp_fixedlen_vector_valuetypes()) { + setOperationAction(ISD::LRINT, VT, Custom); + setOperationAction(ISD::LLRINT, VT, Custom); + } + setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom); setOperationAction(ISD::BITCAST, MVT::i2, Custom); @@ -1419,6 +1425,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::OR, VT, Custom); } + // LRINT and LLRINT. + for (auto VT : MVT::fp_scalable_vector_valuetypes()) { + setOperationAction(ISD::LRINT, VT, Custom); + setOperationAction(ISD::LLRINT, VT, Custom); + } + // Illegal unpacked integer vector types. 
for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) { setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); @@ -1526,8 +1538,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FNEARBYINT, VT, Custom); setOperationAction(ISD::FRINT, VT, Custom); setOperationAction(ISD::FROUND, VT, Custom); - setOperationAction(ISD::LRINT, VT, Custom); - setOperationAction(ISD::LLRINT, VT, Custom); setOperationAction(ISD::FROUNDEVEN, VT, Custom); setOperationAction(ISD::FTRUNC, VT, Custom); setOperationAction(ISD::FSQRT, VT, Custom); @@ -1667,6 +1677,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::MULHU, VT, Custom); } + // LRINT and LLRINT. + for (auto VT : MVT::fp_fixedlen_vector_valuetypes()) { + setOperationAction(ISD::LRINT, VT, Custom); + setOperationAction(ISD::LLRINT, VT, Custom); + } // Use SVE for vectors with more than 2 elements. for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32}) @@ -1942,8 +1957,6 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { setOperationAction(ISD::FP_TO_UINT, VT, Default); setOperationAction(ISD::FRINT, VT, Default); setOperationAction(ISD::FROUND, VT, Default); - setOperationAction(ISD::LRINT, VT, Default); - setOperationAction(ISD::LLRINT, VT, Default); setOperationAction(ISD::FROUNDEVEN, VT, Default); setOperationAction(ISD::FSQRT, VT, Default); setOperationAction(ISD::FSUB, VT, Default); @@ -4374,30 +4387,15 @@ SDValue AArch64TargetLowering::LowerVectorXRINT(SDValue Op, assert(VT.isVector() && "Expected vector type"); - EVT ContainerVT = VT; - EVT SrcVT = Src.getValueType(); EVT CastVT = - ContainerVT.changeVectorElementType(SrcVT.getVectorElementType()); + VT.changeVectorElementType(Src.getValueType().getVectorElementType()); - if (VT.isFixedLengthVector()) { - ContainerVT = getContainerForFixedLengthVector(DAG, VT); - CastVT = ContainerVT.changeVectorElementType(SrcVT.getVectorElementType()); - Src = 
convertToScalableVector(DAG, CastVT, Src); - } - - // First, round the floating-point value into a floating-point register with - // the current rounding mode. + // Round the floating-point value into a floating-point register with the + // current rounding mode. SDValue FOp = DAG.getNode(ISD::FRINT, DL, CastVT, Src); - // Finally, truncate the rounded floating point to an integer, rounding to - // zero. - SDValue Truncated = - DAG.getNode(ISD::FP_TO_SINT, DL, ContainerVT, FOp.getOperand(0)); - - if (VT.isScalableVector()) - return Truncated; - - return convertFromScalableVector(DAG, VT, Truncated); + // Truncate the rounded floating point to an integer, rounding to zero. + return DAG.getNode(ISD::FP_TO_SINT, DL, VT, FOp); } SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op, diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll index febfa785eaeff..89ef30e38849f 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -aarch64-sve-vector-bits-min=256 | FileCheck %s define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { ; CHECK-LABEL: llrint_v1i64_v1f16: @@ -16,14 +16,12 @@ declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>) define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { ; CHECK-LABEL: llrint_v1i64_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: frintx h0, h0 -; CHECK-NEXT: frintx h1, h1 -; CHECK-NEXT: fcvtzs x8, h0 -; CHECK-NEXT: fcvtzs x9, h1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: frintx v0.4h, v0.4h +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z0.d, z0.s +; 
CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x) ret <2 x i64> %a @@ -33,22 +31,15 @@ declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>) define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { ; CHECK-LABEL: llrint_v4i64_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[2] -; CHECK-NEXT: mov h2, v0.h[1] -; CHECK-NEXT: mov h3, v0.h[3] -; CHECK-NEXT: frintx h0, h0 -; CHECK-NEXT: frintx h1, h1 -; CHECK-NEXT: frintx h2, h2 -; CHECK-NEXT: frintx h3, h3 -; CHECK-NEXT: fcvtzs x8, h0 -; CHECK-NEXT: fcvtzs x9, h1 -; CHECK-NEXT: fcvtzs x10, h2 -; CHECK-NEXT: fcvtzs x11, h3 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: frintx v0.4h, v0.4h +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret %a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x) ret <4 x i64> %a @@ -59,36 +50,24 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; CHECK-LABEL: llrint_v8i64_v8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov h4, v0.h[2] -; CHECK-NEXT: mov h3, v0.h[1] -; CHECK-NEXT: mov h7, v0.h[3] -; CHECK-NEXT: frintx h0, h0 -; CHECK-NEXT: mov h2, v1.h[2] -; CHECK-NEXT: mov h5, v1.h[1] -; CHECK-NEXT: mov h6, v1.h[3] -; CHECK-NEXT: frintx h1, h1 -; CHECK-NEXT: frintx h4, h4 -; CHECK-NEXT: frintx h3, h3 -; CHECK-NEXT: frintx h7, h7 -; CHECK-NEXT: fcvtzs x9, h0 -; CHECK-NEXT: frintx h2, h2 -; CHECK-NEXT: frintx h5, h5 -; CHECK-NEXT: frintx h6, h6 -; CHECK-NEXT: fcvtzs x8, h1 -; CHECK-NEXT: fcvtzs x12, h4 -; 
CHECK-NEXT: fcvtzs x11, h3 -; CHECK-NEXT: fcvtzs x15, h7 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fcvtzs x10, h2 -; CHECK-NEXT: fcvtzs x13, h5 -; CHECK-NEXT: fcvtzs x14, h6 -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fmov d1, x12 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: fmov d3, x10 -; CHECK-NEXT: mov v2.d[1], x13 -; CHECK-NEXT: mov v1.d[1], x15 -; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: frintx v0.4h, v0.4h +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: frintx v1.4h, v1.4h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.h +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 +; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 +; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x) ret <8 x i64> %a @@ -100,66 +79,41 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: mov h4, v0.h[1] -; CHECK-NEXT: frintx h5, h0 -; CHECK-NEXT: mov h18, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: frintx h6, h2 -; CHECK-NEXT: mov h7, v2.h[1] -; CHECK-NEXT: mov h16, v2.h[2] -; CHECK-NEXT: mov h17, v3.h[2] -; CHECK-NEXT: frintx h19, h3 -; CHECK-NEXT: frintx h4, h4 -; CHECK-NEXT: fcvtzs x8, h5 -; CHECK-NEXT: mov h5, v1.h[1] -; CHECK-NEXT: mov h2, v2.h[3] -; CHECK-NEXT: frintx h18, h18 -; CHECK-NEXT: frintx h0, h0 -; CHECK-NEXT: fcvtzs x9, h6 -; CHECK-NEXT: frintx h6, h7 -; CHECK-NEXT: frintx h7, h16 -; CHECK-NEXT: mov h16, v1.h[2] -; CHECK-NEXT: frintx h17, h17 -; CHECK-NEXT: 
fcvtzs x10, h19 -; CHECK-NEXT: mov h19, v3.h[1] -; CHECK-NEXT: fcvtzs x11, h4 -; CHECK-NEXT: mov h4, v1.h[3] -; CHECK-NEXT: mov h3, v3.h[3] -; CHECK-NEXT: frintx h1, h1 -; CHECK-NEXT: frintx h5, h5 -; CHECK-NEXT: fcvtzs x13, h7 -; CHECK-NEXT: fcvtzs x12, h6 -; CHECK-NEXT: fcvtzs x15, h18 -; CHECK-NEXT: frintx h7, h16 -; CHECK-NEXT: fcvtzs x14, h17 -; CHECK-NEXT: frintx h16, h2 -; CHECK-NEXT: frintx h17, h19 -; CHECK-NEXT: frintx h4, h4 -; CHECK-NEXT: fmov d2, x9 -; CHECK-NEXT: frintx h19, h3 -; CHECK-NEXT: fcvtzs x9, h1 -; CHECK-NEXT: fmov d6, x10 -; CHECK-NEXT: fmov d3, x13 -; CHECK-NEXT: fcvtzs x13, h0 -; CHECK-NEXT: fcvtzs x16, h5 -; CHECK-NEXT: fcvtzs x10, h7 -; CHECK-NEXT: fmov d7, x14 -; CHECK-NEXT: fcvtzs x14, h16 -; CHECK-NEXT: fcvtzs x17, h17 -; CHECK-NEXT: fcvtzs x0, h4 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fcvtzs x18, h19 -; CHECK-NEXT: fmov d1, x15 -; CHECK-NEXT: fmov d4, x9 -; CHECK-NEXT: mov v2.d[1], x12 -; CHECK-NEXT: fmov d5, x10 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: mov v3.d[1], x14 -; CHECK-NEXT: mov v1.d[1], x13 -; CHECK-NEXT: mov v4.d[1], x16 -; CHECK-NEXT: mov v6.d[1], x17 -; CHECK-NEXT: mov v7.d[1], x18 -; CHECK-NEXT: mov v5.d[1], x0 +; CHECK-NEXT: frintx v0.4h, v0.4h +; CHECK-NEXT: frintx v1.4h, v1.4h +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: frintx v2.4h, v2.4h +; CHECK-NEXT: frintx v3.4h, v3.4h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z2.s, z2.h +; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: uunpklo z2.d, z2.s +; CHECK-NEXT: uunpklo z3.d, z3.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: movprfx z4, z1 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.h +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h +; CHECK-NEXT: movprfx z6, z3 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z3.h +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed 
$z0 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z7.d, z6.d +; CHECK-NEXT: ext z5.b, z5.b, z4.b, #16 +; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5 +; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 +; CHECK-NEXT: ext z7.b, z7.b, z6.b, #16 +; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 +; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6 +; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 +; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7 ; CHECK-NEXT: ret %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x) ret <16 x i64> %a @@ -169,138 +123,61 @@ declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>) define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-LABEL: llrint_v32i64_v32f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v4.16b, v3.16b, v3.16b, #8 ; CHECK-NEXT: ext v5.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: ext v6.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: mov x9, #24 // =0x18 +; CHECK-NEXT: frintx v3.4h, v3.4h +; CHECK-NEXT: ext v6.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: frintx v2.4h, v2.4h ; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: frintx h21, h1 -; CHECK-NEXT: frintx h22, h2 -; CHECK-NEXT: mov h26, v2.h[2] -; CHECK-NEXT: frintx h19, h0 -; CHECK-NEXT: mov h27, v3.h[2] -; CHECK-NEXT: mov h20, v2.h[1] -; CHECK-NEXT: mov h18, v1.h[1] -; CHECK-NEXT: mov h16, v4.h[2] -; CHECK-NEXT: mov h17, v5.h[2] -; CHECK-NEXT: frintx h23, h5 -; CHECK-NEXT: frintx h24, h6 -; CHECK-NEXT: mov h25, v6.h[2] -; CHECK-NEXT: fcvtzs x9, h21 -; CHECK-NEXT: fcvtzs x11, h22 -; CHECK-NEXT: frintx h22, h7 -; CHECK-NEXT: mov h21, v3.h[3] -; CHECK-NEXT: fcvtzs x10, h19 -; CHECK-NEXT: frintx h27, h27 -; CHECK-NEXT: frintx h20, h20 -; CHECK-NEXT: frintx h16, h16 -; CHECK-NEXT: frintx h17, h17 -; CHECK-NEXT: fcvtzs x12, h23 -; CHECK-NEXT: fcvtzs x13, h24 -; CHECK-NEXT: frintx h23, h25 -; 
CHECK-NEXT: frintx h25, h26 -; CHECK-NEXT: mov h26, v3.h[1] -; CHECK-NEXT: mov h24, v2.h[3] -; CHECK-NEXT: fmov d19, x9 -; CHECK-NEXT: fcvtzs x9, h22 -; CHECK-NEXT: frintx h22, h3 -; CHECK-NEXT: frintx h21, h21 -; CHECK-NEXT: fcvtzs x14, h16 -; CHECK-NEXT: fcvtzs x15, h17 -; CHECK-NEXT: fmov d2, x12 -; CHECK-NEXT: fmov d16, x13 -; CHECK-NEXT: fcvtzs x12, h23 -; CHECK-NEXT: fcvtzs x13, h25 -; CHECK-NEXT: mov h23, v1.h[2] -; CHECK-NEXT: frintx h25, h26 -; CHECK-NEXT: frintx h24, h24 -; CHECK-NEXT: mov h1, v1.h[3] -; CHECK-NEXT: fmov d26, x11 -; CHECK-NEXT: fcvtzs x11, h21 -; CHECK-NEXT: fmov d3, x14 -; CHECK-NEXT: fmov d17, x15 -; CHECK-NEXT: fcvtzs x14, h22 -; CHECK-NEXT: fcvtzs x15, h27 -; CHECK-NEXT: mov h22, v0.h[2] -; CHECK-NEXT: frintx h18, h18 -; CHECK-NEXT: frintx h21, h23 -; CHECK-NEXT: fmov d23, x13 -; CHECK-NEXT: fcvtzs x13, h25 -; CHECK-NEXT: frintx h1, h1 -; CHECK-NEXT: fmov d25, x14 -; CHECK-NEXT: fcvtzs x14, h24 -; CHECK-NEXT: fmov d24, x15 -; CHECK-NEXT: frintx h22, h22 -; CHECK-NEXT: fcvtzs x15, h18 -; CHECK-NEXT: mov h18, v7.h[1] -; CHECK-NEXT: mov v25.d[1], x13 -; CHECK-NEXT: fcvtzs x13, h21 -; CHECK-NEXT: mov h21, v7.h[2] -; CHECK-NEXT: mov v24.d[1], x11 -; CHECK-NEXT: fcvtzs x11, h20 -; CHECK-NEXT: mov h20, v0.h[1] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: mov v23.d[1], x14 -; CHECK-NEXT: fcvtzs x14, h1 -; CHECK-NEXT: mov h1, v6.h[3] -; CHECK-NEXT: mov h6, v6.h[1] -; CHECK-NEXT: mov v19.d[1], x15 -; CHECK-NEXT: mov h7, v7.h[3] -; CHECK-NEXT: stp q25, q24, [x8, #192] -; CHECK-NEXT: fmov d24, x13 -; CHECK-NEXT: frintx h20, h20 -; CHECK-NEXT: mov v26.d[1], x11 -; CHECK-NEXT: fcvtzs x11, h22 -; CHECK-NEXT: mov h22, v5.h[1] -; CHECK-NEXT: mov h5, v5.h[3] -; CHECK-NEXT: frintx h0, h0 -; CHECK-NEXT: frintx h1, h1 -; CHECK-NEXT: mov v24.d[1], x14 -; CHECK-NEXT: mov h25, v4.h[3] -; CHECK-NEXT: frintx h6, h6 -; CHECK-NEXT: stp q26, q23, [x8, #128] -; CHECK-NEXT: fmov d23, x12 -; CHECK-NEXT: fcvtzs x12, h20 -; CHECK-NEXT: mov h20, v4.h[1] -; CHECK-NEXT: 
frintx h5, h5 -; CHECK-NEXT: fcvtzs x13, h0 -; CHECK-NEXT: stp q19, q24, [x8, #64] -; CHECK-NEXT: frintx h22, h22 -; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: fmov d19, x11 -; CHECK-NEXT: frintx h4, h4 -; CHECK-NEXT: fcvtzs x10, h1 -; CHECK-NEXT: frintx h1, h21 -; CHECK-NEXT: frintx h24, h25 -; CHECK-NEXT: fcvtzs x11, h6 -; CHECK-NEXT: frintx h20, h20 -; CHECK-NEXT: frintx h6, h7 -; CHECK-NEXT: fcvtzs x14, h5 -; CHECK-NEXT: mov v19.d[1], x13 -; CHECK-NEXT: frintx h5, h18 -; CHECK-NEXT: fcvtzs x13, h22 -; CHECK-NEXT: mov v0.d[1], x12 -; CHECK-NEXT: fcvtzs x12, h4 -; CHECK-NEXT: mov v23.d[1], x10 -; CHECK-NEXT: fcvtzs x10, h1 -; CHECK-NEXT: fcvtzs x15, h24 -; CHECK-NEXT: mov v16.d[1], x11 -; CHECK-NEXT: fcvtzs x11, h20 -; CHECK-NEXT: mov v17.d[1], x14 -; CHECK-NEXT: fcvtzs x14, h6 -; CHECK-NEXT: mov v2.d[1], x13 -; CHECK-NEXT: fcvtzs x13, h5 -; CHECK-NEXT: fmov d4, x9 -; CHECK-NEXT: stp q0, q19, [x8] -; CHECK-NEXT: fmov d0, x12 -; CHECK-NEXT: stp q16, q23, [x8, #224] -; CHECK-NEXT: fmov d1, x10 -; CHECK-NEXT: mov v3.d[1], x15 -; CHECK-NEXT: stp q2, q17, [x8, #160] -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: mov v4.d[1], x13 -; CHECK-NEXT: mov v1.d[1], x14 -; CHECK-NEXT: stp q0, q3, [x8, #96] -; CHECK-NEXT: stp q4, q1, [x8, #32] +; CHECK-NEXT: frintx v1.4h, v1.4h +; CHECK-NEXT: frintx v0.4h, v0.4h +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: frintx v4.4h, v4.4h +; CHECK-NEXT: frintx v5.4h, v5.4h +; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: frintx v6.4h, v6.4h +; CHECK-NEXT: uunpklo z2.s, z2.h +; CHECK-NEXT: frintx v7.4h, v7.4h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z4.s, z4.h +; CHECK-NEXT: uunpklo z5.s, z5.h +; CHECK-NEXT: uunpklo z3.d, z3.s +; CHECK-NEXT: uunpklo z6.s, z6.h +; CHECK-NEXT: uunpklo z2.d, z2.s +; CHECK-NEXT: uunpklo z7.s, z7.h +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: uunpklo z4.d, z4.s +; CHECK-NEXT: uunpklo z5.d, z5.s +; CHECK-NEXT: fcvtzs z3.d, p0/m, 
z3.h +; CHECK-NEXT: uunpklo z6.d, z6.s +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h +; CHECK-NEXT: uunpklo z7.d, z7.s +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h +; CHECK-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #16 // =0x10 +; CHECK-NEXT: movprfx z3, z5 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z5.h +; CHECK-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #8 // =0x8 +; CHECK-NEXT: movprfx z2, z6 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.h +; CHECK-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #28 // =0x1c +; CHECK-NEXT: movprfx z1, z7 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z7.h +; CHECK-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #20 // =0x14 +; CHECK-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #12 // =0xc +; CHECK-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #4 // =0x4 +; CHECK-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z0.d }, p0, [x8] ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half> %x) ret <32 x i64> %a @@ -310,10 +187,10 @@ declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>) define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { ; CHECK-LABEL: llrint_v1i64_v1f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: frintx v0.2s, v0.2s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x) ret <1 x i64> %a @@ -323,14 +200,9 @@ declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>) define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { ; CHECK-LABEL: llrint_v2i64_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0.s[1] -; 
CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: fcvtzs x9, s1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: frintx v0.2s, v0.2s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: ret %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x) ret <2 x i64> %a @@ -340,21 +212,14 @@ declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>) define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { ; CHECK-LABEL: llrint_v4i64_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov s3, v0.s[1] -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: mov s2, v1.s[1] -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: fcvtzs x9, s0 -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fcvtzs x10, s2 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: mov v1.d[1], x10 +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x) ret <4 x i64> %a @@ -364,36 +229,22 @@ declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>) define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { ; CHECK-LABEL: llrint_v8i64_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: mov s4, v0.s[1] -; CHECK-NEXT: mov s7, v1.s[1] -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: mov s5, v2.s[1] -; CHECK-NEXT: mov s6, v3.s[1] -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: 
frintx s7, s7 -; CHECK-NEXT: fcvtzs x9, s0 -; CHECK-NEXT: fcvtzs x12, s1 -; CHECK-NEXT: frintx s5, s5 -; CHECK-NEXT: frintx s6, s6 -; CHECK-NEXT: fcvtzs x8, s2 -; CHECK-NEXT: fcvtzs x10, s3 -; CHECK-NEXT: fcvtzs x11, s4 -; CHECK-NEXT: fcvtzs x15, s7 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fmov d2, x12 -; CHECK-NEXT: fcvtzs x13, s5 -; CHECK-NEXT: fcvtzs x14, s6 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fmov d3, x10 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: mov v2.d[1], x15 -; CHECK-NEXT: mov v1.d[1], x13 -; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: frintx v1.4s, v1.4s +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.s +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x) ret <8 x i64> %a @@ -403,66 +254,37 @@ declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { ; CHECK-LABEL: llrint_v16i64_v16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v5.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: frintx s7, s0 -; CHECK-NEXT: ext v16.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: frintx s17, s4 -; CHECK-NEXT: mov s4, v4.s[1] -; CHECK-NEXT: mov s18, v5.s[1] -; CHECK-NEXT: frintx s5, s5 -; CHECK-NEXT: frintx s19, s6 -; CHECK-NEXT: fcvtzs x8, s7 -; CHECK-NEXT: frintx s7, s16 -; CHECK-NEXT: mov s6, v6.s[1] -; CHECK-NEXT: mov s16, v16.s[1] -; 
CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: fcvtzs x9, s17 -; CHECK-NEXT: frintx s17, s1 -; CHECK-NEXT: mov s1, v1.s[1] -; CHECK-NEXT: frintx s18, s18 -; CHECK-NEXT: fcvtzs x10, s5 -; CHECK-NEXT: mov s5, v2.s[1] -; CHECK-NEXT: fcvtzs x11, s19 -; CHECK-NEXT: mov s19, v3.s[1] -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: fcvtzs x12, s7 -; CHECK-NEXT: frintx s6, s6 -; CHECK-NEXT: fcvtzs x13, s4 -; CHECK-NEXT: frintx s4, s3 -; CHECK-NEXT: frintx s16, s16 -; CHECK-NEXT: fcvtzs x14, s18 -; CHECK-NEXT: frintx s18, s1 -; CHECK-NEXT: fcvtzs x15, s17 -; CHECK-NEXT: frintx s20, s5 -; CHECK-NEXT: frintx s17, s19 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fcvtzs x9, s2 -; CHECK-NEXT: fmov d5, x11 -; CHECK-NEXT: fmov d3, x10 -; CHECK-NEXT: fcvtzs x11, s4 -; CHECK-NEXT: fcvtzs x10, s0 -; CHECK-NEXT: fmov d7, x12 -; CHECK-NEXT: fcvtzs x12, s18 -; CHECK-NEXT: fcvtzs x17, s6 -; CHECK-NEXT: fcvtzs x18, s16 -; CHECK-NEXT: fcvtzs x16, s20 -; CHECK-NEXT: fcvtzs x0, s17 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d2, x15 -; CHECK-NEXT: fmov d4, x9 -; CHECK-NEXT: mov v1.d[1], x13 -; CHECK-NEXT: fmov d6, x11 -; CHECK-NEXT: mov v3.d[1], x14 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: mov v5.d[1], x17 -; CHECK-NEXT: mov v7.d[1], x18 -; CHECK-NEXT: mov v2.d[1], x12 -; CHECK-NEXT: mov v4.d[1], x16 -; CHECK-NEXT: mov v6.d[1], x0 +; CHECK-NEXT: frintx v1.4s, v1.4s +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: frintx v2.4s, v2.4s +; CHECK-NEXT: frintx v3.4s, v3.4s +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: uunpklo z4.d, z2.s +; CHECK-NEXT: uunpklo z3.d, z3.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.s +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s +; CHECK-NEXT: movprfx z6, z3 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z3.s +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z6.d +; 
CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 +; CHECK-NEXT: ext z5.b, z5.b, z4.b, #16 +; CHECK-NEXT: ext z7.b, z7.b, z6.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 +; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4 +; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 +; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5 +; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7 ; CHECK-NEXT: ret %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x) ret <16 x i64> %a @@ -472,134 +294,46 @@ declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) { ; CHECK-LABEL: llrint_v32i64_v32f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v16.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: ext v20.16b, v5.16b, v5.16b, #8 -; CHECK-NEXT: ext v17.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: ext v18.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v19.16b, v4.16b, v4.16b, #8 -; CHECK-NEXT: ext v21.16b, v6.16b, v6.16b, #8 -; CHECK-NEXT: ext v22.16b, v7.16b, v7.16b, #8 -; CHECK-NEXT: frintx s24, s16 -; CHECK-NEXT: mov s28, v20.s[1] -; CHECK-NEXT: frintx s25, s17 -; CHECK-NEXT: frintx s26, s18 -; CHECK-NEXT: frintx s27, s19 -; CHECK-NEXT: frintx s29, s20 -; CHECK-NEXT: mov s30, v21.s[1] -; CHECK-NEXT: frintx s20, s21 -; CHECK-NEXT: frintx s21, s22 -; CHECK-NEXT: mov s23, v22.s[1] -; CHECK-NEXT: mov s19, v19.s[1] -; CHECK-NEXT: mov s17, v17.s[1] -; CHECK-NEXT: fcvtzs x12, s24 -; CHECK-NEXT: frintx s24, s28 -; CHECK-NEXT: fcvtzs x13, s25 -; CHECK-NEXT: mov s25, v7.s[1] -; CHECK-NEXT: fcvtzs x9, s26 -; CHECK-NEXT: fcvtzs x11, s27 -; CHECK-NEXT: fcvtzs x14, s20 -; CHECK-NEXT: fcvtzs x15, s21 -; CHECK-NEXT: frintx s26, s1 -; CHECK-NEXT: frintx s23, s23 -; CHECK-NEXT: frintx s27, s7 -; CHECK-NEXT: frintx s22, s30 -; CHECK-NEXT: fmov d20, 
x12 -; CHECK-NEXT: fcvtzs x12, s24 -; CHECK-NEXT: mov s24, v6.s[1] -; CHECK-NEXT: frintx s25, s25 -; CHECK-NEXT: frintx s6, s6 -; CHECK-NEXT: fcvtzs x10, s29 -; CHECK-NEXT: fmov d7, x11 -; CHECK-NEXT: fmov d21, x13 -; CHECK-NEXT: frintx s28, s5 -; CHECK-NEXT: fcvtzs x11, s23 -; CHECK-NEXT: fmov d23, x14 -; CHECK-NEXT: fcvtzs x14, s26 -; CHECK-NEXT: fmov d26, x15 -; CHECK-NEXT: fcvtzs x15, s27 -; CHECK-NEXT: frintx s24, s24 -; CHECK-NEXT: mov s27, v5.s[1] -; CHECK-NEXT: fcvtzs x13, s22 -; CHECK-NEXT: fcvtzs x17, s25 -; CHECK-NEXT: frintx s25, s4 -; CHECK-NEXT: fcvtzs x18, s6 -; CHECK-NEXT: fmov d6, x10 -; CHECK-NEXT: frintx s22, s2 -; CHECK-NEXT: mov v26.d[1], x11 -; CHECK-NEXT: fmov d5, x14 -; CHECK-NEXT: fcvtzs x10, s24 -; CHECK-NEXT: fmov d24, x15 -; CHECK-NEXT: fcvtzs x14, s28 -; CHECK-NEXT: frintx s27, s27 -; CHECK-NEXT: mov v23.d[1], x13 -; CHECK-NEXT: mov s4, v4.s[1] -; CHECK-NEXT: fcvtzs x13, s25 -; CHECK-NEXT: fmov d25, x18 -; CHECK-NEXT: mov s16, v16.s[1] -; CHECK-NEXT: mov v24.d[1], x17 -; CHECK-NEXT: fcvtzs x16, s22 -; CHECK-NEXT: frintx s22, s3 -; CHECK-NEXT: mov s3, v3.s[1] -; CHECK-NEXT: frintx s19, s19 -; CHECK-NEXT: mov s2, v2.s[1] -; CHECK-NEXT: mov v25.d[1], x10 -; CHECK-NEXT: fcvtzs x10, s27 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: mov v6.d[1], x12 -; CHECK-NEXT: frintx s17, s17 -; CHECK-NEXT: mov s18, v18.s[1] -; CHECK-NEXT: stp q24, q26, [x8, #224] -; CHECK-NEXT: fmov d24, x14 -; CHECK-NEXT: fcvtzs x11, s22 -; CHECK-NEXT: ext v22.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: mov s1, v1.s[1] -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: stp q25, q23, [x8, #192] -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: fcvtzs x12, s4 -; CHECK-NEXT: mov v24.d[1], x10 -; CHECK-NEXT: fcvtzs x10, s19 -; CHECK-NEXT: mov s19, v0.s[1] -; CHECK-NEXT: frintx s16, s16 -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: fmov d4, x11 -; CHECK-NEXT: mov s27, v22.s[1] -; CHECK-NEXT: frintx s22, s22 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fcvtzs x14, s2 
-; CHECK-NEXT: frintx s2, s18 -; CHECK-NEXT: stp q24, q6, [x8, #160] -; CHECK-NEXT: fmov d6, x13 -; CHECK-NEXT: fcvtzs x13, s17 -; CHECK-NEXT: frintx s17, s19 -; CHECK-NEXT: fmov d23, x16 -; CHECK-NEXT: mov v7.d[1], x10 -; CHECK-NEXT: frintx s3, s27 -; CHECK-NEXT: fcvtzs x10, s22 -; CHECK-NEXT: fcvtzs x15, s1 -; CHECK-NEXT: mov v6.d[1], x12 -; CHECK-NEXT: fcvtzs x12, s16 -; CHECK-NEXT: mov v4.d[1], x11 -; CHECK-NEXT: mov v21.d[1], x13 -; CHECK-NEXT: fcvtzs x13, s0 -; CHECK-NEXT: mov v23.d[1], x14 -; CHECK-NEXT: fcvtzs x14, s17 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: mov v5.d[1], x15 -; CHECK-NEXT: stp q6, q7, [x8, #128] -; CHECK-NEXT: mov v20.d[1], x12 -; CHECK-NEXT: fcvtzs x12, s2 -; CHECK-NEXT: stp q4, q21, [x8, #96] -; CHECK-NEXT: fmov d1, x13 -; CHECK-NEXT: fmov d2, x9 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: stp q23, q20, [x8, #64] -; CHECK-NEXT: mov v1.d[1], x14 -; CHECK-NEXT: mov v2.d[1], x12 -; CHECK-NEXT: stp q5, q0, [x8, #32] -; CHECK-NEXT: stp q1, q2, [x8] +; CHECK-NEXT: frintx v7.4s, v7.4s +; CHECK-NEXT: frintx v6.4s, v6.4s +; CHECK-NEXT: mov x9, #28 // =0x1c +; CHECK-NEXT: frintx v5.4s, v5.4s +; CHECK-NEXT: frintx v4.4s, v4.4s +; CHECK-NEXT: frintx v3.4s, v3.4s +; CHECK-NEXT: frintx v2.4s, v2.4s +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: frintx v1.4s, v1.4s +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: uunpklo z7.d, z7.s +; CHECK-NEXT: uunpklo z6.d, z6.s +; CHECK-NEXT: uunpklo z5.d, z5.s +; CHECK-NEXT: uunpklo z4.d, z4.s +; CHECK-NEXT: uunpklo z3.d, z3.s +; CHECK-NEXT: uunpklo z2.d, z2.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.s +; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s +; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.s +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: st1d { z7.d }, p0, [x8, x9, lsl 
#3] +; CHECK-NEXT: mov x9, #24 // =0x18 +; CHECK-NEXT: st1d { z6.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #20 // =0x14 +; CHECK-NEXT: st1d { z5.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #16 // =0x10 +; CHECK-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #12 // =0xc +; CHECK-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #8 // =0x8 +; CHECK-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #4 // =0x4 +; CHECK-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z0.d }, p0, [x8] ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float> %x) ret <32 x i64> %a @@ -621,13 +355,8 @@ declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>) define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { ; CHECK-LABEL: llrint_v2i64_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: frintx v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: ret %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x) ret <2 x i64> %a @@ -637,20 +366,17 @@ declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>) define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { ; CHECK-LABEL: llrint_v4i64_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d2, v0.d[1] -; CHECK-NEXT: mov d3, v1.d[1] -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: frintx d2, d2 -; CHECK-NEXT: frintx d3, d3 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: fcvtzs x10, d2 -; CHECK-NEXT: fcvtzs x11, d3 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d 
+; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x) ret <4 x i64> %a @@ -660,34 +386,28 @@ declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ; CHECK-LABEL: llrint_v8i64_v8f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d4, v0.d[1] -; CHECK-NEXT: mov d5, v1.d[1] -; CHECK-NEXT: mov d6, v2.d[1] -; CHECK-NEXT: mov d7, v3.d[1] -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: frintx d2, d2 -; CHECK-NEXT: frintx d3, d3 -; CHECK-NEXT: frintx d4, d4 -; CHECK-NEXT: frintx d5, d5 -; CHECK-NEXT: frintx d6, d6 -; CHECK-NEXT: frintx d7, d7 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: fcvtzs x10, d2 -; CHECK-NEXT: fcvtzs x11, d3 -; CHECK-NEXT: fcvtzs x12, d4 -; CHECK-NEXT: fcvtzs x13, d5 -; CHECK-NEXT: fcvtzs x14, d6 -; CHECK-NEXT: fcvtzs x15, d7 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fmov d2, x10 -; CHECK-NEXT: fmov d3, x11 -; CHECK-NEXT: mov v0.d[1], x12 -; CHECK-NEXT: mov v1.d[1], x13 -; CHECK-NEXT: mov v2.d[1], x14 -; CHECK-NEXT: mov v3.d[1], x15 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: movprfx z1, z2 +; CHECK-NEXT: frintx z1.d, p0/m, z2.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d +; CHECK-NEXT: mov z1.d, 
z0.d +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x) ret <8 x i64> %a @@ -697,62 +417,50 @@ declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) define <16 x i64> @llrint_v16f64(<16 x double> %x) { ; CHECK-LABEL: llrint_v16f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d16, v0.d[1] -; CHECK-NEXT: mov d17, v1.d[1] -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: frintx d18, d2 -; CHECK-NEXT: mov d2, v2.d[1] -; CHECK-NEXT: frintx d19, d3 -; CHECK-NEXT: mov d3, v3.d[1] -; CHECK-NEXT: frintx d16, d16 -; CHECK-NEXT: frintx d17, d17 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: frintx d0, d4 -; CHECK-NEXT: mov d4, v4.d[1] -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: frintx d1, d5 -; CHECK-NEXT: mov d5, v5.d[1] -; CHECK-NEXT: fcvtzs x12, d18 -; CHECK-NEXT: frintx d2, d2 -; CHECK-NEXT: fcvtzs x13, d19 -; CHECK-NEXT: frintx d18, d3 -; CHECK-NEXT: fcvtzs x10, d16 -; CHECK-NEXT: mov d16, v6.d[1] -; CHECK-NEXT: fcvtzs x11, d17 -; CHECK-NEXT: mov d17, v7.d[1] -; CHECK-NEXT: frintx d6, d6 -; CHECK-NEXT: frintx d7, d7 -; CHECK-NEXT: frintx d4, d4 -; CHECK-NEXT: frintx d5, d5 -; CHECK-NEXT: fcvtzs x14, d0 -; CHECK-NEXT: fcvtzs x15, d1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: frintx d16, d16 -; CHECK-NEXT: fcvtzs x9, d2 -; CHECK-NEXT: fmov d2, x12 -; CHECK-NEXT: frintx d17, d17 -; CHECK-NEXT: fcvtzs x8, d6 -; CHECK-NEXT: fcvtzs x12, d7 -; CHECK-NEXT: fmov d3, x13 -; CHECK-NEXT: fcvtzs x13, d18 -; CHECK-NEXT: fcvtzs x16, d4 -; CHECK-NEXT: fcvtzs x17, d5 -; CHECK-NEXT: fmov d4, x14 -; CHECK-NEXT: fmov d5, x15 -; CHECK-NEXT: fcvtzs x18, d16 -; CHECK-NEXT: mov v0.d[1], x10 -; 
CHECK-NEXT: mov v1.d[1], x11 -; CHECK-NEXT: fcvtzs x0, d17 -; CHECK-NEXT: fmov d6, x8 -; CHECK-NEXT: fmov d7, x12 -; CHECK-NEXT: mov v2.d[1], x9 -; CHECK-NEXT: mov v3.d[1], x13 -; CHECK-NEXT: mov v4.d[1], x16 -; CHECK-NEXT: mov v5.d[1], x17 -; CHECK-NEXT: mov v6.d[1], x18 -; CHECK-NEXT: mov v7.d[1], x0 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6 +; CHECK-NEXT: // kill: def $q4 killed $q4 def $z4 +; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7 +; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5 +; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: splice z6.d, p0, z6.d, z7.d +; CHECK-NEXT: splice z4.d, p0, z4.d, z5.d +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: movprfx z1, z2 +; CHECK-NEXT: frintx z1.d, p0/m, z2.d +; CHECK-NEXT: movprfx z5, z6 +; CHECK-NEXT: frintx z5.d, p0/m, z6.d +; CHECK-NEXT: movprfx z3, z4 +; CHECK-NEXT: frintx z3.d, p0/m, z4.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d +; CHECK-NEXT: movprfx z6, z5 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z5.d +; CHECK-NEXT: movprfx z4, z3 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z3.d +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z7.d, z6.d +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 +; CHECK-NEXT: ext z7.b, z7.b, z6.b, #16 +; CHECK-NEXT: ext z5.b, z5.b, z4.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 +; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4 +; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; 
CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 +; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7 +; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5 ; CHECK-NEXT: ret %a = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> %x) ret <16 x i64> %a @@ -762,130 +470,63 @@ declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>) define <32 x i64> @llrint_v32f64(<32 x double> %x) { ; CHECK-LABEL: llrint_v32f64: ; CHECK: // %bb.0: -; CHECK-NEXT: frintx d20, d0 -; CHECK-NEXT: frintx d22, d3 -; CHECK-NEXT: frintx d21, d4 +; CHECK-NEXT: ldp q17, q16, [sp, #96] +; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q19, q18, [sp, #64] -; CHECK-NEXT: frintx d23, d5 -; CHECK-NEXT: ldp q27, q26, [sp, #96] -; CHECK-NEXT: mov d4, v4.d[1] -; CHECK-NEXT: ldp q16, q17, [sp, #32] -; CHECK-NEXT: mov d5, v5.d[1] -; CHECK-NEXT: fcvtzs x9, d20 -; CHECK-NEXT: frintx d20, d6 -; CHECK-NEXT: fcvtzs x11, d22 -; CHECK-NEXT: frintx d22, d19 -; CHECK-NEXT: fcvtzs x12, d21 -; CHECK-NEXT: fcvtzs x10, d23 -; CHECK-NEXT: mov d21, v26.d[1] -; CHECK-NEXT: frintx d23, d27 -; CHECK-NEXT: mov d27, v27.d[1] -; CHECK-NEXT: frintx d24, d16 -; CHECK-NEXT: mov d19, v19.d[1] -; CHECK-NEXT: frintx d25, d17 -; CHECK-NEXT: fcvtzs x13, d20 -; CHECK-NEXT: mov d20, v18.d[1] -; CHECK-NEXT: frintx d18, d18 -; CHECK-NEXT: fcvtzs x16, d22 -; CHECK-NEXT: frintx d22, d26 -; CHECK-NEXT: mov d16, v16.d[1] -; CHECK-NEXT: frintx d21, d21 -; CHECK-NEXT: fcvtzs x17, d23 -; CHECK-NEXT: frintx d23, d27 -; CHECK-NEXT: fcvtzs x14, d24 -; CHECK-NEXT: frintx d26, d19 -; CHECK-NEXT: fmov d19, x11 -; CHECK-NEXT: frintx d20, d20 -; CHECK-NEXT: mov d27, v17.d[1] -; CHECK-NEXT: fcvtzs x15, d25 -; CHECK-NEXT: ldp q25, q24, [sp] -; CHECK-NEXT: fcvtzs x11, d22 -; CHECK-NEXT: fmov d17, x12 -; CHECK-NEXT: fcvtzs x12, d21 -; CHECK-NEXT: fcvtzs x0, d23 -; CHECK-NEXT: fmov d23, x14 -; CHECK-NEXT: fcvtzs x14, d18 -; CHECK-NEXT: fmov d18, x17 -; CHECK-NEXT: fcvtzs x17, d20 -; CHECK-NEXT: frintx d21, d7 -; CHECK-NEXT: fcvtzs x18, d26 -; 
CHECK-NEXT: fmov d20, x11 -; CHECK-NEXT: frintx d22, d25 -; CHECK-NEXT: frintx d26, d27 -; CHECK-NEXT: frintx d16, d16 -; CHECK-NEXT: mov v18.d[1], x0 -; CHECK-NEXT: mov d25, v25.d[1] -; CHECK-NEXT: mov d7, v7.d[1] -; CHECK-NEXT: mov d6, v6.d[1] -; CHECK-NEXT: mov d0, v0.d[1] -; CHECK-NEXT: mov v20.d[1], x12 -; CHECK-NEXT: fcvtzs x11, d21 -; CHECK-NEXT: fmov d21, x15 -; CHECK-NEXT: fcvtzs x12, d22 -; CHECK-NEXT: fmov d22, x16 -; CHECK-NEXT: fcvtzs x15, d26 -; CHECK-NEXT: fmov d26, x14 -; CHECK-NEXT: fcvtzs x14, d16 -; CHECK-NEXT: frintx d25, d25 -; CHECK-NEXT: frintx d7, d7 -; CHECK-NEXT: mov d16, v1.d[1] -; CHECK-NEXT: mov d3, v3.d[1] -; CHECK-NEXT: stp q18, q20, [x8, #224] -; CHECK-NEXT: mov d18, v24.d[1] -; CHECK-NEXT: mov v22.d[1], x18 -; CHECK-NEXT: mov v26.d[1], x17 -; CHECK-NEXT: frintx d24, d24 -; CHECK-NEXT: mov v21.d[1], x15 -; CHECK-NEXT: mov v23.d[1], x14 -; CHECK-NEXT: frintx d20, d2 -; CHECK-NEXT: mov d2, v2.d[1] -; CHECK-NEXT: frintx d6, d6 -; CHECK-NEXT: frintx d5, d5 -; CHECK-NEXT: frintx d4, d4 -; CHECK-NEXT: frintx d18, d18 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: frintx d3, d3 -; CHECK-NEXT: stp q22, q26, [x8, #192] -; CHECK-NEXT: fmov d22, x10 -; CHECK-NEXT: fcvtzs x10, d24 -; CHECK-NEXT: stp q23, q21, [x8, #160] -; CHECK-NEXT: fmov d21, x11 -; CHECK-NEXT: fmov d24, x13 -; CHECK-NEXT: frintx d2, d2 -; CHECK-NEXT: fcvtzs x13, d6 -; CHECK-NEXT: frintx d6, d16 -; CHECK-NEXT: fcvtzs x11, d18 -; CHECK-NEXT: fmov d18, x12 -; CHECK-NEXT: fcvtzs x12, d25 -; CHECK-NEXT: fmov d23, x10 -; CHECK-NEXT: fcvtzs x10, d7 -; CHECK-NEXT: fcvtzs x14, d5 -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: fcvtzs x15, d3 -; CHECK-NEXT: mov v24.d[1], x13 -; CHECK-NEXT: fcvtzs x13, d2 -; CHECK-NEXT: fmov d2, x9 -; CHECK-NEXT: mov v23.d[1], x11 -; CHECK-NEXT: fcvtzs x11, d4 -; CHECK-NEXT: mov v18.d[1], x12 -; CHECK-NEXT: fcvtzs x12, d20 -; CHECK-NEXT: mov v21.d[1], x10 -; CHECK-NEXT: fcvtzs x10, d1 -; CHECK-NEXT: mov v22.d[1], x14 -; CHECK-NEXT: fcvtzs x14, d6 -; 
CHECK-NEXT: mov v19.d[1], x15 -; CHECK-NEXT: stp q18, q23, [x8, #128] -; CHECK-NEXT: mov v17.d[1], x11 -; CHECK-NEXT: fcvtzs x11, d0 -; CHECK-NEXT: stp q24, q21, [x8, #96] -; CHECK-NEXT: fmov d0, x12 -; CHECK-NEXT: fmov d1, x10 -; CHECK-NEXT: stp q17, q22, [x8, #64] -; CHECK-NEXT: mov v0.d[1], x13 -; CHECK-NEXT: mov v1.d[1], x14 -; CHECK-NEXT: mov v2.d[1], x11 -; CHECK-NEXT: stp q0, q19, [x8, #32] -; CHECK-NEXT: stp q2, q1, [x8] +; CHECK-NEXT: ptrue p1.d, vl4 +; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7 +; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6 +; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5 +; CHECK-NEXT: // kill: def $q4 killed $q4 def $z4 +; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 +; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: mov x9, #28 // =0x1c +; CHECK-NEXT: splice z17.d, p0, z17.d, z16.d +; CHECK-NEXT: ldp q20, q16, [sp, #32] +; CHECK-NEXT: splice z19.d, p0, z19.d, z18.d +; CHECK-NEXT: ldp q21, q18, [sp] +; CHECK-NEXT: splice z6.d, p0, z6.d, z7.d +; CHECK-NEXT: splice z4.d, p0, z4.d, z5.d +; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d +; CHECK-NEXT: splice z20.d, p0, z20.d, z16.d +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: splice z21.d, p0, z21.d, z18.d +; CHECK-NEXT: movprfx z7, z17 +; CHECK-NEXT: frintx z7.d, p1/m, z17.d +; CHECK-NEXT: movprfx z5, z19 +; CHECK-NEXT: frintx z5.d, p1/m, z19.d +; CHECK-NEXT: frintx z6.d, p1/m, z6.d +; CHECK-NEXT: frintx z4.d, p1/m, z4.d +; CHECK-NEXT: frintx z2.d, p1/m, z2.d +; CHECK-NEXT: movprfx z3, z20 +; CHECK-NEXT: frintx z3.d, p1/m, z20.d +; CHECK-NEXT: frintx z0.d, p1/m, z0.d +; CHECK-NEXT: movprfx z1, z21 +; CHECK-NEXT: frintx z1.d, p1/m, z21.d +; CHECK-NEXT: fcvtzs z7.d, p1/m, z7.d +; CHECK-NEXT: fcvtzs z5.d, p1/m, z5.d +; CHECK-NEXT: fcvtzs z6.d, p1/m, z6.d +; CHECK-NEXT: fcvtzs z4.d, p1/m, z4.d +; CHECK-NEXT: fcvtzs z2.d, p1/m, z2.d +; CHECK-NEXT: fcvtzs z3.d, 
p1/m, z3.d +; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d +; CHECK-NEXT: fcvtzs z1.d, p1/m, z1.d +; CHECK-NEXT: st1d { z7.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #24 // =0x18 +; CHECK-NEXT: st1d { z5.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #20 // =0x14 +; CHECK-NEXT: st1d { z3.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #16 // =0x10 +; CHECK-NEXT: st1d { z1.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #12 // =0xc +; CHECK-NEXT: st1d { z6.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #8 // =0x8 +; CHECK-NEXT: st1d { z4.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #4 // =0x4 +; CHECK-NEXT: st1d { z2.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z0.d }, p1, [x8] ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.llrint.v32i64.v16f64(<32 x double> %x) ret <32 x i64> %a diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll index e9c5fd9b769b6..558fa88eb64bd 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -aarch64-sve-vector-bits-min=256 | FileCheck %s define <1 x i64> @lrint_v1f16(<1 x half> %x) { ; CHECK-LABEL: lrint_v1f16: @@ -16,14 +16,12 @@ declare <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half>) define <2 x i64> @lrint_v2f16(<2 x half> %x) { ; CHECK-LABEL: lrint_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: frintx h0, h0 -; CHECK-NEXT: frintx h1, h1 -; CHECK-NEXT: fcvtzs x8, h0 -; CHECK-NEXT: fcvtzs x9, h1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: frintx v0.4h, v0.4h +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; 
CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %a = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> %x) ret <2 x i64> %a @@ -33,22 +31,15 @@ declare <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half>) define <4 x i64> @lrint_v4f16(<4 x half> %x) { ; CHECK-LABEL: lrint_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[2] -; CHECK-NEXT: mov h2, v0.h[1] -; CHECK-NEXT: mov h3, v0.h[3] -; CHECK-NEXT: frintx h0, h0 -; CHECK-NEXT: frintx h1, h1 -; CHECK-NEXT: frintx h2, h2 -; CHECK-NEXT: frintx h3, h3 -; CHECK-NEXT: fcvtzs x8, h0 -; CHECK-NEXT: fcvtzs x9, h1 -; CHECK-NEXT: fcvtzs x10, h2 -; CHECK-NEXT: fcvtzs x11, h3 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: frintx v0.4h, v0.4h +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret %a = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> %x) ret <4 x i64> %a @@ -59,36 +50,24 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) { ; CHECK-LABEL: lrint_v8f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov h4, v0.h[2] -; CHECK-NEXT: mov h3, v0.h[1] -; CHECK-NEXT: mov h7, v0.h[3] -; CHECK-NEXT: frintx h0, h0 -; CHECK-NEXT: mov h2, v1.h[2] -; CHECK-NEXT: mov h5, v1.h[1] -; CHECK-NEXT: mov h6, v1.h[3] -; CHECK-NEXT: frintx h1, h1 -; CHECK-NEXT: frintx h4, h4 -; CHECK-NEXT: frintx h3, h3 -; CHECK-NEXT: frintx h7, h7 -; CHECK-NEXT: fcvtzs x9, h0 -; CHECK-NEXT: frintx h2, h2 -; CHECK-NEXT: frintx h5, h5 -; CHECK-NEXT: frintx h6, h6 -; CHECK-NEXT: fcvtzs x8, h1 -; CHECK-NEXT: fcvtzs x12, h4 -; CHECK-NEXT: fcvtzs x11, h3 -; CHECK-NEXT: fcvtzs x15, h7 -; CHECK-NEXT: fmov 
d0, x9 -; CHECK-NEXT: fcvtzs x10, h2 -; CHECK-NEXT: fcvtzs x13, h5 -; CHECK-NEXT: fcvtzs x14, h6 -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fmov d1, x12 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: fmov d3, x10 -; CHECK-NEXT: mov v2.d[1], x13 -; CHECK-NEXT: mov v1.d[1], x15 -; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: frintx v0.4h, v0.4h +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: frintx v1.4h, v1.4h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.h +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 +; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 +; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> %x) ret <8 x i64> %a @@ -100,66 +79,41 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: mov h4, v0.h[1] -; CHECK-NEXT: frintx h5, h0 -; CHECK-NEXT: mov h18, v0.h[2] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: frintx h6, h2 -; CHECK-NEXT: mov h7, v2.h[1] -; CHECK-NEXT: mov h16, v2.h[2] -; CHECK-NEXT: mov h17, v3.h[2] -; CHECK-NEXT: frintx h19, h3 -; CHECK-NEXT: frintx h4, h4 -; CHECK-NEXT: fcvtzs x8, h5 -; CHECK-NEXT: mov h5, v1.h[1] -; CHECK-NEXT: mov h2, v2.h[3] -; CHECK-NEXT: frintx h18, h18 -; CHECK-NEXT: frintx h0, h0 -; CHECK-NEXT: fcvtzs x9, h6 -; CHECK-NEXT: frintx h6, h7 -; CHECK-NEXT: frintx h7, h16 -; CHECK-NEXT: mov h16, v1.h[2] -; CHECK-NEXT: frintx h17, h17 -; CHECK-NEXT: fcvtzs x10, h19 -; CHECK-NEXT: mov h19, v3.h[1] -; CHECK-NEXT: fcvtzs x11, h4 -; 
CHECK-NEXT: mov h4, v1.h[3] -; CHECK-NEXT: mov h3, v3.h[3] -; CHECK-NEXT: frintx h1, h1 -; CHECK-NEXT: frintx h5, h5 -; CHECK-NEXT: fcvtzs x13, h7 -; CHECK-NEXT: fcvtzs x12, h6 -; CHECK-NEXT: fcvtzs x15, h18 -; CHECK-NEXT: frintx h7, h16 -; CHECK-NEXT: fcvtzs x14, h17 -; CHECK-NEXT: frintx h16, h2 -; CHECK-NEXT: frintx h17, h19 -; CHECK-NEXT: frintx h4, h4 -; CHECK-NEXT: fmov d2, x9 -; CHECK-NEXT: frintx h19, h3 -; CHECK-NEXT: fcvtzs x9, h1 -; CHECK-NEXT: fmov d6, x10 -; CHECK-NEXT: fmov d3, x13 -; CHECK-NEXT: fcvtzs x13, h0 -; CHECK-NEXT: fcvtzs x16, h5 -; CHECK-NEXT: fcvtzs x10, h7 -; CHECK-NEXT: fmov d7, x14 -; CHECK-NEXT: fcvtzs x14, h16 -; CHECK-NEXT: fcvtzs x17, h17 -; CHECK-NEXT: fcvtzs x0, h4 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fcvtzs x18, h19 -; CHECK-NEXT: fmov d1, x15 -; CHECK-NEXT: fmov d4, x9 -; CHECK-NEXT: mov v2.d[1], x12 -; CHECK-NEXT: fmov d5, x10 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: mov v3.d[1], x14 -; CHECK-NEXT: mov v1.d[1], x13 -; CHECK-NEXT: mov v4.d[1], x16 -; CHECK-NEXT: mov v6.d[1], x17 -; CHECK-NEXT: mov v7.d[1], x18 -; CHECK-NEXT: mov v5.d[1], x0 +; CHECK-NEXT: frintx v0.4h, v0.4h +; CHECK-NEXT: frintx v1.4h, v1.4h +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: frintx v2.4h, v2.4h +; CHECK-NEXT: frintx v3.4h, v3.4h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z2.s, z2.h +; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: uunpklo z2.d, z2.s +; CHECK-NEXT: uunpklo z3.d, z3.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: movprfx z4, z1 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.h +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h +; CHECK-NEXT: movprfx z6, z3 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z3.h +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z7.d, z6.d +; CHECK-NEXT: ext 
z5.b, z5.b, z4.b, #16 +; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5 +; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 +; CHECK-NEXT: ext z7.b, z7.b, z6.b, #16 +; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 +; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6 +; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 +; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7 ; CHECK-NEXT: ret %a = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> %x) ret <16 x i64> %a @@ -169,138 +123,61 @@ declare <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half>) define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-LABEL: lrint_v32i64_v32f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v4.16b, v3.16b, v3.16b, #8 ; CHECK-NEXT: ext v5.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: ext v6.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: mov x9, #24 // =0x18 +; CHECK-NEXT: frintx v3.4h, v3.4h +; CHECK-NEXT: ext v6.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: frintx v2.4h, v2.4h ; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: frintx h21, h1 -; CHECK-NEXT: frintx h22, h2 -; CHECK-NEXT: mov h26, v2.h[2] -; CHECK-NEXT: frintx h19, h0 -; CHECK-NEXT: mov h27, v3.h[2] -; CHECK-NEXT: mov h20, v2.h[1] -; CHECK-NEXT: mov h18, v1.h[1] -; CHECK-NEXT: mov h16, v4.h[2] -; CHECK-NEXT: mov h17, v5.h[2] -; CHECK-NEXT: frintx h23, h5 -; CHECK-NEXT: frintx h24, h6 -; CHECK-NEXT: mov h25, v6.h[2] -; CHECK-NEXT: fcvtzs x9, h21 -; CHECK-NEXT: fcvtzs x11, h22 -; CHECK-NEXT: frintx h22, h7 -; CHECK-NEXT: mov h21, v3.h[3] -; CHECK-NEXT: fcvtzs x10, h19 -; CHECK-NEXT: frintx h27, h27 -; CHECK-NEXT: frintx h20, h20 -; CHECK-NEXT: frintx h16, h16 -; CHECK-NEXT: frintx h17, h17 -; CHECK-NEXT: fcvtzs x12, h23 -; CHECK-NEXT: fcvtzs x13, h24 -; CHECK-NEXT: frintx h23, h25 -; CHECK-NEXT: frintx h25, h26 -; CHECK-NEXT: mov h26, v3.h[1] -; CHECK-NEXT: mov h24, v2.h[3] -; 
CHECK-NEXT: fmov d19, x9 -; CHECK-NEXT: fcvtzs x9, h22 -; CHECK-NEXT: frintx h22, h3 -; CHECK-NEXT: frintx h21, h21 -; CHECK-NEXT: fcvtzs x14, h16 -; CHECK-NEXT: fcvtzs x15, h17 -; CHECK-NEXT: fmov d2, x12 -; CHECK-NEXT: fmov d16, x13 -; CHECK-NEXT: fcvtzs x12, h23 -; CHECK-NEXT: fcvtzs x13, h25 -; CHECK-NEXT: mov h23, v1.h[2] -; CHECK-NEXT: frintx h25, h26 -; CHECK-NEXT: frintx h24, h24 -; CHECK-NEXT: mov h1, v1.h[3] -; CHECK-NEXT: fmov d26, x11 -; CHECK-NEXT: fcvtzs x11, h21 -; CHECK-NEXT: fmov d3, x14 -; CHECK-NEXT: fmov d17, x15 -; CHECK-NEXT: fcvtzs x14, h22 -; CHECK-NEXT: fcvtzs x15, h27 -; CHECK-NEXT: mov h22, v0.h[2] -; CHECK-NEXT: frintx h18, h18 -; CHECK-NEXT: frintx h21, h23 -; CHECK-NEXT: fmov d23, x13 -; CHECK-NEXT: fcvtzs x13, h25 -; CHECK-NEXT: frintx h1, h1 -; CHECK-NEXT: fmov d25, x14 -; CHECK-NEXT: fcvtzs x14, h24 -; CHECK-NEXT: fmov d24, x15 -; CHECK-NEXT: frintx h22, h22 -; CHECK-NEXT: fcvtzs x15, h18 -; CHECK-NEXT: mov h18, v7.h[1] -; CHECK-NEXT: mov v25.d[1], x13 -; CHECK-NEXT: fcvtzs x13, h21 -; CHECK-NEXT: mov h21, v7.h[2] -; CHECK-NEXT: mov v24.d[1], x11 -; CHECK-NEXT: fcvtzs x11, h20 -; CHECK-NEXT: mov h20, v0.h[1] -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: mov v23.d[1], x14 -; CHECK-NEXT: fcvtzs x14, h1 -; CHECK-NEXT: mov h1, v6.h[3] -; CHECK-NEXT: mov h6, v6.h[1] -; CHECK-NEXT: mov v19.d[1], x15 -; CHECK-NEXT: mov h7, v7.h[3] -; CHECK-NEXT: stp q25, q24, [x8, #192] -; CHECK-NEXT: fmov d24, x13 -; CHECK-NEXT: frintx h20, h20 -; CHECK-NEXT: mov v26.d[1], x11 -; CHECK-NEXT: fcvtzs x11, h22 -; CHECK-NEXT: mov h22, v5.h[1] -; CHECK-NEXT: mov h5, v5.h[3] -; CHECK-NEXT: frintx h0, h0 -; CHECK-NEXT: frintx h1, h1 -; CHECK-NEXT: mov v24.d[1], x14 -; CHECK-NEXT: mov h25, v4.h[3] -; CHECK-NEXT: frintx h6, h6 -; CHECK-NEXT: stp q26, q23, [x8, #128] -; CHECK-NEXT: fmov d23, x12 -; CHECK-NEXT: fcvtzs x12, h20 -; CHECK-NEXT: mov h20, v4.h[1] -; CHECK-NEXT: frintx h5, h5 -; CHECK-NEXT: fcvtzs x13, h0 -; CHECK-NEXT: stp q19, q24, [x8, #64] -; 
CHECK-NEXT: frintx h22, h22 -; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: fmov d19, x11 -; CHECK-NEXT: frintx h4, h4 -; CHECK-NEXT: fcvtzs x10, h1 -; CHECK-NEXT: frintx h1, h21 -; CHECK-NEXT: frintx h24, h25 -; CHECK-NEXT: fcvtzs x11, h6 -; CHECK-NEXT: frintx h20, h20 -; CHECK-NEXT: frintx h6, h7 -; CHECK-NEXT: fcvtzs x14, h5 -; CHECK-NEXT: mov v19.d[1], x13 -; CHECK-NEXT: frintx h5, h18 -; CHECK-NEXT: fcvtzs x13, h22 -; CHECK-NEXT: mov v0.d[1], x12 -; CHECK-NEXT: fcvtzs x12, h4 -; CHECK-NEXT: mov v23.d[1], x10 -; CHECK-NEXT: fcvtzs x10, h1 -; CHECK-NEXT: fcvtzs x15, h24 -; CHECK-NEXT: mov v16.d[1], x11 -; CHECK-NEXT: fcvtzs x11, h20 -; CHECK-NEXT: mov v17.d[1], x14 -; CHECK-NEXT: fcvtzs x14, h6 -; CHECK-NEXT: mov v2.d[1], x13 -; CHECK-NEXT: fcvtzs x13, h5 -; CHECK-NEXT: fmov d4, x9 -; CHECK-NEXT: stp q0, q19, [x8] -; CHECK-NEXT: fmov d0, x12 -; CHECK-NEXT: stp q16, q23, [x8, #224] -; CHECK-NEXT: fmov d1, x10 -; CHECK-NEXT: mov v3.d[1], x15 -; CHECK-NEXT: stp q2, q17, [x8, #160] -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: mov v4.d[1], x13 -; CHECK-NEXT: mov v1.d[1], x14 -; CHECK-NEXT: stp q0, q3, [x8, #96] -; CHECK-NEXT: stp q4, q1, [x8, #32] +; CHECK-NEXT: frintx v1.4h, v1.4h +; CHECK-NEXT: frintx v0.4h, v0.4h +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: frintx v4.4h, v4.4h +; CHECK-NEXT: frintx v5.4h, v5.4h +; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: frintx v6.4h, v6.4h +; CHECK-NEXT: uunpklo z2.s, z2.h +; CHECK-NEXT: frintx v7.4h, v7.4h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z4.s, z4.h +; CHECK-NEXT: uunpklo z5.s, z5.h +; CHECK-NEXT: uunpklo z3.d, z3.s +; CHECK-NEXT: uunpklo z6.s, z6.h +; CHECK-NEXT: uunpklo z2.d, z2.s +; CHECK-NEXT: uunpklo z7.s, z7.h +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: uunpklo z4.d, z4.s +; CHECK-NEXT: uunpklo z5.d, z5.s +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h +; CHECK-NEXT: uunpklo z6.d, z6.s +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h +; 
CHECK-NEXT: uunpklo z7.d, z7.s +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h +; CHECK-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #16 // =0x10 +; CHECK-NEXT: movprfx z3, z5 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z5.h +; CHECK-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #8 // =0x8 +; CHECK-NEXT: movprfx z2, z6 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.h +; CHECK-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #28 // =0x1c +; CHECK-NEXT: movprfx z1, z7 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z7.h +; CHECK-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #20 // =0x14 +; CHECK-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #12 // =0xc +; CHECK-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #4 // =0x4 +; CHECK-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z0.d }, p0, [x8] ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half> %x) ret <32 x i64> %a @@ -310,10 +187,10 @@ declare <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half>) define <1 x i64> @lrint_v1f32(<1 x float> %x) { ; CHECK-LABEL: lrint_v1f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: frintx v0.2s, v0.2s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %a = call <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float> %x) ret <1 x i64> %a @@ -323,14 +200,9 @@ declare <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float>) define <2 x i64> @lrint_v2f32(<2 x float> %x) { ; CHECK-LABEL: lrint_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: fcvtzs x9, s1 -; 
CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: frintx v0.2s, v0.2s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: ret %a = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %x) ret <2 x i64> %a @@ -340,21 +212,14 @@ declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float>) define <4 x i64> @lrint_v4f32(<4 x float> %x) { ; CHECK-LABEL: lrint_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov s3, v0.s[1] -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: mov s2, v1.s[1] -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: fcvtzs x9, s0 -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fcvtzs x10, s2 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: mov v1.d[1], x10 +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret %a = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> %x) ret <4 x i64> %a @@ -364,36 +229,22 @@ declare <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float>) define <8 x i64> @lrint_v8f32(<8 x float> %x) { ; CHECK-LABEL: lrint_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: mov s4, v0.s[1] -; CHECK-NEXT: mov s7, v1.s[1] -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: mov s5, v2.s[1] -; CHECK-NEXT: mov s6, v3.s[1] -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: frintx s7, s7 -; CHECK-NEXT: fcvtzs x9, s0 -; CHECK-NEXT: fcvtzs x12, s1 -; CHECK-NEXT: frintx s5, s5 -; CHECK-NEXT: frintx s6, s6 -; CHECK-NEXT: fcvtzs 
x8, s2 -; CHECK-NEXT: fcvtzs x10, s3 -; CHECK-NEXT: fcvtzs x11, s4 -; CHECK-NEXT: fcvtzs x15, s7 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fmov d2, x12 -; CHECK-NEXT: fcvtzs x13, s5 -; CHECK-NEXT: fcvtzs x14, s6 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fmov d3, x10 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: mov v2.d[1], x15 -; CHECK-NEXT: mov v1.d[1], x13 -; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: frintx v1.4s, v1.4s +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.s +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> %x) ret <8 x i64> %a @@ -403,66 +254,37 @@ declare <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float>) define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) { ; CHECK-LABEL: lrint_v16i64_v16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v5.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: frintx s7, s0 -; CHECK-NEXT: ext v16.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: frintx s17, s4 -; CHECK-NEXT: mov s4, v4.s[1] -; CHECK-NEXT: mov s18, v5.s[1] -; CHECK-NEXT: frintx s5, s5 -; CHECK-NEXT: frintx s19, s6 -; CHECK-NEXT: fcvtzs x8, s7 -; CHECK-NEXT: frintx s7, s16 -; CHECK-NEXT: mov s6, v6.s[1] -; CHECK-NEXT: mov s16, v16.s[1] -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: fcvtzs x9, s17 -; CHECK-NEXT: frintx s17, s1 -; CHECK-NEXT: mov s1, v1.s[1] -; CHECK-NEXT: 
frintx s18, s18 -; CHECK-NEXT: fcvtzs x10, s5 -; CHECK-NEXT: mov s5, v2.s[1] -; CHECK-NEXT: fcvtzs x11, s19 -; CHECK-NEXT: mov s19, v3.s[1] -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: fcvtzs x12, s7 -; CHECK-NEXT: frintx s6, s6 -; CHECK-NEXT: fcvtzs x13, s4 -; CHECK-NEXT: frintx s4, s3 -; CHECK-NEXT: frintx s16, s16 -; CHECK-NEXT: fcvtzs x14, s18 -; CHECK-NEXT: frintx s18, s1 -; CHECK-NEXT: fcvtzs x15, s17 -; CHECK-NEXT: frintx s20, s5 -; CHECK-NEXT: frintx s17, s19 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fcvtzs x9, s2 -; CHECK-NEXT: fmov d5, x11 -; CHECK-NEXT: fmov d3, x10 -; CHECK-NEXT: fcvtzs x11, s4 -; CHECK-NEXT: fcvtzs x10, s0 -; CHECK-NEXT: fmov d7, x12 -; CHECK-NEXT: fcvtzs x12, s18 -; CHECK-NEXT: fcvtzs x17, s6 -; CHECK-NEXT: fcvtzs x18, s16 -; CHECK-NEXT: fcvtzs x16, s20 -; CHECK-NEXT: fcvtzs x0, s17 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d2, x15 -; CHECK-NEXT: fmov d4, x9 -; CHECK-NEXT: mov v1.d[1], x13 -; CHECK-NEXT: fmov d6, x11 -; CHECK-NEXT: mov v3.d[1], x14 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: mov v5.d[1], x17 -; CHECK-NEXT: mov v7.d[1], x18 -; CHECK-NEXT: mov v2.d[1], x12 -; CHECK-NEXT: mov v4.d[1], x16 -; CHECK-NEXT: mov v6.d[1], x0 +; CHECK-NEXT: frintx v1.4s, v1.4s +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: frintx v2.4s, v2.4s +; CHECK-NEXT: frintx v3.4s, v3.4s +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: uunpklo z4.d, z2.s +; CHECK-NEXT: uunpklo z3.d, z3.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.s +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s +; CHECK-NEXT: movprfx z6, z3 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z3.s +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z6.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 +; CHECK-NEXT: ext z5.b, z5.b, z4.b, #16 +; CHECK-NEXT: ext z7.b, z7.b, z6.b, #16 +; 
CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 +; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4 +; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 +; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5 +; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7 ; CHECK-NEXT: ret %a = call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> %x) ret <16 x i64> %a @@ -472,134 +294,46 @@ declare <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float>) define <32 x i64> @lrint_v32i64_v32f32(<32 x float> %x) { ; CHECK-LABEL: lrint_v32i64_v32f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v16.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: ext v20.16b, v5.16b, v5.16b, #8 -; CHECK-NEXT: ext v17.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: ext v18.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v19.16b, v4.16b, v4.16b, #8 -; CHECK-NEXT: ext v21.16b, v6.16b, v6.16b, #8 -; CHECK-NEXT: ext v22.16b, v7.16b, v7.16b, #8 -; CHECK-NEXT: frintx s24, s16 -; CHECK-NEXT: mov s28, v20.s[1] -; CHECK-NEXT: frintx s25, s17 -; CHECK-NEXT: frintx s26, s18 -; CHECK-NEXT: frintx s27, s19 -; CHECK-NEXT: frintx s29, s20 -; CHECK-NEXT: mov s30, v21.s[1] -; CHECK-NEXT: frintx s20, s21 -; CHECK-NEXT: frintx s21, s22 -; CHECK-NEXT: mov s23, v22.s[1] -; CHECK-NEXT: mov s19, v19.s[1] -; CHECK-NEXT: mov s17, v17.s[1] -; CHECK-NEXT: fcvtzs x12, s24 -; CHECK-NEXT: frintx s24, s28 -; CHECK-NEXT: fcvtzs x13, s25 -; CHECK-NEXT: mov s25, v7.s[1] -; CHECK-NEXT: fcvtzs x9, s26 -; CHECK-NEXT: fcvtzs x11, s27 -; CHECK-NEXT: fcvtzs x14, s20 -; CHECK-NEXT: fcvtzs x15, s21 -; CHECK-NEXT: frintx s26, s1 -; CHECK-NEXT: frintx s23, s23 -; CHECK-NEXT: frintx s27, s7 -; CHECK-NEXT: frintx s22, s30 -; CHECK-NEXT: fmov d20, x12 -; CHECK-NEXT: fcvtzs x12, s24 -; CHECK-NEXT: mov s24, v6.s[1] -; CHECK-NEXT: frintx s25, s25 -; CHECK-NEXT: frintx s6, s6 -; CHECK-NEXT: fcvtzs x10, s29 -; 
CHECK-NEXT: fmov d7, x11 -; CHECK-NEXT: fmov d21, x13 -; CHECK-NEXT: frintx s28, s5 -; CHECK-NEXT: fcvtzs x11, s23 -; CHECK-NEXT: fmov d23, x14 -; CHECK-NEXT: fcvtzs x14, s26 -; CHECK-NEXT: fmov d26, x15 -; CHECK-NEXT: fcvtzs x15, s27 -; CHECK-NEXT: frintx s24, s24 -; CHECK-NEXT: mov s27, v5.s[1] -; CHECK-NEXT: fcvtzs x13, s22 -; CHECK-NEXT: fcvtzs x17, s25 -; CHECK-NEXT: frintx s25, s4 -; CHECK-NEXT: fcvtzs x18, s6 -; CHECK-NEXT: fmov d6, x10 -; CHECK-NEXT: frintx s22, s2 -; CHECK-NEXT: mov v26.d[1], x11 -; CHECK-NEXT: fmov d5, x14 -; CHECK-NEXT: fcvtzs x10, s24 -; CHECK-NEXT: fmov d24, x15 -; CHECK-NEXT: fcvtzs x14, s28 -; CHECK-NEXT: frintx s27, s27 -; CHECK-NEXT: mov v23.d[1], x13 -; CHECK-NEXT: mov s4, v4.s[1] -; CHECK-NEXT: fcvtzs x13, s25 -; CHECK-NEXT: fmov d25, x18 -; CHECK-NEXT: mov s16, v16.s[1] -; CHECK-NEXT: mov v24.d[1], x17 -; CHECK-NEXT: fcvtzs x16, s22 -; CHECK-NEXT: frintx s22, s3 -; CHECK-NEXT: mov s3, v3.s[1] -; CHECK-NEXT: frintx s19, s19 -; CHECK-NEXT: mov s2, v2.s[1] -; CHECK-NEXT: mov v25.d[1], x10 -; CHECK-NEXT: fcvtzs x10, s27 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: mov v6.d[1], x12 -; CHECK-NEXT: frintx s17, s17 -; CHECK-NEXT: mov s18, v18.s[1] -; CHECK-NEXT: stp q24, q26, [x8, #224] -; CHECK-NEXT: fmov d24, x14 -; CHECK-NEXT: fcvtzs x11, s22 -; CHECK-NEXT: ext v22.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: mov s1, v1.s[1] -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: stp q25, q23, [x8, #192] -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: fcvtzs x12, s4 -; CHECK-NEXT: mov v24.d[1], x10 -; CHECK-NEXT: fcvtzs x10, s19 -; CHECK-NEXT: mov s19, v0.s[1] -; CHECK-NEXT: frintx s16, s16 -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: fmov d4, x11 -; CHECK-NEXT: mov s27, v22.s[1] -; CHECK-NEXT: frintx s22, s22 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fcvtzs x14, s2 -; CHECK-NEXT: frintx s2, s18 -; CHECK-NEXT: stp q24, q6, [x8, #160] -; CHECK-NEXT: fmov d6, x13 -; CHECK-NEXT: fcvtzs x13, s17 -; CHECK-NEXT: frintx s17, s19 -; 
CHECK-NEXT: fmov d23, x16 -; CHECK-NEXT: mov v7.d[1], x10 -; CHECK-NEXT: frintx s3, s27 -; CHECK-NEXT: fcvtzs x10, s22 -; CHECK-NEXT: fcvtzs x15, s1 -; CHECK-NEXT: mov v6.d[1], x12 -; CHECK-NEXT: fcvtzs x12, s16 -; CHECK-NEXT: mov v4.d[1], x11 -; CHECK-NEXT: mov v21.d[1], x13 -; CHECK-NEXT: fcvtzs x13, s0 -; CHECK-NEXT: mov v23.d[1], x14 -; CHECK-NEXT: fcvtzs x14, s17 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: mov v5.d[1], x15 -; CHECK-NEXT: stp q6, q7, [x8, #128] -; CHECK-NEXT: mov v20.d[1], x12 -; CHECK-NEXT: fcvtzs x12, s2 -; CHECK-NEXT: stp q4, q21, [x8, #96] -; CHECK-NEXT: fmov d1, x13 -; CHECK-NEXT: fmov d2, x9 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: stp q23, q20, [x8, #64] -; CHECK-NEXT: mov v1.d[1], x14 -; CHECK-NEXT: mov v2.d[1], x12 -; CHECK-NEXT: stp q5, q0, [x8, #32] -; CHECK-NEXT: stp q1, q2, [x8] +; CHECK-NEXT: frintx v7.4s, v7.4s +; CHECK-NEXT: frintx v6.4s, v6.4s +; CHECK-NEXT: mov x9, #28 // =0x1c +; CHECK-NEXT: frintx v5.4s, v5.4s +; CHECK-NEXT: frintx v4.4s, v4.4s +; CHECK-NEXT: frintx v3.4s, v3.4s +; CHECK-NEXT: frintx v2.4s, v2.4s +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: frintx v1.4s, v1.4s +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: uunpklo z7.d, z7.s +; CHECK-NEXT: uunpklo z6.d, z6.s +; CHECK-NEXT: uunpklo z5.d, z5.s +; CHECK-NEXT: uunpklo z4.d, z4.s +; CHECK-NEXT: uunpklo z3.d, z3.s +; CHECK-NEXT: uunpklo z2.d, z2.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.s +; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s +; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.s +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: st1d { z7.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #24 // =0x18 +; CHECK-NEXT: st1d { z6.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #20 // =0x14 +; CHECK-NEXT: st1d { z5.d }, p0, 
[x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #16 // =0x10 +; CHECK-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #12 // =0xc +; CHECK-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #8 // =0x8 +; CHECK-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #4 // =0x4 +; CHECK-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z0.d }, p0, [x8] ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.lrint.v32i64.v32f32(<32 x float> %x) ret <32 x i64> %a @@ -621,13 +355,8 @@ declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>) define <2 x i64> @lrint_v2f64(<2 x double> %x) { ; CHECK-LABEL: lrint_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: frintx v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: ret %a = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> %x) ret <2 x i64> %a @@ -637,20 +366,17 @@ declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>) define <4 x i64> @lrint_v4f64(<4 x double> %x) { ; CHECK-LABEL: lrint_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d2, v0.d[1] -; CHECK-NEXT: mov d3, v1.d[1] -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: frintx d2, d2 -; CHECK-NEXT: frintx d3, d3 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: fcvtzs x10, d2 -; CHECK-NEXT: fcvtzs x11, d3 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; 
CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret %a = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> %x) ret <4 x i64> %a @@ -660,34 +386,28 @@ declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>) define <8 x i64> @lrint_v8f64(<8 x double> %x) { ; CHECK-LABEL: lrint_v8f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d4, v0.d[1] -; CHECK-NEXT: mov d5, v1.d[1] -; CHECK-NEXT: mov d6, v2.d[1] -; CHECK-NEXT: mov d7, v3.d[1] -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: frintx d2, d2 -; CHECK-NEXT: frintx d3, d3 -; CHECK-NEXT: frintx d4, d4 -; CHECK-NEXT: frintx d5, d5 -; CHECK-NEXT: frintx d6, d6 -; CHECK-NEXT: frintx d7, d7 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: fcvtzs x10, d2 -; CHECK-NEXT: fcvtzs x11, d3 -; CHECK-NEXT: fcvtzs x12, d4 -; CHECK-NEXT: fcvtzs x13, d5 -; CHECK-NEXT: fcvtzs x14, d6 -; CHECK-NEXT: fcvtzs x15, d7 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fmov d2, x10 -; CHECK-NEXT: fmov d3, x11 -; CHECK-NEXT: mov v0.d[1], x12 -; CHECK-NEXT: mov v1.d[1], x13 -; CHECK-NEXT: mov v2.d[1], x14 -; CHECK-NEXT: mov v3.d[1], x15 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: movprfx z1, z2 +; CHECK-NEXT: frintx z1.d, p0/m, z2.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def 
$q2 killed $q2 killed $z2 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> %x) ret <8 x i64> %a @@ -697,62 +417,50 @@ declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>) define <16 x i64> @lrint_v16f64(<16 x double> %x) { ; CHECK-LABEL: lrint_v16f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d16, v0.d[1] -; CHECK-NEXT: mov d17, v1.d[1] -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: frintx d18, d2 -; CHECK-NEXT: mov d2, v2.d[1] -; CHECK-NEXT: frintx d19, d3 -; CHECK-NEXT: mov d3, v3.d[1] -; CHECK-NEXT: frintx d16, d16 -; CHECK-NEXT: frintx d17, d17 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: frintx d0, d4 -; CHECK-NEXT: mov d4, v4.d[1] -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: frintx d1, d5 -; CHECK-NEXT: mov d5, v5.d[1] -; CHECK-NEXT: fcvtzs x12, d18 -; CHECK-NEXT: frintx d2, d2 -; CHECK-NEXT: fcvtzs x13, d19 -; CHECK-NEXT: frintx d18, d3 -; CHECK-NEXT: fcvtzs x10, d16 -; CHECK-NEXT: mov d16, v6.d[1] -; CHECK-NEXT: fcvtzs x11, d17 -; CHECK-NEXT: mov d17, v7.d[1] -; CHECK-NEXT: frintx d6, d6 -; CHECK-NEXT: frintx d7, d7 -; CHECK-NEXT: frintx d4, d4 -; CHECK-NEXT: frintx d5, d5 -; CHECK-NEXT: fcvtzs x14, d0 -; CHECK-NEXT: fcvtzs x15, d1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: frintx d16, d16 -; CHECK-NEXT: fcvtzs x9, d2 -; CHECK-NEXT: fmov d2, x12 -; CHECK-NEXT: frintx d17, d17 -; CHECK-NEXT: fcvtzs x8, d6 -; CHECK-NEXT: fcvtzs x12, d7 -; CHECK-NEXT: fmov d3, x13 -; CHECK-NEXT: fcvtzs x13, d18 -; CHECK-NEXT: fcvtzs x16, d4 -; CHECK-NEXT: fcvtzs x17, d5 -; CHECK-NEXT: fmov d4, x14 -; CHECK-NEXT: fmov d5, x15 -; CHECK-NEXT: fcvtzs x18, d16 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: mov v1.d[1], x11 -; CHECK-NEXT: fcvtzs x0, d17 -; CHECK-NEXT: fmov d6, x8 -; CHECK-NEXT: fmov d7, x12 -; CHECK-NEXT: mov v2.d[1], x9 -; CHECK-NEXT: mov v3.d[1], x13 -; CHECK-NEXT: mov v4.d[1], x16 -; 
CHECK-NEXT: mov v5.d[1], x17 -; CHECK-NEXT: mov v6.d[1], x18 -; CHECK-NEXT: mov v7.d[1], x0 +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6 +; CHECK-NEXT: // kill: def $q4 killed $q4 def $z4 +; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7 +; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5 +; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: splice z6.d, p0, z6.d, z7.d +; CHECK-NEXT: splice z4.d, p0, z4.d, z5.d +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: movprfx z1, z2 +; CHECK-NEXT: frintx z1.d, p0/m, z2.d +; CHECK-NEXT: movprfx z5, z6 +; CHECK-NEXT: frintx z5.d, p0/m, z6.d +; CHECK-NEXT: movprfx z3, z4 +; CHECK-NEXT: frintx z3.d, p0/m, z4.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d +; CHECK-NEXT: movprfx z6, z5 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z5.d +; CHECK-NEXT: movprfx z4, z3 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z3.d +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z7.d, z6.d +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 +; CHECK-NEXT: ext z7.b, z7.b, z6.b, #16 +; CHECK-NEXT: ext z5.b, z5.b, z4.b, #16 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 +; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4 +; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 +; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7 +; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5 ; CHECK-NEXT: ret %a = call <16 x i64> 
@llvm.lrint.v16i64.v16f64(<16 x double> %x) ret <16 x i64> %a @@ -762,130 +470,63 @@ declare <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double>) define <32 x i64> @lrint_v32f64(<32 x double> %x) { ; CHECK-LABEL: lrint_v32f64: ; CHECK: // %bb.0: -; CHECK-NEXT: frintx d20, d0 -; CHECK-NEXT: frintx d22, d3 -; CHECK-NEXT: frintx d21, d4 +; CHECK-NEXT: ldp q17, q16, [sp, #96] +; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldp q19, q18, [sp, #64] -; CHECK-NEXT: frintx d23, d5 -; CHECK-NEXT: ldp q27, q26, [sp, #96] -; CHECK-NEXT: mov d4, v4.d[1] -; CHECK-NEXT: ldp q16, q17, [sp, #32] -; CHECK-NEXT: mov d5, v5.d[1] -; CHECK-NEXT: fcvtzs x9, d20 -; CHECK-NEXT: frintx d20, d6 -; CHECK-NEXT: fcvtzs x11, d22 -; CHECK-NEXT: frintx d22, d19 -; CHECK-NEXT: fcvtzs x12, d21 -; CHECK-NEXT: fcvtzs x10, d23 -; CHECK-NEXT: mov d21, v26.d[1] -; CHECK-NEXT: frintx d23, d27 -; CHECK-NEXT: mov d27, v27.d[1] -; CHECK-NEXT: frintx d24, d16 -; CHECK-NEXT: mov d19, v19.d[1] -; CHECK-NEXT: frintx d25, d17 -; CHECK-NEXT: fcvtzs x13, d20 -; CHECK-NEXT: mov d20, v18.d[1] -; CHECK-NEXT: frintx d18, d18 -; CHECK-NEXT: fcvtzs x16, d22 -; CHECK-NEXT: frintx d22, d26 -; CHECK-NEXT: mov d16, v16.d[1] -; CHECK-NEXT: frintx d21, d21 -; CHECK-NEXT: fcvtzs x17, d23 -; CHECK-NEXT: frintx d23, d27 -; CHECK-NEXT: fcvtzs x14, d24 -; CHECK-NEXT: frintx d26, d19 -; CHECK-NEXT: fmov d19, x11 -; CHECK-NEXT: frintx d20, d20 -; CHECK-NEXT: mov d27, v17.d[1] -; CHECK-NEXT: fcvtzs x15, d25 -; CHECK-NEXT: ldp q25, q24, [sp] -; CHECK-NEXT: fcvtzs x11, d22 -; CHECK-NEXT: fmov d17, x12 -; CHECK-NEXT: fcvtzs x12, d21 -; CHECK-NEXT: fcvtzs x0, d23 -; CHECK-NEXT: fmov d23, x14 -; CHECK-NEXT: fcvtzs x14, d18 -; CHECK-NEXT: fmov d18, x17 -; CHECK-NEXT: fcvtzs x17, d20 -; CHECK-NEXT: frintx d21, d7 -; CHECK-NEXT: fcvtzs x18, d26 -; CHECK-NEXT: fmov d20, x11 -; CHECK-NEXT: frintx d22, d25 -; CHECK-NEXT: frintx d26, d27 -; CHECK-NEXT: frintx d16, d16 -; CHECK-NEXT: mov v18.d[1], x0 -; CHECK-NEXT: mov d25, v25.d[1] -; CHECK-NEXT: mov 
d7, v7.d[1] -; CHECK-NEXT: mov d6, v6.d[1] -; CHECK-NEXT: mov d0, v0.d[1] -; CHECK-NEXT: mov v20.d[1], x12 -; CHECK-NEXT: fcvtzs x11, d21 -; CHECK-NEXT: fmov d21, x15 -; CHECK-NEXT: fcvtzs x12, d22 -; CHECK-NEXT: fmov d22, x16 -; CHECK-NEXT: fcvtzs x15, d26 -; CHECK-NEXT: fmov d26, x14 -; CHECK-NEXT: fcvtzs x14, d16 -; CHECK-NEXT: frintx d25, d25 -; CHECK-NEXT: frintx d7, d7 -; CHECK-NEXT: mov d16, v1.d[1] -; CHECK-NEXT: mov d3, v3.d[1] -; CHECK-NEXT: stp q18, q20, [x8, #224] -; CHECK-NEXT: mov d18, v24.d[1] -; CHECK-NEXT: mov v22.d[1], x18 -; CHECK-NEXT: mov v26.d[1], x17 -; CHECK-NEXT: frintx d24, d24 -; CHECK-NEXT: mov v21.d[1], x15 -; CHECK-NEXT: mov v23.d[1], x14 -; CHECK-NEXT: frintx d20, d2 -; CHECK-NEXT: mov d2, v2.d[1] -; CHECK-NEXT: frintx d6, d6 -; CHECK-NEXT: frintx d5, d5 -; CHECK-NEXT: frintx d4, d4 -; CHECK-NEXT: frintx d18, d18 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: frintx d3, d3 -; CHECK-NEXT: stp q22, q26, [x8, #192] -; CHECK-NEXT: fmov d22, x10 -; CHECK-NEXT: fcvtzs x10, d24 -; CHECK-NEXT: stp q23, q21, [x8, #160] -; CHECK-NEXT: fmov d21, x11 -; CHECK-NEXT: fmov d24, x13 -; CHECK-NEXT: frintx d2, d2 -; CHECK-NEXT: fcvtzs x13, d6 -; CHECK-NEXT: frintx d6, d16 -; CHECK-NEXT: fcvtzs x11, d18 -; CHECK-NEXT: fmov d18, x12 -; CHECK-NEXT: fcvtzs x12, d25 -; CHECK-NEXT: fmov d23, x10 -; CHECK-NEXT: fcvtzs x10, d7 -; CHECK-NEXT: fcvtzs x14, d5 -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: fcvtzs x15, d3 -; CHECK-NEXT: mov v24.d[1], x13 -; CHECK-NEXT: fcvtzs x13, d2 -; CHECK-NEXT: fmov d2, x9 -; CHECK-NEXT: mov v23.d[1], x11 -; CHECK-NEXT: fcvtzs x11, d4 -; CHECK-NEXT: mov v18.d[1], x12 -; CHECK-NEXT: fcvtzs x12, d20 -; CHECK-NEXT: mov v21.d[1], x10 -; CHECK-NEXT: fcvtzs x10, d1 -; CHECK-NEXT: mov v22.d[1], x14 -; CHECK-NEXT: fcvtzs x14, d6 -; CHECK-NEXT: mov v19.d[1], x15 -; CHECK-NEXT: stp q18, q23, [x8, #128] -; CHECK-NEXT: mov v17.d[1], x11 -; CHECK-NEXT: fcvtzs x11, d0 -; CHECK-NEXT: stp q24, q21, [x8, #96] -; CHECK-NEXT: fmov d0, x12 -; 
CHECK-NEXT: fmov d1, x10 -; CHECK-NEXT: stp q17, q22, [x8, #64] -; CHECK-NEXT: mov v0.d[1], x13 -; CHECK-NEXT: mov v1.d[1], x14 -; CHECK-NEXT: mov v2.d[1], x11 -; CHECK-NEXT: stp q0, q19, [x8, #32] -; CHECK-NEXT: stp q2, q1, [x8] +; CHECK-NEXT: ptrue p1.d, vl4 +; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7 +; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6 +; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5 +; CHECK-NEXT: // kill: def $q4 killed $q4 def $z4 +; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 +; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: mov x9, #28 // =0x1c +; CHECK-NEXT: splice z17.d, p0, z17.d, z16.d +; CHECK-NEXT: ldp q20, q16, [sp, #32] +; CHECK-NEXT: splice z19.d, p0, z19.d, z18.d +; CHECK-NEXT: ldp q21, q18, [sp] +; CHECK-NEXT: splice z6.d, p0, z6.d, z7.d +; CHECK-NEXT: splice z4.d, p0, z4.d, z5.d +; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d +; CHECK-NEXT: splice z20.d, p0, z20.d, z16.d +; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: splice z21.d, p0, z21.d, z18.d +; CHECK-NEXT: movprfx z7, z17 +; CHECK-NEXT: frintx z7.d, p1/m, z17.d +; CHECK-NEXT: movprfx z5, z19 +; CHECK-NEXT: frintx z5.d, p1/m, z19.d +; CHECK-NEXT: frintx z6.d, p1/m, z6.d +; CHECK-NEXT: frintx z4.d, p1/m, z4.d +; CHECK-NEXT: frintx z2.d, p1/m, z2.d +; CHECK-NEXT: movprfx z3, z20 +; CHECK-NEXT: frintx z3.d, p1/m, z20.d +; CHECK-NEXT: frintx z0.d, p1/m, z0.d +; CHECK-NEXT: movprfx z1, z21 +; CHECK-NEXT: frintx z1.d, p1/m, z21.d +; CHECK-NEXT: fcvtzs z7.d, p1/m, z7.d +; CHECK-NEXT: fcvtzs z5.d, p1/m, z5.d +; CHECK-NEXT: fcvtzs z6.d, p1/m, z6.d +; CHECK-NEXT: fcvtzs z4.d, p1/m, z4.d +; CHECK-NEXT: fcvtzs z2.d, p1/m, z2.d +; CHECK-NEXT: fcvtzs z3.d, p1/m, z3.d +; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d +; CHECK-NEXT: fcvtzs z1.d, p1/m, z1.d +; CHECK-NEXT: st1d { z7.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #24 // =0x18 +; CHECK-NEXT: st1d { z5.d 
}, p1, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #20 // =0x14 +; CHECK-NEXT: st1d { z3.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #16 // =0x10 +; CHECK-NEXT: st1d { z1.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #12 // =0xc +; CHECK-NEXT: st1d { z6.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #8 // =0x8 +; CHECK-NEXT: st1d { z4.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #4 // =0x4 +; CHECK-NEXT: st1d { z2.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z0.d }, p1, [x8] ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.lrint.v32i64.v16f64(<32 x double> %x) ret <32 x i64> %a diff --git a/llvm/test/CodeGen/AArch64/sve-llrint.ll b/llvm/test/CodeGen/AArch64/sve-llrint.ll index 11d45b3a43521..825ff55117d5c 100644 --- a/llvm/test/CodeGen/AArch64/sve-llrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-llrint.ll @@ -5,6 +5,7 @@ define @llrint_v1i64_v1f16( %x) { ; CHECK-LABEL: llrint_v1i64_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv1i64.nxv1f16( %x) @@ -16,6 +17,7 @@ define @llrint_v1i64_v2f16( %x) { ; CHECK-LABEL: llrint_v1i64_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv2i64.nxv2f16( %x) @@ -27,8 +29,11 @@ define @llrint_v4i64_v4f16( %x) { ; CHECK-LABEL: llrint_v4i64_v4f16: ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z1.d, z0.s -; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z1.h, p0/m, z1.h +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: frintx z2.h, p0/m, z0.h ; CHECK-NEXT: movprfx z0, z1 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.h ; CHECK-NEXT: movprfx z1, z2 @@ -48,7 +53,12 @@ define @llrint_v8i64_v8f16( %x) { ; CHECK-NEXT: uunpklo z2.d, z1.s ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: uunpklo z3.d, z0.s -; CHECK-NEXT: uunpkhi z4.d, z0.s +; CHECK-NEXT: 
uunpkhi z0.d, z0.s +; CHECK-NEXT: frintx z1.h, p0/m, z1.h +; CHECK-NEXT: frintx z2.h, p0/m, z2.h +; CHECK-NEXT: frintx z3.h, p0/m, z3.h +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: frintx z4.h, p0/m, z0.h ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h ; CHECK-NEXT: movprfx z0, z2 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.h @@ -73,25 +83,36 @@ define @llrint_v16i64_v16f16( %x) { ; CHECK-NEXT: uunpklo z4.d, z2.s ; CHECK-NEXT: uunpkhi z2.d, z2.s ; CHECK-NEXT: uunpklo z5.d, z0.s -; CHECK-NEXT: uunpkhi z6.d, z0.s -; CHECK-NEXT: uunpklo z7.d, z3.s -; CHECK-NEXT: uunpkhi z24.d, z3.s -; CHECK-NEXT: uunpklo z25.d, z1.s -; CHECK-NEXT: uunpkhi z26.d, z1.s +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: uunpklo z6.d, z3.s +; CHECK-NEXT: uunpkhi z3.d, z3.s +; CHECK-NEXT: uunpklo z7.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: frintx z4.h, p0/m, z4.h +; CHECK-NEXT: frintx z2.h, p0/m, z2.h +; CHECK-NEXT: frintx z5.h, p0/m, z5.h +; CHECK-NEXT: movprfx z24, z0 +; CHECK-NEXT: frintx z24.h, p0/m, z0.h +; CHECK-NEXT: frintx z6.h, p0/m, z6.h +; CHECK-NEXT: movprfx z25, z3 +; CHECK-NEXT: frintx z25.h, p0/m, z3.h +; CHECK-NEXT: frintx z7.h, p0/m, z7.h +; CHECK-NEXT: movprfx z26, z1 +; CHECK-NEXT: frintx z26.h, p0/m, z1.h ; CHECK-NEXT: movprfx z0, z4 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.h ; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h ; CHECK-NEXT: movprfx z2, z5 ; CHECK-NEXT: fcvtzs z2.d, p0/m, z5.h -; CHECK-NEXT: movprfx z3, z6 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z6.h -; CHECK-NEXT: movprfx z4, z7 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.h -; CHECK-NEXT: movprfx z5, z24 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z24.h -; CHECK-NEXT: movprfx z6, z25 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z25.h +; CHECK-NEXT: movprfx z3, z24 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z24.h +; CHECK-NEXT: movprfx z4, z6 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z6.h +; CHECK-NEXT: movprfx z5, z25 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z25.h +; CHECK-NEXT: movprfx z6, z7 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z7.h ; CHECK-NEXT: movprfx 
z7, z26 ; CHECK-NEXT: fcvtzs z7.d, p0/m, z26.h ; CHECK-NEXT: ret @@ -110,71 +131,86 @@ define @llrint_v32i64_v32f16( %x) { ; CHECK-NEXT: uunpkhi z7.s, z2.h ; CHECK-NEXT: uunpklo z2.s, z2.h ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: uunpklo z24.s, z0.h -; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: uunpkhi z5.d, z4.s ; CHECK-NEXT: uunpklo z4.d, z4.s ; CHECK-NEXT: uunpkhi z6.d, z3.s ; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: uunpkhi z25.d, z2.s -; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: uunpklo z26.d, z0.s -; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: uunpkhi z24.d, z7.s +; CHECK-NEXT: uunpklo z7.d, z7.s +; CHECK-NEXT: frintx z5.h, p0/m, z5.h +; CHECK-NEXT: frintx z4.h, p0/m, z4.h +; CHECK-NEXT: frintx z6.h, p0/m, z6.h +; CHECK-NEXT: frintx z3.h, p0/m, z3.h +; CHECK-NEXT: frintx z24.h, p0/m, z24.h +; CHECK-NEXT: frintx z7.h, p0/m, z7.h ; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h ; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h +; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.h ; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h -; CHECK-NEXT: fcvtzs z25.d, p0/m, z25.h -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.h ; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #14 -; CHECK-NEXT: movprfx z5, z6 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z6.h -; CHECK-NEXT: uunpkhi z6.d, z7.s +; CHECK-NEXT: uunpkhi z5.s, z1.h ; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9] -; CHECK-NEXT: uunpkhi z4.s, z1.h -; CHECK-NEXT: uunpklo z7.d, z7.s +; CHECK-NEXT: uunpkhi z4.d, z2.s ; CHECK-NEXT: rdvl x9, #13 -; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] +; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #12 -; CHECK-NEXT: movprfx z5, z6 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z6.h +; CHECK-NEXT: movprfx z6, z24 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z24.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z2.d, z2.s +; CHECK-NEXT: uunpkhi z24.s, z0.h ; CHECK-NEXT: st1b { z3.b }, p1, [x8, x9] -; CHECK-NEXT: 
uunpkhi z3.d, z4.s -; CHECK-NEXT: uunpklo z4.d, z4.s -; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.h +; CHECK-NEXT: uunpklo z3.d, z5.s +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: frintx z4.h, p0/m, z4.h ; CHECK-NEXT: rdvl x9, #11 -; CHECK-NEXT: uunpkhi z6.d, z24.s -; CHECK-NEXT: uunpkhi z27.d, z1.s -; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] +; CHECK-NEXT: uunpkhi z25.d, z5.s +; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #10 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h -; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] +; CHECK-NEXT: uunpkhi z5.d, z1.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: frintx z2.h, p0/m, z2.h +; CHECK-NEXT: uunpkhi z6.d, z24.s +; CHECK-NEXT: uunpklo z24.d, z24.s +; CHECK-NEXT: frintx z3.h, p0/m, z3.h ; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h -; CHECK-NEXT: uunpklo z7.d, z24.s +; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] +; CHECK-NEXT: uunpkhi z7.d, z0.s +; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: rdvl x9, #9 -; CHECK-NEXT: movprfx z5, z27 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z27.h -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h -; CHECK-NEXT: st1b { z25.b }, p1, [x8, x9] +; CHECK-NEXT: frintx z25.h, p0/m, z25.h +; CHECK-NEXT: frintx z5.h, p0/m, z5.h +; CHECK-NEXT: frintx z1.h, p0/m, z1.h +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h +; CHECK-NEXT: frintx z6.h, p0/m, z6.h +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h +; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9] +; CHECK-NEXT: movprfx z4, z24 +; CHECK-NEXT: frintx z4.h, p0/m, z24.h +; CHECK-NEXT: frintx z7.h, p0/m, z7.h +; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: rdvl x9, #8 +; CHECK-NEXT: fcvtzs z25.d, p0/m, z25.h +; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h ; CHECK-NEXT: st1b { z2.b }, p1, [x8, x9] -; CHECK-NEXT: movprfx z2, z26 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z26.h -; CHECK-NEXT: st1d { z3.d }, p0, [x8, #7, mul vl] -; CHECK-NEXT: movprfx z3, z6 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z6.h -; CHECK-NEXT: st1d { z4.d }, p0, [x8, #6, mul vl] +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h 
+; CHECK-NEXT: movprfx z2, z6 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.h +; CHECK-NEXT: st1d { z3.d }, p0, [x8, #6, mul vl] +; CHECK-NEXT: movprfx z3, z4 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z4.h ; CHECK-NEXT: movprfx z4, z7 ; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.h +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: st1d { z25.d }, p0, [x8, #7, mul vl] ; CHECK-NEXT: st1d { z5.d }, p0, [x8, #5, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #4, mul vl] -; CHECK-NEXT: st1d { z0.d }, p0, [x8, #3, mul vl] -; CHECK-NEXT: st1d { z2.d }, p0, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z3.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: st1d { z4.d }, p0, [x8] +; CHECK-NEXT: st1d { z2.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: st1d { z3.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z4.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [x8] ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv32i64.nxv32f16( %x) ret %a @@ -185,6 +221,7 @@ define @llrint_v1i64_v1f32( %x) { ; CHECK-LABEL: llrint_v1i64_v1f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.s, p0/m, z0.s ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv1i64.nxv1f32( %x) @@ -196,6 +233,7 @@ define @llrint_v2i64_v2f32( %x) { ; CHECK-LABEL: llrint_v2i64_v2f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.s, p0/m, z0.s ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv2i64.nxv2f32( %x) @@ -207,8 +245,11 @@ define @llrint_v4i64_v4f32( %x) { ; CHECK-LABEL: llrint_v4i64_v4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z1.d, z0.s -; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z1.s, p0/m, z1.s +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: frintx z2.s, p0/m, z0.s ; CHECK-NEXT: movprfx z0, z1 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s ; CHECK-NEXT: movprfx z1, z2 @@ -223,16 +264,22 @@ define @llrint_v8i64_v8f32( %x) { ; CHECK-LABEL: llrint_v8i64_v8f32: ; CHECK: // %bb.0: ; 
CHECK-NEXT: uunpklo z2.d, z0.s -; CHECK-NEXT: uunpkhi z3.d, z0.s -; CHECK-NEXT: uunpklo z4.d, z1.s -; CHECK-NEXT: uunpkhi z5.d, z1.s +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: uunpklo z3.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z2.s, p0/m, z2.s +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: frintx z4.s, p0/m, z0.s +; CHECK-NEXT: frintx z3.s, p0/m, z3.s +; CHECK-NEXT: movprfx z5, z1 +; CHECK-NEXT: frintx z5.s, p0/m, z1.s ; CHECK-NEXT: movprfx z0, z2 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.s -; CHECK-NEXT: movprfx z1, z3 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z3.s -; CHECK-NEXT: movprfx z2, z4 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z4.s +; CHECK-NEXT: movprfx z1, z4 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z4.s +; CHECK-NEXT: movprfx z2, z3 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z3.s ; CHECK-NEXT: movprfx z3, z5 ; CHECK-NEXT: fcvtzs z3.d, p0/m, z5.s ; CHECK-NEXT: ret @@ -245,28 +292,40 @@ define @llrint_v16i64_v16f32( %x) { ; CHECK-LABEL: llrint_v16i64_v16f32: ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z4.d, z0.s -; CHECK-NEXT: uunpkhi z5.d, z0.s -; CHECK-NEXT: uunpklo z6.d, z1.s -; CHECK-NEXT: uunpkhi z7.d, z1.s -; CHECK-NEXT: uunpklo z24.d, z2.s -; CHECK-NEXT: uunpkhi z25.d, z2.s -; CHECK-NEXT: uunpklo z26.d, z3.s -; CHECK-NEXT: uunpkhi z27.d, z3.s +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: uunpklo z5.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: uunpklo z6.d, z2.s +; CHECK-NEXT: uunpkhi z2.d, z2.s +; CHECK-NEXT: uunpklo z7.d, z3.s +; CHECK-NEXT: uunpkhi z3.d, z3.s ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z4.s, p0/m, z4.s +; CHECK-NEXT: movprfx z24, z0 +; CHECK-NEXT: frintx z24.s, p0/m, z0.s +; CHECK-NEXT: frintx z5.s, p0/m, z5.s +; CHECK-NEXT: movprfx z25, z1 +; CHECK-NEXT: frintx z25.s, p0/m, z1.s +; CHECK-NEXT: frintx z6.s, p0/m, z6.s +; CHECK-NEXT: movprfx z26, z2 +; CHECK-NEXT: frintx z26.s, p0/m, z2.s +; CHECK-NEXT: frintx z7.s, p0/m, z7.s +; CHECK-NEXT: movprfx z27, z3 +; CHECK-NEXT: frintx z27.s, p0/m, z3.s ; 
CHECK-NEXT: movprfx z0, z4 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.s -; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.s -; CHECK-NEXT: movprfx z2, z6 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.s -; CHECK-NEXT: movprfx z3, z7 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.s -; CHECK-NEXT: movprfx z4, z24 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z24.s -; CHECK-NEXT: movprfx z5, z25 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z25.s -; CHECK-NEXT: movprfx z6, z26 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z26.s +; CHECK-NEXT: movprfx z1, z24 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z24.s +; CHECK-NEXT: movprfx z2, z5 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z5.s +; CHECK-NEXT: movprfx z3, z25 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z25.s +; CHECK-NEXT: movprfx z4, z6 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z6.s +; CHECK-NEXT: movprfx z5, z26 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z26.s +; CHECK-NEXT: movprfx z6, z7 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z7.s ; CHECK-NEXT: movprfx z7, z27 ; CHECK-NEXT: fcvtzs z7.d, p0/m, z27.s ; CHECK-NEXT: ret @@ -279,65 +338,83 @@ define @llrint_v32i64_v32f32( %x) { ; CHECK-LABEL: llrint_v32i64_v32f32: ; CHECK: // %bb.0: ; CHECK-NEXT: uunpkhi z24.d, z7.s -; CHECK-NEXT: uunpklo z7.d, z7.s -; CHECK-NEXT: rdvl x9, #15 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpkhi z27.d, z6.s -; CHECK-NEXT: uunpklo z6.d, z6.s -; CHECK-NEXT: uunpkhi z30.d, z5.s -; CHECK-NEXT: uunpklo z5.d, z5.s -; CHECK-NEXT: uunpkhi z31.d, z4.s +; CHECK-NEXT: rdvl x9, #15 +; CHECK-NEXT: uunpklo z7.d, z7.s +; CHECK-NEXT: uunpkhi z25.d, z6.s ; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: uunpklo z6.d, z6.s +; CHECK-NEXT: uunpkhi z27.d, z4.s +; CHECK-NEXT: uunpklo z4.d, z4.s ; CHECK-NEXT: uunpklo z29.d, z3.s ; CHECK-NEXT: uunpkhi z3.d, z3.s -; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.s -; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.s -; CHECK-NEXT: uunpklo z4.d, z4.s -; CHECK-NEXT: fcvtzs z27.d, p0/m, z27.s -; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s -; CHECK-NEXT: uunpkhi z25.d, z0.s -; CHECK-NEXT: fcvtzs z30.d, p0/m, z30.s -; CHECK-NEXT: fcvtzs z5.d, p0/m, 
z5.s ; CHECK-NEXT: uunpklo z26.d, z1.s -; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: frintx z24.s, p0/m, z24.s ; CHECK-NEXT: uunpklo z28.d, z2.s ; CHECK-NEXT: uunpkhi z2.d, z2.s +; CHECK-NEXT: frintx z7.s, p0/m, z7.s +; CHECK-NEXT: frintx z25.s, p0/m, z25.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: frintx z6.s, p0/m, z6.s +; CHECK-NEXT: frintx z27.s, p0/m, z27.s +; CHECK-NEXT: frintx z4.s, p0/m, z4.s +; CHECK-NEXT: frintx z3.s, p0/m, z3.s +; CHECK-NEXT: frintx z26.s, p0/m, z26.s +; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.s +; CHECK-NEXT: frintx z2.s, p0/m, z2.s +; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.s +; CHECK-NEXT: fcvtzs z25.d, p0/m, z25.s +; CHECK-NEXT: frintx z1.s, p0/m, z1.s +; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s ; CHECK-NEXT: st1b { z24.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #14 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s +; CHECK-NEXT: uunpkhi z24.d, z5.s ; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #13 -; CHECK-NEXT: movprfx z7, z31 -; CHECK-NEXT: fcvtzs z7.d, p0/m, z31.s -; CHECK-NEXT: st1b { z27.b }, p1, [x8, x9] +; CHECK-NEXT: uunpklo z5.d, z5.s +; CHECK-NEXT: st1b { z25.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #12 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s +; CHECK-NEXT: movprfx z25, z27 +; CHECK-NEXT: fcvtzs z25.d, p0/m, z27.s ; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #11 ; CHECK-NEXT: movprfx z6, z29 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z29.s +; CHECK-NEXT: frintx z6.s, p0/m, z29.s +; CHECK-NEXT: frintx z24.s, p0/m, z24.s +; CHECK-NEXT: uunpkhi z7.d, z0.s ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: st1b { z30.b }, p1, [x8, x9] +; CHECK-NEXT: frintx z5.s, p0/m, z5.s +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s +; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.s +; CHECK-NEXT: frintx z7.s, p0/m, z7.s +; CHECK-NEXT: frintx z0.s, p0/m, z0.s +; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.s +; CHECK-NEXT: st1b { z24.b }, p1, 
[x8, x9] +; CHECK-NEXT: movprfx z24, z28 +; CHECK-NEXT: frintx z24.s, p0/m, z28.s ; CHECK-NEXT: rdvl x9, #10 ; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #9 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s -; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] +; CHECK-NEXT: movprfx z5, z6 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z6.s +; CHECK-NEXT: st1b { z25.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #8 -; CHECK-NEXT: movprfx z5, z28 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z28.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s ; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9] -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s -; CHECK-NEXT: movprfx z4, z25 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z25.s +; CHECK-NEXT: movprfx z4, z7 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.s +; CHECK-NEXT: movprfx z6, z24 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z24.s ; CHECK-NEXT: st1d { z3.d }, p0, [x8, #7, mul vl] ; CHECK-NEXT: movprfx z3, z26 ; CHECK-NEXT: fcvtzs z3.d, p0/m, z26.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s -; CHECK-NEXT: st1d { z6.d }, p0, [x8, #6, mul vl] +; CHECK-NEXT: st1d { z5.d }, p0, [x8, #6, mul vl] ; CHECK-NEXT: st1d { z2.d }, p0, [x8, #5, mul vl] -; CHECK-NEXT: st1d { z5.d }, p0, [x8, #4, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: st1d { z6.d }, p0, [x8, #4, mul vl] ; CHECK-NEXT: st1d { z3.d }, p0, [x8, #2, mul vl] ; CHECK-NEXT: st1d { z4.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1d { z0.d }, p0, [x8] @@ -351,6 +428,7 @@ define @llrint_v1i64_v1f64( %x) { ; CHECK-LABEL: llrint_v1i64_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv1i64.nxv1f64( %x) @@ -362,6 +440,7 @@ define @llrint_v2i64_v2f64( %x) { ; CHECK-LABEL: llrint_v2i64_v2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv2i64.nxv2f64( %x) @@ -373,6 +452,8 @@ define @llrint_v4i64_v4f64( %x) { ; 
CHECK-LABEL: llrint_v4i64_v4f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: frintx z1.d, p0/m, z1.d ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d ; CHECK-NEXT: ret @@ -385,6 +466,10 @@ define @llrint_v8i64_v8f64( %x) { ; CHECK-LABEL: llrint_v8i64_v8f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: frintx z1.d, p0/m, z1.d +; CHECK-NEXT: frintx z2.d, p0/m, z2.d +; CHECK-NEXT: frintx z3.d, p0/m, z3.d ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d ; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d @@ -399,6 +484,14 @@ define @llrint_v16f64( %x) { ; CHECK-LABEL: llrint_v16f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: frintx z1.d, p0/m, z1.d +; CHECK-NEXT: frintx z2.d, p0/m, z2.d +; CHECK-NEXT: frintx z3.d, p0/m, z3.d +; CHECK-NEXT: frintx z4.d, p0/m, z4.d +; CHECK-NEXT: frintx z5.d, p0/m, z5.d +; CHECK-NEXT: frintx z6.d, p0/m, z6.d +; CHECK-NEXT: frintx z7.d, p0/m, z7.d ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d ; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d @@ -417,74 +510,91 @@ define @llrint_v32f64( %x) { ; CHECK-LABEL: llrint_v32f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: rdvl x9, #15 -; CHECK-NEXT: rdvl x10, #14 +; CHECK-NEXT: rdvl x14, #15 +; CHECK-NEXT: rdvl x15, #14 ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: rdvl x11, #13 +; CHECK-NEXT: rdvl x13, #13 ; CHECK-NEXT: rdvl x12, #12 -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x9] -; CHECK-NEXT: rdvl x13, #11 -; CHECK-NEXT: rdvl x14, #10 -; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0, x10] -; CHECK-NEXT: ld1b { z2.b }, p0/z, [x0, x11] +; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x14] +; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0, x15] +; CHECK-NEXT: rdvl x10, #11 +; CHECK-NEXT: rdvl x11, #10 +; CHECK-NEXT: rdvl x9, #9 +; CHECK-NEXT: ld1b { z2.b }, p0/z, [x0, x13] ; CHECK-NEXT: ld1b { z3.b 
}, p0/z, [x0, x12] -; CHECK-NEXT: ld1b { z4.b }, p0/z, [x0, x13] -; CHECK-NEXT: ld1b { z5.b }, p0/z, [x0, x14] -; CHECK-NEXT: rdvl x15, #9 -; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d +; CHECK-NEXT: ld1b { z4.b }, p0/z, [x0, x10] +; CHECK-NEXT: ld1b { z5.b }, p0/z, [x0, x11] +; CHECK-NEXT: frintx z0.d, p1/m, z0.d +; CHECK-NEXT: frintx z1.d, p1/m, z1.d +; CHECK-NEXT: ld1b { z6.b }, p0/z, [x0, x9] ; CHECK-NEXT: rdvl x16, #8 -; CHECK-NEXT: ld1b { z6.b }, p0/z, [x0, x15] -; CHECK-NEXT: ld1b { z7.b }, p0/z, [x0, x16] +; CHECK-NEXT: frintx z2.d, p1/m, z2.d ; CHECK-NEXT: ld1d { z24.d }, p1/z, [x0, #7, mul vl] +; CHECK-NEXT: frintx z3.d, p1/m, z3.d +; CHECK-NEXT: frintx z4.d, p1/m, z4.d +; CHECK-NEXT: frintx z5.d, p1/m, z5.d +; CHECK-NEXT: frintx z6.d, p1/m, z6.d +; CHECK-NEXT: ld1b { z7.b }, p0/z, [x0, x16] ; CHECK-NEXT: ld1d { z25.d }, p1/z, [x0, #6, mul vl] -; CHECK-NEXT: ld1d { z26.d }, p1/z, [x0, #5, mul vl] +; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d ; CHECK-NEXT: fcvtzs z1.d, p1/m, z1.d +; CHECK-NEXT: ld1d { z26.d }, p1/z, [x0, #5, mul vl] ; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0, #4, mul vl] ; CHECK-NEXT: ld1d { z28.d }, p1/z, [x0, #3, mul vl] -; CHECK-NEXT: fcvtzs z2.d, p1/m, z2.d ; CHECK-NEXT: ld1d { z29.d }, p1/z, [x0, #2, mul vl] ; CHECK-NEXT: ld1d { z30.d }, p1/z, [x0, #1, mul vl] -; CHECK-NEXT: fcvtzs z3.d, p1/m, z3.d +; CHECK-NEXT: fcvtzs z2.d, p1/m, z2.d ; CHECK-NEXT: ld1d { z31.d }, p1/z, [x0] +; CHECK-NEXT: frintx z7.d, p1/m, z7.d +; CHECK-NEXT: fcvtzs z3.d, p1/m, z3.d ; CHECK-NEXT: fcvtzs z4.d, p1/m, z4.d -; CHECK-NEXT: st1b { z0.b }, p0, [x8, x9] +; CHECK-NEXT: st1b { z0.b }, p0, [x8, x14] ; CHECK-NEXT: movprfx z0, z5 ; CHECK-NEXT: fcvtzs z0.d, p1/m, z5.d -; CHECK-NEXT: st1b { z1.b }, p0, [x8, x10] +; CHECK-NEXT: frintx z24.d, p1/m, z24.d +; CHECK-NEXT: st1b { z1.b }, p0, [x8, x15] ; CHECK-NEXT: movprfx z1, z6 ; CHECK-NEXT: fcvtzs z1.d, p1/m, z6.d -; CHECK-NEXT: st1b { z2.b }, p0, [x8, x11] +; CHECK-NEXT: movprfx z5, z25 +; CHECK-NEXT: frintx z5.d, p1/m, z25.d +; 
CHECK-NEXT: movprfx z6, z26 +; CHECK-NEXT: frintx z6.d, p1/m, z26.d +; CHECK-NEXT: st1b { z2.b }, p0, [x8, x13] ; CHECK-NEXT: movprfx z2, z7 ; CHECK-NEXT: fcvtzs z2.d, p1/m, z7.d +; CHECK-NEXT: movprfx z7, z27 +; CHECK-NEXT: frintx z7.d, p1/m, z27.d ; CHECK-NEXT: st1b { z3.b }, p0, [x8, x12] -; CHECK-NEXT: movprfx z3, z24 -; CHECK-NEXT: fcvtzs z3.d, p1/m, z24.d -; CHECK-NEXT: st1b { z4.b }, p0, [x8, x13] -; CHECK-NEXT: movprfx z4, z25 -; CHECK-NEXT: fcvtzs z4.d, p1/m, z25.d -; CHECK-NEXT: st1b { z0.b }, p0, [x8, x14] -; CHECK-NEXT: movprfx z0, z26 -; CHECK-NEXT: fcvtzs z0.d, p1/m, z26.d -; CHECK-NEXT: st1b { z1.b }, p0, [x8, x15] -; CHECK-NEXT: movprfx z1, z27 -; CHECK-NEXT: fcvtzs z1.d, p1/m, z27.d +; CHECK-NEXT: movprfx z3, z28 +; CHECK-NEXT: frintx z3.d, p1/m, z28.d +; CHECK-NEXT: st1b { z4.b }, p0, [x8, x10] +; CHECK-NEXT: movprfx z4, z29 +; CHECK-NEXT: frintx z4.d, p1/m, z29.d +; CHECK-NEXT: st1b { z0.b }, p0, [x8, x11] +; CHECK-NEXT: movprfx z0, z30 +; CHECK-NEXT: frintx z0.d, p1/m, z30.d +; CHECK-NEXT: fcvtzs z24.d, p1/m, z24.d +; CHECK-NEXT: st1b { z1.b }, p0, [x8, x9] +; CHECK-NEXT: movprfx z1, z31 +; CHECK-NEXT: frintx z1.d, p1/m, z31.d +; CHECK-NEXT: fcvtzs z5.d, p1/m, z5.d +; CHECK-NEXT: fcvtzs z6.d, p1/m, z6.d +; CHECK-NEXT: fcvtzs z7.d, p1/m, z7.d ; CHECK-NEXT: st1b { z2.b }, p0, [x8, x16] -; CHECK-NEXT: movprfx z2, z28 -; CHECK-NEXT: fcvtzs z2.d, p1/m, z28.d -; CHECK-NEXT: st1d { z3.d }, p1, [x8, #7, mul vl] -; CHECK-NEXT: movprfx z3, z29 -; CHECK-NEXT: fcvtzs z3.d, p1/m, z29.d -; CHECK-NEXT: st1d { z4.d }, p1, [x8, #6, mul vl] -; CHECK-NEXT: movprfx z4, z30 -; CHECK-NEXT: fcvtzs z4.d, p1/m, z30.d -; CHECK-NEXT: st1d { z0.d }, p1, [x8, #5, mul vl] -; CHECK-NEXT: movprfx z0, z31 -; CHECK-NEXT: fcvtzs z0.d, p1/m, z31.d -; CHECK-NEXT: st1d { z1.d }, p1, [x8, #4, mul vl] +; CHECK-NEXT: movprfx z2, z3 +; CHECK-NEXT: fcvtzs z2.d, p1/m, z3.d +; CHECK-NEXT: movprfx z3, z4 +; CHECK-NEXT: fcvtzs z3.d, p1/m, z4.d +; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d +; 
CHECK-NEXT: st1d { z24.d }, p1, [x8, #7, mul vl] +; CHECK-NEXT: fcvtzs z1.d, p1/m, z1.d +; CHECK-NEXT: st1d { z5.d }, p1, [x8, #6, mul vl] +; CHECK-NEXT: st1d { z6.d }, p1, [x8, #5, mul vl] +; CHECK-NEXT: st1d { z7.d }, p1, [x8, #4, mul vl] ; CHECK-NEXT: st1d { z2.d }, p1, [x8, #3, mul vl] ; CHECK-NEXT: st1d { z3.d }, p1, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z4.d }, p1, [x8, #1, mul vl] -; CHECK-NEXT: st1d { z0.d }, p1, [x8] +; CHECK-NEXT: st1d { z0.d }, p1, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z1.d }, p1, [x8] ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv32i64.nxv16f64( %x) ret %a diff --git a/llvm/test/CodeGen/AArch64/sve-lrint.ll b/llvm/test/CodeGen/AArch64/sve-lrint.ll index 1e7bf2e280ce8..ce58e26ff8a75 100644 --- a/llvm/test/CodeGen/AArch64/sve-lrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-lrint.ll @@ -5,6 +5,7 @@ define @lrint_v1f16( %x) { ; CHECK-LABEL: lrint_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv1i64.nxv1f16( %x) @@ -16,6 +17,7 @@ define @lrint_v2f16( %x) { ; CHECK-LABEL: lrint_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv2i64.nxv2f16( %x) @@ -27,8 +29,11 @@ define @lrint_v4f16( %x) { ; CHECK-LABEL: lrint_v4f16: ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z1.d, z0.s -; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z1.h, p0/m, z1.h +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: frintx z2.h, p0/m, z0.h ; CHECK-NEXT: movprfx z0, z1 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.h ; CHECK-NEXT: movprfx z1, z2 @@ -48,7 +53,12 @@ define @lrint_v8f16( %x) { ; CHECK-NEXT: uunpklo z2.d, z1.s ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: uunpklo z3.d, z0.s -; CHECK-NEXT: uunpkhi z4.d, z0.s +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: frintx z1.h, p0/m, z1.h 
+; CHECK-NEXT: frintx z2.h, p0/m, z2.h +; CHECK-NEXT: frintx z3.h, p0/m, z3.h +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: frintx z4.h, p0/m, z0.h ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h ; CHECK-NEXT: movprfx z0, z2 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.h @@ -73,25 +83,36 @@ define @lrint_v16i64_v16f16( %x) { ; CHECK-NEXT: uunpklo z4.d, z2.s ; CHECK-NEXT: uunpkhi z2.d, z2.s ; CHECK-NEXT: uunpklo z5.d, z0.s -; CHECK-NEXT: uunpkhi z6.d, z0.s -; CHECK-NEXT: uunpklo z7.d, z3.s -; CHECK-NEXT: uunpkhi z24.d, z3.s -; CHECK-NEXT: uunpklo z25.d, z1.s -; CHECK-NEXT: uunpkhi z26.d, z1.s +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: uunpklo z6.d, z3.s +; CHECK-NEXT: uunpkhi z3.d, z3.s +; CHECK-NEXT: uunpklo z7.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: frintx z4.h, p0/m, z4.h +; CHECK-NEXT: frintx z2.h, p0/m, z2.h +; CHECK-NEXT: frintx z5.h, p0/m, z5.h +; CHECK-NEXT: movprfx z24, z0 +; CHECK-NEXT: frintx z24.h, p0/m, z0.h +; CHECK-NEXT: frintx z6.h, p0/m, z6.h +; CHECK-NEXT: movprfx z25, z3 +; CHECK-NEXT: frintx z25.h, p0/m, z3.h +; CHECK-NEXT: frintx z7.h, p0/m, z7.h +; CHECK-NEXT: movprfx z26, z1 +; CHECK-NEXT: frintx z26.h, p0/m, z1.h ; CHECK-NEXT: movprfx z0, z4 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.h ; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h ; CHECK-NEXT: movprfx z2, z5 ; CHECK-NEXT: fcvtzs z2.d, p0/m, z5.h -; CHECK-NEXT: movprfx z3, z6 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z6.h -; CHECK-NEXT: movprfx z4, z7 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.h -; CHECK-NEXT: movprfx z5, z24 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z24.h -; CHECK-NEXT: movprfx z6, z25 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z25.h +; CHECK-NEXT: movprfx z3, z24 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z24.h +; CHECK-NEXT: movprfx z4, z6 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z6.h +; CHECK-NEXT: movprfx z5, z25 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z25.h +; CHECK-NEXT: movprfx z6, z7 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z7.h ; CHECK-NEXT: movprfx z7, z26 ; CHECK-NEXT: fcvtzs z7.d, p0/m, z26.h ; 
CHECK-NEXT: ret @@ -110,71 +131,86 @@ define @lrint_v32i64_v32f16( %x) { ; CHECK-NEXT: uunpkhi z7.s, z2.h ; CHECK-NEXT: uunpklo z2.s, z2.h ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: uunpklo z24.s, z0.h -; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: uunpkhi z5.d, z4.s ; CHECK-NEXT: uunpklo z4.d, z4.s ; CHECK-NEXT: uunpkhi z6.d, z3.s ; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: uunpkhi z25.d, z2.s -; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: uunpklo z26.d, z0.s -; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: uunpkhi z24.d, z7.s +; CHECK-NEXT: uunpklo z7.d, z7.s +; CHECK-NEXT: frintx z5.h, p0/m, z5.h +; CHECK-NEXT: frintx z4.h, p0/m, z4.h +; CHECK-NEXT: frintx z6.h, p0/m, z6.h +; CHECK-NEXT: frintx z3.h, p0/m, z3.h +; CHECK-NEXT: frintx z24.h, p0/m, z24.h +; CHECK-NEXT: frintx z7.h, p0/m, z7.h ; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h ; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h +; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.h ; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h -; CHECK-NEXT: fcvtzs z25.d, p0/m, z25.h -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.h ; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #14 -; CHECK-NEXT: movprfx z5, z6 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z6.h -; CHECK-NEXT: uunpkhi z6.d, z7.s +; CHECK-NEXT: uunpkhi z5.s, z1.h ; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9] -; CHECK-NEXT: uunpkhi z4.s, z1.h -; CHECK-NEXT: uunpklo z7.d, z7.s +; CHECK-NEXT: uunpkhi z4.d, z2.s ; CHECK-NEXT: rdvl x9, #13 -; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] +; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #12 -; CHECK-NEXT: movprfx z5, z6 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z6.h +; CHECK-NEXT: movprfx z6, z24 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z24.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z2.d, z2.s +; CHECK-NEXT: uunpkhi z24.s, z0.h ; CHECK-NEXT: st1b { z3.b }, p1, [x8, x9] -; CHECK-NEXT: uunpkhi z3.d, z4.s -; CHECK-NEXT: uunpklo z4.d, z4.s -; 
CHECK-NEXT: fcvtzs z7.d, p0/m, z7.h +; CHECK-NEXT: uunpklo z3.d, z5.s +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: frintx z4.h, p0/m, z4.h ; CHECK-NEXT: rdvl x9, #11 -; CHECK-NEXT: uunpkhi z6.d, z24.s -; CHECK-NEXT: uunpkhi z27.d, z1.s -; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] +; CHECK-NEXT: uunpkhi z25.d, z5.s +; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #10 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h -; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] +; CHECK-NEXT: uunpkhi z5.d, z1.s +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: frintx z2.h, p0/m, z2.h +; CHECK-NEXT: uunpkhi z6.d, z24.s +; CHECK-NEXT: uunpklo z24.d, z24.s +; CHECK-NEXT: frintx z3.h, p0/m, z3.h ; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h -; CHECK-NEXT: uunpklo z7.d, z24.s +; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] +; CHECK-NEXT: uunpkhi z7.d, z0.s +; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: rdvl x9, #9 -; CHECK-NEXT: movprfx z5, z27 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z27.h -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h -; CHECK-NEXT: st1b { z25.b }, p1, [x8, x9] +; CHECK-NEXT: frintx z25.h, p0/m, z25.h +; CHECK-NEXT: frintx z5.h, p0/m, z5.h +; CHECK-NEXT: frintx z1.h, p0/m, z1.h +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h +; CHECK-NEXT: frintx z6.h, p0/m, z6.h +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h +; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9] +; CHECK-NEXT: movprfx z4, z24 +; CHECK-NEXT: frintx z4.h, p0/m, z24.h +; CHECK-NEXT: frintx z7.h, p0/m, z7.h +; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: rdvl x9, #8 +; CHECK-NEXT: fcvtzs z25.d, p0/m, z25.h +; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h ; CHECK-NEXT: st1b { z2.b }, p1, [x8, x9] -; CHECK-NEXT: movprfx z2, z26 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z26.h -; CHECK-NEXT: st1d { z3.d }, p0, [x8, #7, mul vl] -; CHECK-NEXT: movprfx z3, z6 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z6.h -; CHECK-NEXT: st1d { z4.d }, p0, [x8, #6, mul vl] +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h +; CHECK-NEXT: movprfx z2, z6 +; CHECK-NEXT: fcvtzs z2.d, 
p0/m, z6.h +; CHECK-NEXT: st1d { z3.d }, p0, [x8, #6, mul vl] +; CHECK-NEXT: movprfx z3, z4 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z4.h ; CHECK-NEXT: movprfx z4, z7 ; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.h +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: st1d { z25.d }, p0, [x8, #7, mul vl] ; CHECK-NEXT: st1d { z5.d }, p0, [x8, #5, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #4, mul vl] -; CHECK-NEXT: st1d { z0.d }, p0, [x8, #3, mul vl] -; CHECK-NEXT: st1d { z2.d }, p0, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z3.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: st1d { z4.d }, p0, [x8] +; CHECK-NEXT: st1d { z2.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: st1d { z3.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z4.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [x8] ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv32i64.nxv32f16( %x) ret %a @@ -185,6 +221,7 @@ define @lrint_v1f32( %x) { ; CHECK-LABEL: lrint_v1f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.s, p0/m, z0.s ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv1i64.nxv1f32( %x) @@ -196,6 +233,7 @@ define @lrint_v2f32( %x) { ; CHECK-LABEL: lrint_v2f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.s, p0/m, z0.s ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv2i64.nxv2f32( %x) @@ -207,8 +245,11 @@ define @lrint_v4f32( %x) { ; CHECK-LABEL: lrint_v4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z1.d, z0.s -; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z1.s, p0/m, z1.s +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: frintx z2.s, p0/m, z0.s ; CHECK-NEXT: movprfx z0, z1 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s ; CHECK-NEXT: movprfx z1, z2 @@ -223,16 +264,22 @@ define @lrint_v8f32( %x) { ; CHECK-LABEL: lrint_v8f32: ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z2.d, z0.s -; CHECK-NEXT: uunpkhi z3.d, z0.s -; CHECK-NEXT: uunpklo z4.d, z1.s -; CHECK-NEXT: 
uunpkhi z5.d, z1.s +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: uunpklo z3.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z2.s, p0/m, z2.s +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: frintx z4.s, p0/m, z0.s +; CHECK-NEXT: frintx z3.s, p0/m, z3.s +; CHECK-NEXT: movprfx z5, z1 +; CHECK-NEXT: frintx z5.s, p0/m, z1.s ; CHECK-NEXT: movprfx z0, z2 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.s -; CHECK-NEXT: movprfx z1, z3 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z3.s -; CHECK-NEXT: movprfx z2, z4 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z4.s +; CHECK-NEXT: movprfx z1, z4 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z4.s +; CHECK-NEXT: movprfx z2, z3 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z3.s ; CHECK-NEXT: movprfx z3, z5 ; CHECK-NEXT: fcvtzs z3.d, p0/m, z5.s ; CHECK-NEXT: ret @@ -245,28 +292,40 @@ define @lrint_v16i64_v16f32( %x) { ; CHECK-LABEL: lrint_v16i64_v16f32: ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z4.d, z0.s -; CHECK-NEXT: uunpkhi z5.d, z0.s -; CHECK-NEXT: uunpklo z6.d, z1.s -; CHECK-NEXT: uunpkhi z7.d, z1.s -; CHECK-NEXT: uunpklo z24.d, z2.s -; CHECK-NEXT: uunpkhi z25.d, z2.s -; CHECK-NEXT: uunpklo z26.d, z3.s -; CHECK-NEXT: uunpkhi z27.d, z3.s +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: uunpklo z5.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: uunpklo z6.d, z2.s +; CHECK-NEXT: uunpkhi z2.d, z2.s +; CHECK-NEXT: uunpklo z7.d, z3.s +; CHECK-NEXT: uunpkhi z3.d, z3.s ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z4.s, p0/m, z4.s +; CHECK-NEXT: movprfx z24, z0 +; CHECK-NEXT: frintx z24.s, p0/m, z0.s +; CHECK-NEXT: frintx z5.s, p0/m, z5.s +; CHECK-NEXT: movprfx z25, z1 +; CHECK-NEXT: frintx z25.s, p0/m, z1.s +; CHECK-NEXT: frintx z6.s, p0/m, z6.s +; CHECK-NEXT: movprfx z26, z2 +; CHECK-NEXT: frintx z26.s, p0/m, z2.s +; CHECK-NEXT: frintx z7.s, p0/m, z7.s +; CHECK-NEXT: movprfx z27, z3 +; CHECK-NEXT: frintx z27.s, p0/m, z3.s ; CHECK-NEXT: movprfx z0, z4 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.s -; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fcvtzs 
z1.d, p0/m, z5.s -; CHECK-NEXT: movprfx z2, z6 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.s -; CHECK-NEXT: movprfx z3, z7 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.s -; CHECK-NEXT: movprfx z4, z24 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z24.s -; CHECK-NEXT: movprfx z5, z25 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z25.s -; CHECK-NEXT: movprfx z6, z26 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z26.s +; CHECK-NEXT: movprfx z1, z24 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z24.s +; CHECK-NEXT: movprfx z2, z5 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z5.s +; CHECK-NEXT: movprfx z3, z25 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z25.s +; CHECK-NEXT: movprfx z4, z6 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z6.s +; CHECK-NEXT: movprfx z5, z26 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z26.s +; CHECK-NEXT: movprfx z6, z7 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z7.s ; CHECK-NEXT: movprfx z7, z27 ; CHECK-NEXT: fcvtzs z7.d, p0/m, z27.s ; CHECK-NEXT: ret @@ -279,65 +338,83 @@ define @lrint_v32i64_v32f32( %x) { ; CHECK-LABEL: lrint_v32i64_v32f32: ; CHECK: // %bb.0: ; CHECK-NEXT: uunpkhi z24.d, z7.s -; CHECK-NEXT: uunpklo z7.d, z7.s -; CHECK-NEXT: rdvl x9, #15 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpkhi z27.d, z6.s -; CHECK-NEXT: uunpklo z6.d, z6.s -; CHECK-NEXT: uunpkhi z30.d, z5.s -; CHECK-NEXT: uunpklo z5.d, z5.s -; CHECK-NEXT: uunpkhi z31.d, z4.s +; CHECK-NEXT: rdvl x9, #15 +; CHECK-NEXT: uunpklo z7.d, z7.s +; CHECK-NEXT: uunpkhi z25.d, z6.s ; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: uunpklo z6.d, z6.s +; CHECK-NEXT: uunpkhi z27.d, z4.s +; CHECK-NEXT: uunpklo z4.d, z4.s ; CHECK-NEXT: uunpklo z29.d, z3.s ; CHECK-NEXT: uunpkhi z3.d, z3.s -; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.s -; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.s -; CHECK-NEXT: uunpklo z4.d, z4.s -; CHECK-NEXT: fcvtzs z27.d, p0/m, z27.s -; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s -; CHECK-NEXT: uunpkhi z25.d, z0.s -; CHECK-NEXT: fcvtzs z30.d, p0/m, z30.s -; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.s ; CHECK-NEXT: uunpklo z26.d, z1.s -; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: frintx z24.s, p0/m, z24.s ; 
CHECK-NEXT: uunpklo z28.d, z2.s ; CHECK-NEXT: uunpkhi z2.d, z2.s +; CHECK-NEXT: frintx z7.s, p0/m, z7.s +; CHECK-NEXT: frintx z25.s, p0/m, z25.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: frintx z6.s, p0/m, z6.s +; CHECK-NEXT: frintx z27.s, p0/m, z27.s +; CHECK-NEXT: frintx z4.s, p0/m, z4.s +; CHECK-NEXT: frintx z3.s, p0/m, z3.s +; CHECK-NEXT: frintx z26.s, p0/m, z26.s +; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.s +; CHECK-NEXT: frintx z2.s, p0/m, z2.s +; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.s +; CHECK-NEXT: fcvtzs z25.d, p0/m, z25.s +; CHECK-NEXT: frintx z1.s, p0/m, z1.s +; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s +; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s ; CHECK-NEXT: st1b { z24.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #14 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s +; CHECK-NEXT: uunpkhi z24.d, z5.s ; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #13 -; CHECK-NEXT: movprfx z7, z31 -; CHECK-NEXT: fcvtzs z7.d, p0/m, z31.s -; CHECK-NEXT: st1b { z27.b }, p1, [x8, x9] +; CHECK-NEXT: uunpklo z5.d, z5.s +; CHECK-NEXT: st1b { z25.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #12 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s +; CHECK-NEXT: movprfx z25, z27 +; CHECK-NEXT: fcvtzs z25.d, p0/m, z27.s ; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #11 ; CHECK-NEXT: movprfx z6, z29 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z29.s +; CHECK-NEXT: frintx z6.s, p0/m, z29.s +; CHECK-NEXT: frintx z24.s, p0/m, z24.s +; CHECK-NEXT: uunpkhi z7.d, z0.s ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: st1b { z30.b }, p1, [x8, x9] +; CHECK-NEXT: frintx z5.s, p0/m, z5.s +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s +; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.s +; CHECK-NEXT: frintx z7.s, p0/m, z7.s +; CHECK-NEXT: frintx z0.s, p0/m, z0.s +; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.s +; CHECK-NEXT: st1b { z24.b }, p1, [x8, x9] +; CHECK-NEXT: movprfx z24, z28 +; CHECK-NEXT: frintx z24.s, p0/m, z28.s ; CHECK-NEXT: rdvl x9, #10 ; 
CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #9 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s -; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] +; CHECK-NEXT: movprfx z5, z6 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z6.s +; CHECK-NEXT: st1b { z25.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #8 -; CHECK-NEXT: movprfx z5, z28 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z28.s +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s ; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9] -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s -; CHECK-NEXT: movprfx z4, z25 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z25.s +; CHECK-NEXT: movprfx z4, z7 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.s +; CHECK-NEXT: movprfx z6, z24 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z24.s ; CHECK-NEXT: st1d { z3.d }, p0, [x8, #7, mul vl] ; CHECK-NEXT: movprfx z3, z26 ; CHECK-NEXT: fcvtzs z3.d, p0/m, z26.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s -; CHECK-NEXT: st1d { z6.d }, p0, [x8, #6, mul vl] +; CHECK-NEXT: st1d { z5.d }, p0, [x8, #6, mul vl] ; CHECK-NEXT: st1d { z2.d }, p0, [x8, #5, mul vl] -; CHECK-NEXT: st1d { z5.d }, p0, [x8, #4, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: st1d { z6.d }, p0, [x8, #4, mul vl] ; CHECK-NEXT: st1d { z3.d }, p0, [x8, #2, mul vl] ; CHECK-NEXT: st1d { z4.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1d { z0.d }, p0, [x8] @@ -351,6 +428,7 @@ define @lrint_v1f64( %x) { ; CHECK-LABEL: lrint_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv1i64.nxv1f64( %x) @@ -362,6 +440,7 @@ define @lrint_v2f64( %x) { ; CHECK-LABEL: lrint_v2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv2i64.nxv2f64( %x) @@ -373,6 +452,8 @@ define @lrint_v4f64( %x) { ; CHECK-LABEL: lrint_v4f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: frintx z1.d, p0/m, z1.d 
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d ; CHECK-NEXT: ret @@ -385,6 +466,10 @@ define @lrint_v8f64( %x) { ; CHECK-LABEL: lrint_v8f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: frintx z1.d, p0/m, z1.d +; CHECK-NEXT: frintx z2.d, p0/m, z2.d +; CHECK-NEXT: frintx z3.d, p0/m, z3.d ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d ; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d @@ -399,6 +484,14 @@ define @lrint_v16f64( %x) { ; CHECK-LABEL: lrint_v16f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: frintx z1.d, p0/m, z1.d +; CHECK-NEXT: frintx z2.d, p0/m, z2.d +; CHECK-NEXT: frintx z3.d, p0/m, z3.d +; CHECK-NEXT: frintx z4.d, p0/m, z4.d +; CHECK-NEXT: frintx z5.d, p0/m, z5.d +; CHECK-NEXT: frintx z6.d, p0/m, z6.d +; CHECK-NEXT: frintx z7.d, p0/m, z7.d ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d ; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d @@ -417,74 +510,91 @@ define @lrint_v32f64( %x) { ; CHECK-LABEL: lrint_v32f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: rdvl x9, #15 -; CHECK-NEXT: rdvl x10, #14 +; CHECK-NEXT: rdvl x14, #15 +; CHECK-NEXT: rdvl x15, #14 ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: rdvl x11, #13 +; CHECK-NEXT: rdvl x13, #13 ; CHECK-NEXT: rdvl x12, #12 -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x9] -; CHECK-NEXT: rdvl x13, #11 -; CHECK-NEXT: rdvl x14, #10 -; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0, x10] -; CHECK-NEXT: ld1b { z2.b }, p0/z, [x0, x11] +; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x14] +; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0, x15] +; CHECK-NEXT: rdvl x10, #11 +; CHECK-NEXT: rdvl x11, #10 +; CHECK-NEXT: rdvl x9, #9 +; CHECK-NEXT: ld1b { z2.b }, p0/z, [x0, x13] ; CHECK-NEXT: ld1b { z3.b }, p0/z, [x0, x12] -; CHECK-NEXT: ld1b { z4.b }, p0/z, [x0, x13] -; CHECK-NEXT: ld1b { z5.b }, p0/z, [x0, x14] -; CHECK-NEXT: rdvl x15, #9 -; CHECK-NEXT: fcvtzs z0.d, p1/m, 
z0.d +; CHECK-NEXT: ld1b { z4.b }, p0/z, [x0, x10] +; CHECK-NEXT: ld1b { z5.b }, p0/z, [x0, x11] +; CHECK-NEXT: frintx z0.d, p1/m, z0.d +; CHECK-NEXT: frintx z1.d, p1/m, z1.d +; CHECK-NEXT: ld1b { z6.b }, p0/z, [x0, x9] ; CHECK-NEXT: rdvl x16, #8 -; CHECK-NEXT: ld1b { z6.b }, p0/z, [x0, x15] -; CHECK-NEXT: ld1b { z7.b }, p0/z, [x0, x16] +; CHECK-NEXT: frintx z2.d, p1/m, z2.d ; CHECK-NEXT: ld1d { z24.d }, p1/z, [x0, #7, mul vl] +; CHECK-NEXT: frintx z3.d, p1/m, z3.d +; CHECK-NEXT: frintx z4.d, p1/m, z4.d +; CHECK-NEXT: frintx z5.d, p1/m, z5.d +; CHECK-NEXT: frintx z6.d, p1/m, z6.d +; CHECK-NEXT: ld1b { z7.b }, p0/z, [x0, x16] ; CHECK-NEXT: ld1d { z25.d }, p1/z, [x0, #6, mul vl] -; CHECK-NEXT: ld1d { z26.d }, p1/z, [x0, #5, mul vl] +; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d ; CHECK-NEXT: fcvtzs z1.d, p1/m, z1.d +; CHECK-NEXT: ld1d { z26.d }, p1/z, [x0, #5, mul vl] ; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0, #4, mul vl] ; CHECK-NEXT: ld1d { z28.d }, p1/z, [x0, #3, mul vl] -; CHECK-NEXT: fcvtzs z2.d, p1/m, z2.d ; CHECK-NEXT: ld1d { z29.d }, p1/z, [x0, #2, mul vl] ; CHECK-NEXT: ld1d { z30.d }, p1/z, [x0, #1, mul vl] -; CHECK-NEXT: fcvtzs z3.d, p1/m, z3.d +; CHECK-NEXT: fcvtzs z2.d, p1/m, z2.d ; CHECK-NEXT: ld1d { z31.d }, p1/z, [x0] +; CHECK-NEXT: frintx z7.d, p1/m, z7.d +; CHECK-NEXT: fcvtzs z3.d, p1/m, z3.d ; CHECK-NEXT: fcvtzs z4.d, p1/m, z4.d -; CHECK-NEXT: st1b { z0.b }, p0, [x8, x9] +; CHECK-NEXT: st1b { z0.b }, p0, [x8, x14] ; CHECK-NEXT: movprfx z0, z5 ; CHECK-NEXT: fcvtzs z0.d, p1/m, z5.d -; CHECK-NEXT: st1b { z1.b }, p0, [x8, x10] +; CHECK-NEXT: frintx z24.d, p1/m, z24.d +; CHECK-NEXT: st1b { z1.b }, p0, [x8, x15] ; CHECK-NEXT: movprfx z1, z6 ; CHECK-NEXT: fcvtzs z1.d, p1/m, z6.d -; CHECK-NEXT: st1b { z2.b }, p0, [x8, x11] +; CHECK-NEXT: movprfx z5, z25 +; CHECK-NEXT: frintx z5.d, p1/m, z25.d +; CHECK-NEXT: movprfx z6, z26 +; CHECK-NEXT: frintx z6.d, p1/m, z26.d +; CHECK-NEXT: st1b { z2.b }, p0, [x8, x13] ; CHECK-NEXT: movprfx z2, z7 ; CHECK-NEXT: fcvtzs z2.d, p1/m, 
z7.d +; CHECK-NEXT: movprfx z7, z27 +; CHECK-NEXT: frintx z7.d, p1/m, z27.d ; CHECK-NEXT: st1b { z3.b }, p0, [x8, x12] -; CHECK-NEXT: movprfx z3, z24 -; CHECK-NEXT: fcvtzs z3.d, p1/m, z24.d -; CHECK-NEXT: st1b { z4.b }, p0, [x8, x13] -; CHECK-NEXT: movprfx z4, z25 -; CHECK-NEXT: fcvtzs z4.d, p1/m, z25.d -; CHECK-NEXT: st1b { z0.b }, p0, [x8, x14] -; CHECK-NEXT: movprfx z0, z26 -; CHECK-NEXT: fcvtzs z0.d, p1/m, z26.d -; CHECK-NEXT: st1b { z1.b }, p0, [x8, x15] -; CHECK-NEXT: movprfx z1, z27 -; CHECK-NEXT: fcvtzs z1.d, p1/m, z27.d +; CHECK-NEXT: movprfx z3, z28 +; CHECK-NEXT: frintx z3.d, p1/m, z28.d +; CHECK-NEXT: st1b { z4.b }, p0, [x8, x10] +; CHECK-NEXT: movprfx z4, z29 +; CHECK-NEXT: frintx z4.d, p1/m, z29.d +; CHECK-NEXT: st1b { z0.b }, p0, [x8, x11] +; CHECK-NEXT: movprfx z0, z30 +; CHECK-NEXT: frintx z0.d, p1/m, z30.d +; CHECK-NEXT: fcvtzs z24.d, p1/m, z24.d +; CHECK-NEXT: st1b { z1.b }, p0, [x8, x9] +; CHECK-NEXT: movprfx z1, z31 +; CHECK-NEXT: frintx z1.d, p1/m, z31.d +; CHECK-NEXT: fcvtzs z5.d, p1/m, z5.d +; CHECK-NEXT: fcvtzs z6.d, p1/m, z6.d +; CHECK-NEXT: fcvtzs z7.d, p1/m, z7.d ; CHECK-NEXT: st1b { z2.b }, p0, [x8, x16] -; CHECK-NEXT: movprfx z2, z28 -; CHECK-NEXT: fcvtzs z2.d, p1/m, z28.d -; CHECK-NEXT: st1d { z3.d }, p1, [x8, #7, mul vl] -; CHECK-NEXT: movprfx z3, z29 -; CHECK-NEXT: fcvtzs z3.d, p1/m, z29.d -; CHECK-NEXT: st1d { z4.d }, p1, [x8, #6, mul vl] -; CHECK-NEXT: movprfx z4, z30 -; CHECK-NEXT: fcvtzs z4.d, p1/m, z30.d -; CHECK-NEXT: st1d { z0.d }, p1, [x8, #5, mul vl] -; CHECK-NEXT: movprfx z0, z31 -; CHECK-NEXT: fcvtzs z0.d, p1/m, z31.d -; CHECK-NEXT: st1d { z1.d }, p1, [x8, #4, mul vl] +; CHECK-NEXT: movprfx z2, z3 +; CHECK-NEXT: fcvtzs z2.d, p1/m, z3.d +; CHECK-NEXT: movprfx z3, z4 +; CHECK-NEXT: fcvtzs z3.d, p1/m, z4.d +; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d +; CHECK-NEXT: st1d { z24.d }, p1, [x8, #7, mul vl] +; CHECK-NEXT: fcvtzs z1.d, p1/m, z1.d +; CHECK-NEXT: st1d { z5.d }, p1, [x8, #6, mul vl] +; CHECK-NEXT: st1d { z6.d }, p1, [x8, 
#5, mul vl] +; CHECK-NEXT: st1d { z7.d }, p1, [x8, #4, mul vl] ; CHECK-NEXT: st1d { z2.d }, p1, [x8, #3, mul vl] ; CHECK-NEXT: st1d { z3.d }, p1, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z4.d }, p1, [x8, #1, mul vl] -; CHECK-NEXT: st1d { z0.d }, p1, [x8] +; CHECK-NEXT: st1d { z0.d }, p1, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z1.d }, p1, [x8] ; CHECK-NEXT: ret %a = call @llvm.lrint.nxv32i64.nxv16f64( %x) ret %a diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll index d4d3fbb0e96b5..480d0c19db3aa 100644 --- a/llvm/test/CodeGen/AArch64/vector-llrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll @@ -17,12 +17,12 @@ declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>) define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { ; CHECK-LABEL: llrint_v1i64_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NEXT: mov h1, v0.h[1] ; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s1, s1 ; CHECK-NEXT: fcvtzs x8, s0 ; CHECK-NEXT: fcvtzs x9, s1 ; CHECK-NEXT: fmov d0, x8 @@ -37,22 +37,24 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { ; CHECK-LABEL: llrint_v4i64_v4f16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[2] +; CHECK-NEXT: dup v1.2s, v0.s[1] +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: frintx v1.4s, v1.4s +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: fcvtn v1.4h, v1.4s ; CHECK-NEXT: mov h2, v0.h[1] -; CHECK-NEXT: mov h3, v0.h[3] ; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: mov h3, v1.h[1] ; CHECK-NEXT: fcvt s1, h1 ; CHECK-NEXT: fcvt s2, h2 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: frintx s3, s3 ; CHECK-NEXT: fcvtzs x8, 
s0 +; CHECK-NEXT: fcvt s3, h3 ; CHECK-NEXT: fcvtzs x9, s1 ; CHECK-NEXT: fcvtzs x10, s2 -; CHECK-NEXT: fcvtzs x11, s3 ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fcvtzs x11, s3 ; CHECK-NEXT: fmov d1, x9 ; CHECK-NEXT: mov v0.d[1], x10 ; CHECK-NEXT: mov v1.d[1], x11 @@ -65,45 +67,48 @@ declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>) define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; CHECK-LABEL: llrint_v8i64_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov h4, v0.h[2] -; CHECK-NEXT: mov h3, v0.h[1] -; CHECK-NEXT: mov h7, v0.h[3] +; CHECK-NEXT: dup v1.2s, v0.s[1] +; CHECK-NEXT: dup v2.2s, v0.s[3] +; CHECK-NEXT: fcvtl v3.4s, v0.4h +; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NEXT: fcvtl v2.4s, v2.4h +; CHECK-NEXT: frintx v3.4s, v3.4s +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: frintx v1.4s, v1.4s +; CHECK-NEXT: frintx v2.4s, v2.4s +; CHECK-NEXT: fcvtn v3.4h, v3.4s +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NEXT: fcvtn v2.4h, v2.4s +; CHECK-NEXT: mov h4, v3.h[1] +; CHECK-NEXT: mov h5, v0.h[1] ; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: mov h2, v1.h[2] -; CHECK-NEXT: mov h5, v1.h[1] -; CHECK-NEXT: mov h6, v1.h[3] -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fcvt s4, h4 ; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvt s7, h7 -; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: mov h6, v1.h[1] +; CHECK-NEXT: mov h7, v2.h[1] +; CHECK-NEXT: fcvt s1, h1 ; CHECK-NEXT: fcvt s2, h2 +; CHECK-NEXT: fcvt s4, h4 ; CHECK-NEXT: fcvt s5, h5 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fcvtzs x9, s3 ; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: frintx s7, s7 -; CHECK-NEXT: fcvtzs x9, s0 -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: frintx s5, s5 -; CHECK-NEXT: frintx s6, s6 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: fcvtzs x12, s4 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fcvtzs x15, s7 -; CHECK-NEXT: 
fmov d0, x9 -; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: fcvt s7, h7 +; CHECK-NEXT: fcvtzs x11, s1 +; CHECK-NEXT: fcvtzs x12, s2 +; CHECK-NEXT: fcvtzs x10, s4 ; CHECK-NEXT: fcvtzs x13, s5 -; CHECK-NEXT: fcvtzs x14, s6 ; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fmov d1, x12 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: fmov d3, x10 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fcvtzs x14, s6 +; CHECK-NEXT: fcvtzs x15, s7 +; CHECK-NEXT: fmov d1, x11 +; CHECK-NEXT: fmov d3, x12 +; CHECK-NEXT: mov v0.d[1], x10 ; CHECK-NEXT: mov v2.d[1], x13 -; CHECK-NEXT: mov v1.d[1], x15 -; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: mov v3.d[1], x15 ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x) ret <8 x i64> %a @@ -113,84 +118,90 @@ declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>) define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-LABEL: llrint_v16i64_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: mov h17, v0.h[1] -; CHECK-NEXT: mov h19, v0.h[2] -; CHECK-NEXT: fcvt s18, h0 -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: mov h4, v2.h[1] -; CHECK-NEXT: mov h5, v2.h[2] -; CHECK-NEXT: fcvt s7, h3 -; CHECK-NEXT: fcvt s6, h2 -; CHECK-NEXT: mov h16, v3.h[2] -; CHECK-NEXT: mov h2, v2.h[3] -; CHECK-NEXT: fcvt s17, h17 -; CHECK-NEXT: fcvt s19, h19 -; CHECK-NEXT: frintx s18, s18 -; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvtl2 v4.4s, v0.8h +; CHECK-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-NEXT: fcvtl v3.4s, v0.4h +; CHECK-NEXT: dup v5.2s, v0.s[1] +; CHECK-NEXT: dup v0.2s, v0.s[3] +; CHECK-NEXT: dup v6.2s, v1.s[1] +; CHECK-NEXT: dup v7.2s, v1.s[3] +; CHECK-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NEXT: frintx v4.4s, v4.4s +; CHECK-NEXT: frintx v2.4s, v2.4s +; CHECK-NEXT: frintx v3.4s, v3.4s +; CHECK-NEXT: fcvtl v5.4s, v5.4h +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtl v6.4s, v6.4h +; CHECK-NEXT: fcvtl v7.4s, v7.4h +; CHECK-NEXT: frintx 
v1.4s, v1.4s +; CHECK-NEXT: fcvtn v4.4h, v4.4s +; CHECK-NEXT: fcvtn v2.4h, v2.4s +; CHECK-NEXT: fcvtn v3.4h, v3.4s +; CHECK-NEXT: frintx v5.4s, v5.4s +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: frintx v6.4s, v6.4s +; CHECK-NEXT: frintx v7.4s, v7.4s +; CHECK-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NEXT: mov h16, v4.h[1] ; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: frintx s7, s7 -; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvt s17, h2 +; CHECK-NEXT: mov h18, v3.h[1] +; CHECK-NEXT: fcvtn v5.4h, v5.4s +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: fcvtn v6.4h, v6.4s +; CHECK-NEXT: fcvtn v7.4h, v7.4s +; CHECK-NEXT: mov h2, v2.h[1] ; CHECK-NEXT: fcvt s16, h16 -; CHECK-NEXT: fcvt s2, h2 -; CHECK-NEXT: frintx s17, s17 -; CHECK-NEXT: frintx s19, s19 -; CHECK-NEXT: fcvtzs x13, s18 -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: frintx s5, s5 -; CHECK-NEXT: fcvtzs x9, s7 -; CHECK-NEXT: mov h7, v1.h[2] -; CHECK-NEXT: fcvtzs x8, s6 -; CHECK-NEXT: mov h6, v1.h[1] -; CHECK-NEXT: frintx s16, s16 -; CHECK-NEXT: fcvtzs x14, s17 -; CHECK-NEXT: fcvtzs x15, s19 -; CHECK-NEXT: fcvtzs x10, s4 -; CHECK-NEXT: mov h4, v3.h[1] -; CHECK-NEXT: fcvtzs x11, s5 -; CHECK-NEXT: mov h5, v1.h[3] -; CHECK-NEXT: mov h3, v3.h[3] +; CHECK-NEXT: fcvtzs x8, s4 +; CHECK-NEXT: fcvtzs x9, s17 +; CHECK-NEXT: fcvt s4, h18 +; CHECK-NEXT: fcvt s17, h5 +; CHECK-NEXT: fcvtzs x10, s3 +; CHECK-NEXT: mov h3, v5.h[1] +; CHECK-NEXT: fcvt s5, h0 +; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: mov h18, v6.h[1] +; CHECK-NEXT: mov h19, v7.h[1] +; CHECK-NEXT: fcvtzs x11, s16 +; CHECK-NEXT: mov h16, v1.h[1] ; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fcvt s7, h7 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fcvtzs x12, s16 -; CHECK-NEXT: frintx s16, s2 -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fcvt s4, h4 +; CHECK-NEXT: fcvtzs x12, s4 +; CHECK-NEXT: fcvt s4, h6 +; CHECK-NEXT: fcvtzs x13, s17 +; CHECK-NEXT: fcvtzs x14, s5 +; CHECK-NEXT: fcvt s5, h7 ; 
CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: frintx s7, s7 -; CHECK-NEXT: frintx s17, s6 +; CHECK-NEXT: fcvt s7, h2 +; CHECK-NEXT: fcvt s17, h0 +; CHECK-NEXT: fcvt s18, h18 +; CHECK-NEXT: fcvt s16, h16 +; CHECK-NEXT: fcvt s19, h19 +; CHECK-NEXT: fcvtzs x15, s1 +; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: fcvtzs x8, s4 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fcvtzs x10, s5 ; CHECK-NEXT: fmov d6, x9 -; CHECK-NEXT: mov v2.d[1], x10 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: frintx s18, s3 -; CHECK-NEXT: frintx s5, s5 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: fcvtzs x9, s7 -; CHECK-NEXT: fmov d3, x11 -; CHECK-NEXT: fcvtzs x11, s0 -; CHECK-NEXT: fmov d7, x12 -; CHECK-NEXT: fcvtzs x12, s16 -; CHECK-NEXT: fcvtzs x16, s17 -; CHECK-NEXT: fcvtzs x17, s4 -; CHECK-NEXT: fmov d0, x13 -; CHECK-NEXT: fmov d1, x15 +; CHECK-NEXT: fcvtzs x9, s3 +; CHECK-NEXT: fmov d1, x13 +; CHECK-NEXT: fcvtzs x13, s17 +; CHECK-NEXT: fcvtzs x17, s7 +; CHECK-NEXT: fcvtzs x16, s16 ; CHECK-NEXT: fcvtzs x18, s18 -; CHECK-NEXT: fcvtzs x0, s5 -; CHECK-NEXT: fmov d4, x8 -; CHECK-NEXT: fmov d5, x9 -; CHECK-NEXT: mov v0.d[1], x14 -; CHECK-NEXT: mov v1.d[1], x11 -; CHECK-NEXT: mov v3.d[1], x12 -; CHECK-NEXT: mov v4.d[1], x16 +; CHECK-NEXT: fcvtzs x0, s19 +; CHECK-NEXT: fmov d3, x14 +; CHECK-NEXT: fmov d4, x15 +; CHECK-NEXT: fmov d5, x8 +; CHECK-NEXT: fmov d7, x10 +; CHECK-NEXT: mov v0.d[1], x12 +; CHECK-NEXT: mov v1.d[1], x9 +; CHECK-NEXT: mov v2.d[1], x11 ; CHECK-NEXT: mov v6.d[1], x17 -; CHECK-NEXT: mov v7.d[1], x18 -; CHECK-NEXT: mov v5.d[1], x0 +; CHECK-NEXT: mov v3.d[1], x13 +; CHECK-NEXT: mov v4.d[1], x16 +; CHECK-NEXT: mov v5.d[1], x18 +; CHECK-NEXT: mov v7.d[1], x0 ; CHECK-NEXT: ret %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x) ret <16 x i64> %a @@ -200,170 +211,182 @@ declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>) define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-LABEL: llrint_v32i64_v32f16: ; CHECK: // 
%bb.0: -; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: ext v5.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: ext v6.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov h19, v0.h[1] -; CHECK-NEXT: fcvt s21, h0 -; CHECK-NEXT: mov h23, v1.h[2] -; CHECK-NEXT: fcvt s22, h1 -; CHECK-NEXT: fcvt s26, h2 -; CHECK-NEXT: mov h27, v2.h[1] -; CHECK-NEXT: mov h28, v2.h[2] -; CHECK-NEXT: mov h16, v4.h[2] -; CHECK-NEXT: fcvt s17, h5 -; CHECK-NEXT: mov h18, v5.h[2] -; CHECK-NEXT: mov h20, v6.h[2] -; CHECK-NEXT: fcvt s24, h7 -; CHECK-NEXT: fcvt s25, h6 -; CHECK-NEXT: fcvt s19, h19 -; CHECK-NEXT: frintx s22, s22 -; CHECK-NEXT: fcvt s16, h16 -; CHECK-NEXT: frintx s17, s17 -; CHECK-NEXT: fcvt s18, h18 -; CHECK-NEXT: fcvt s20, h20 -; CHECK-NEXT: frintx s16, s16 -; CHECK-NEXT: fcvtzs x12, s17 -; CHECK-NEXT: frintx s17, s18 -; CHECK-NEXT: frintx s18, s21 -; CHECK-NEXT: fcvt s21, h23 -; CHECK-NEXT: frintx s23, s24 -; CHECK-NEXT: frintx s24, s25 -; CHECK-NEXT: frintx s25, s19 -; CHECK-NEXT: mov h19, v7.h[1] -; CHECK-NEXT: fcvtzs x13, s16 -; CHECK-NEXT: frintx s16, s20 -; CHECK-NEXT: frintx s20, s26 -; CHECK-NEXT: fcvtzs x9, s23 -; CHECK-NEXT: mov h23, v3.h[2] -; CHECK-NEXT: fcvt s26, h27 -; CHECK-NEXT: fcvtzs x15, s24 -; CHECK-NEXT: fcvtzs x10, s25 -; CHECK-NEXT: fcvt s24, h28 -; CHECK-NEXT: mov h25, v3.h[3] -; CHECK-NEXT: fcvtzs x14, s17 -; CHECK-NEXT: frintx s21, s21 -; CHECK-NEXT: fmov d17, x12 -; CHECK-NEXT: fcvtzs x12, s16 -; CHECK-NEXT: fmov d16, x13 +; CHECK-NEXT: dup v4.2s, v1.s[1] +; CHECK-NEXT: fcvtl v5.4s, v0.4h +; CHECK-NEXT: dup v6.2s, v1.s[3] +; CHECK-NEXT: fcvtl v7.4s, v1.4h +; CHECK-NEXT: dup v16.2s, v2.s[3] +; CHECK-NEXT: fcvtl v17.4s, v2.4h +; CHECK-NEXT: dup v19.2s, v2.s[1] +; CHECK-NEXT: dup v18.2s, v0.s[1] +; CHECK-NEXT: dup v21.2s, v3.s[1] +; CHECK-NEXT: dup v24.2s, v3.s[3] +; CHECK-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NEXT: fcvtl2 v2.4s, v2.8h +; CHECK-NEXT: fcvtl v4.4s, v4.4h +; CHECK-NEXT: frintx v5.4s, v5.4s +; CHECK-NEXT: 
fcvtl v6.4s, v6.4h +; CHECK-NEXT: frintx v7.4s, v7.4s +; CHECK-NEXT: fcvtl v16.4s, v16.4h +; CHECK-NEXT: frintx v22.4s, v17.4s +; CHECK-NEXT: fcvtl v19.4s, v19.4h +; CHECK-NEXT: dup v17.2s, v0.s[3] +; CHECK-NEXT: fcvtl v21.4s, v21.4h +; CHECK-NEXT: fcvtl v24.4s, v24.4h +; CHECK-NEXT: frintx v1.4s, v1.4s +; CHECK-NEXT: frintx v2.4s, v2.4s +; CHECK-NEXT: frintx v20.4s, v4.4s +; CHECK-NEXT: fcvtn v4.4h, v5.4s +; CHECK-NEXT: frintx v23.4s, v6.4s +; CHECK-NEXT: fcvtn v5.4h, v7.4s +; CHECK-NEXT: frintx v25.4s, v16.4s +; CHECK-NEXT: fcvtn v16.4h, v22.4s +; CHECK-NEXT: frintx v26.4s, v19.4s +; CHECK-NEXT: fcvtn v6.4h, v20.4s +; CHECK-NEXT: fcvtl v20.4s, v3.4h +; CHECK-NEXT: fcvt s22, h4 +; CHECK-NEXT: fcvtn v7.4h, v23.4s +; CHECK-NEXT: fcvtl2 v23.4s, v3.8h +; CHECK-NEXT: fcvtl v3.4s, v18.4h +; CHECK-NEXT: fcvtn v25.4h, v25.4s +; CHECK-NEXT: fcvt s27, h5 +; CHECK-NEXT: fcvtl v18.4s, v17.4h +; CHECK-NEXT: frintx v17.4s, v21.4s +; CHECK-NEXT: fcvt s29, h16 +; CHECK-NEXT: mov h16, v16.h[1] +; CHECK-NEXT: frintx v20.4s, v20.4s +; CHECK-NEXT: fcvtzs x9, s22 +; CHECK-NEXT: fcvt s28, h6 +; CHECK-NEXT: fcvt s22, h7 +; CHECK-NEXT: frintx v19.4s, v3.4s +; CHECK-NEXT: fcvtn v3.4h, v26.4s +; CHECK-NEXT: mov h21, v25.h[1] +; CHECK-NEXT: frintx v23.4s, v23.4s +; CHECK-NEXT: fcvtzs x10, s27 +; CHECK-NEXT: fcvtl2 v26.4s, v0.8h +; CHECK-NEXT: fcvt s25, h25 +; CHECK-NEXT: fcvtn v17.4h, v17.4s +; CHECK-NEXT: fcvtn v20.4h, v20.4s +; CHECK-NEXT: fcvtzs x12, s28 +; CHECK-NEXT: fcvtzs x14, s29 ; CHECK-NEXT: fcvtzs x13, s22 -; CHECK-NEXT: fcvt s22, h3 -; CHECK-NEXT: mov h3, v3.h[1] -; CHECK-NEXT: mov h27, v0.h[2] -; CHECK-NEXT: mov h28, v2.h[3] +; CHECK-NEXT: frintx v22.4s, v24.4s +; CHECK-NEXT: fcvt s24, h3 +; CHECK-NEXT: fcvt s21, h21 +; CHECK-NEXT: fcvtn v23.4h, v23.4s +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fcvtzs x15, s25 +; CHECK-NEXT: mov h25, v17.h[1] +; CHECK-NEXT: fcvt s17, h17 +; CHECK-NEXT: mov h27, v20.h[1] +; CHECK-NEXT: fcvt s20, h20 +; CHECK-NEXT: fcvtn v28.4h, v2.4s +; 
CHECK-NEXT: fcvtn v22.4h, v22.4s +; CHECK-NEXT: fcvtzs x10, s24 +; CHECK-NEXT: frintx v24.4s, v26.4s +; CHECK-NEXT: fcvtzs x11, s21 +; CHECK-NEXT: mov h26, v23.h[1] ; CHECK-NEXT: fcvt s23, h23 -; CHECK-NEXT: frintx s26, s26 +; CHECK-NEXT: fcvt s25, h25 +; CHECK-NEXT: fmov d2, x13 +; CHECK-NEXT: fcvtzs x13, s17 +; CHECK-NEXT: fcvt s21, h27 ; CHECK-NEXT: fcvtzs x16, s20 -; CHECK-NEXT: frintx s20, s24 -; CHECK-NEXT: fcvt s24, h25 -; CHECK-NEXT: fcvtzs x11, s18 -; CHECK-NEXT: fmov d18, x14 +; CHECK-NEXT: fcvtn v27.4h, v1.4s +; CHECK-NEXT: mov h20, v22.h[1] +; CHECK-NEXT: fcvt s22, h22 +; CHECK-NEXT: fcvtn v24.4h, v24.4s +; CHECK-NEXT: fmov d1, x12 +; CHECK-NEXT: fcvtzs x0, s23 +; CHECK-NEXT: fmov d17, x14 +; CHECK-NEXT: fcvtzs x18, s25 +; CHECK-NEXT: mov h25, v28.h[1] +; CHECK-NEXT: fcvt s23, h28 +; CHECK-NEXT: fcvtzs x12, s21 +; CHECK-NEXT: fcvt s21, h26 +; CHECK-NEXT: fcvt s26, h27 +; CHECK-NEXT: fcvt s20, h20 +; CHECK-NEXT: fcvtzs x17, s22 +; CHECK-NEXT: fcvt s22, h24 +; CHECK-NEXT: frintx v18.4s, v18.4s +; CHECK-NEXT: mov h3, v3.h[1] +; CHECK-NEXT: mov h7, v7.h[1] +; CHECK-NEXT: fcvt s25, h25 +; CHECK-NEXT: fcvtn v19.4h, v19.4s +; CHECK-NEXT: fcvt s16, h16 ; CHECK-NEXT: fcvtzs x14, s21 -; CHECK-NEXT: frintx s22, s22 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvt s25, h27 -; CHECK-NEXT: fcvt s27, h28 -; CHECK-NEXT: frintx s23, s23 -; CHECK-NEXT: mov h21, v1.h[3] -; CHECK-NEXT: fmov d2, x15 -; CHECK-NEXT: fcvtzs x15, s26 -; CHECK-NEXT: fmov d26, x13 -; CHECK-NEXT: mov h1, v1.h[1] -; CHECK-NEXT: fcvtzs x13, s20 -; CHECK-NEXT: frintx s20, s24 -; CHECK-NEXT: fmov d24, x14 -; CHECK-NEXT: fcvtzs x14, s22 -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: fmov d22, x16 -; CHECK-NEXT: frintx s27, s27 -; CHECK-NEXT: fcvtzs x16, s23 -; CHECK-NEXT: fcvt s21, h21 -; CHECK-NEXT: frintx s25, s25 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: mov h23, v7.h[2] -; CHECK-NEXT: mov v22.d[1], x15 +; CHECK-NEXT: fmov d21, x15 +; CHECK-NEXT: mov h5, v5.h[1] ; 
CHECK-NEXT: fcvtzs x15, s20 -; CHECK-NEXT: fmov d20, x13 -; CHECK-NEXT: fcvtzs x13, s3 -; CHECK-NEXT: fmov d3, x14 -; CHECK-NEXT: fcvtzs x14, s27 -; CHECK-NEXT: fmov d27, x16 -; CHECK-NEXT: frintx s21, s21 -; CHECK-NEXT: mov h7, v7.h[3] -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fcvt s23, h23 -; CHECK-NEXT: fcvt s19, h19 -; CHECK-NEXT: mov v27.d[1], x15 -; CHECK-NEXT: fcvtzs x15, s25 -; CHECK-NEXT: mov h25, v6.h[3] +; CHECK-NEXT: fmov d20, x16 +; CHECK-NEXT: fcvtzs x16, s22 +; CHECK-NEXT: fmov d22, x17 +; CHECK-NEXT: fcvtzs x17, s26 +; CHECK-NEXT: fmov d26, x0 +; CHECK-NEXT: fcvtn v18.4h, v18.4s ; CHECK-NEXT: mov h6, v6.h[1] -; CHECK-NEXT: mov v3.d[1], x13 -; CHECK-NEXT: fcvtzs x13, s21 -; CHECK-NEXT: mov h21, v5.h[1] -; CHECK-NEXT: mov h5, v5.h[3] -; CHECK-NEXT: mov v20.d[1], x14 -; CHECK-NEXT: fcvtzs x14, s1 -; CHECK-NEXT: mov h1, v4.h[1] -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: fcvt s25, h25 +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: mov v20.d[1], x12 +; CHECK-NEXT: fcvtzs x12, s25 ; CHECK-NEXT: fcvt s7, h7 -; CHECK-NEXT: stp q3, q27, [x8, #192] +; CHECK-NEXT: mov v26.d[1], x14 +; CHECK-NEXT: mov v22.d[1], x15 +; CHECK-NEXT: fcvtzs x14, s23 +; CHECK-NEXT: fmov d23, x13 +; CHECK-NEXT: mov v21.d[1], x11 +; CHECK-NEXT: mov h4, v4.h[1] +; CHECK-NEXT: mov h25, v19.h[1] ; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: mov h3, v4.h[3] -; CHECK-NEXT: stp q22, q20, [x8, #128] -; CHECK-NEXT: fcvt s21, h21 +; CHECK-NEXT: fcvtzs x11, s3 ; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: mov v24.d[1], x13 -; CHECK-NEXT: mov v26.d[1], x14 +; CHECK-NEXT: fcvt s19, h19 +; CHECK-NEXT: fcvtzs x13, s7 +; CHECK-NEXT: stp q26, q22, [x8, #224] +; CHECK-NEXT: mov v23.d[1], x18 +; CHECK-NEXT: mov h26, v27.h[1] +; CHECK-NEXT: fmov d22, x14 ; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: frintx s22, s25 -; CHECK-NEXT: fmov d20, x12 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: frintx s6, s6 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvtzs x12, s0 -; CHECK-NEXT: frintx s5, s5 -; 
CHECK-NEXT: frintx s21, s21 -; CHECK-NEXT: fmov d0, x11 -; CHECK-NEXT: stp q26, q24, [x8, #64] -; CHECK-NEXT: fmov d24, x15 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: fcvtzs x11, s22 -; CHECK-NEXT: frintx s22, s23 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: fcvtzs x13, s6 -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: frintx s6, s7 -; CHECK-NEXT: fcvtzs x14, s5 -; CHECK-NEXT: mov v24.d[1], x12 -; CHECK-NEXT: frintx s5, s19 -; CHECK-NEXT: fcvtzs x12, s21 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: fcvtzs x10, s4 -; CHECK-NEXT: mov v20.d[1], x11 -; CHECK-NEXT: fcvtzs x11, s22 -; CHECK-NEXT: mov v2.d[1], x13 -; CHECK-NEXT: fcvtzs x15, s3 -; CHECK-NEXT: fcvtzs x13, s1 -; CHECK-NEXT: mov v18.d[1], x14 +; CHECK-NEXT: fmov d3, x16 +; CHECK-NEXT: fcvt s7, h25 ; CHECK-NEXT: fcvtzs x14, s6 -; CHECK-NEXT: stp q0, q24, [x8] -; CHECK-NEXT: mov v17.d[1], x12 -; CHECK-NEXT: fcvtzs x12, s5 -; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: fmov d1, x11 -; CHECK-NEXT: stp q2, q20, [x8, #224] -; CHECK-NEXT: fmov d2, x9 -; CHECK-NEXT: mov v16.d[1], x15 -; CHECK-NEXT: stp q17, q18, [x8, #160] -; CHECK-NEXT: mov v0.d[1], x13 +; CHECK-NEXT: mov v2.d[1], x13 +; CHECK-NEXT: stp q20, q23, [x8, #192] +; CHECK-NEXT: fcvt s23, h26 +; CHECK-NEXT: mov v22.d[1], x12 +; CHECK-NEXT: fmov d20, x10 +; CHECK-NEXT: fcvtzs x10, s16 +; CHECK-NEXT: mov h16, v24.h[1] +; CHECK-NEXT: mov h24, v18.h[1] +; CHECK-NEXT: fcvt s18, h18 ; CHECK-NEXT: mov v1.d[1], x14 -; CHECK-NEXT: mov v2.d[1], x12 -; CHECK-NEXT: stp q0, q16, [x8, #96] -; CHECK-NEXT: stp q2, q1, [x8, #32] +; CHECK-NEXT: fcvtzs x14, s7 +; CHECK-NEXT: stp q22, q21, [x8, #160] +; CHECK-NEXT: fcvtzs x12, s23 +; CHECK-NEXT: fmov d21, x17 +; CHECK-NEXT: fcvt s16, h16 +; CHECK-NEXT: mov v20.d[1], x11 +; CHECK-NEXT: fcvtzs x11, s5 +; CHECK-NEXT: fcvt s22, h24 +; CHECK-NEXT: mov v17.d[1], x10 +; CHECK-NEXT: fcvtzs x10, s18 +; CHECK-NEXT: mov v21.d[1], x12 +; CHECK-NEXT: fcvtzs x12, s19 +; CHECK-NEXT: fcvtzs x15, s16 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: 
fcvtzs x11, s4 +; CHECK-NEXT: stp q17, q20, [x8, #128] +; CHECK-NEXT: fcvtzs x13, s22 +; CHECK-NEXT: fmov d4, x10 +; CHECK-NEXT: stp q21, q2, [x8, #96] +; CHECK-NEXT: fmov d5, x12 +; CHECK-NEXT: fmov d2, x9 +; CHECK-NEXT: stp q0, q1, [x8, #64] +; CHECK-NEXT: mov v3.d[1], x15 +; CHECK-NEXT: mov v4.d[1], x13 +; CHECK-NEXT: mov v5.d[1], x14 +; CHECK-NEXT: mov v2.d[1], x11 +; CHECK-NEXT: stp q3, q4, [x8, #32] +; CHECK-NEXT: stp q2, q5, [x8] ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half> %x) ret <32 x i64> %a @@ -373,10 +396,10 @@ declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>) define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { ; CHECK-LABEL: llrint_v1i64_v1f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: frintx v0.2s, v0.2s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x) ret <1 x i64> %a @@ -386,14 +409,9 @@ declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>) define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { ; CHECK-LABEL: llrint_v2i64_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: fcvtzs x9, s1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: frintx v0.2s, v0.2s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: ret %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x) ret <2 x i64> %a @@ -404,20 +422,12 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { ; CHECK-LABEL: llrint_v4i64_v4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov s3, v0.s[1] -; CHECK-NEXT: frintx s0, s0 
-; CHECK-NEXT: mov s2, v1.s[1] -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: fcvtzs x9, s0 -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fcvtzs x10, s2 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: mov v1.d[1], x10 +; CHECK-NEXT: frintx v0.2s, v0.2s +; CHECK-NEXT: frintx v1.2s, v1.2s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtl v1.2d, v1.2s +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-NEXT: ret %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x) ret <4 x i64> %a @@ -429,34 +439,18 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: mov s4, v0.s[1] -; CHECK-NEXT: mov s7, v1.s[1] -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: mov s5, v2.s[1] -; CHECK-NEXT: mov s6, v3.s[1] -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: frintx s7, s7 -; CHECK-NEXT: fcvtzs x9, s0 -; CHECK-NEXT: fcvtzs x12, s1 -; CHECK-NEXT: frintx s5, s5 -; CHECK-NEXT: frintx s6, s6 -; CHECK-NEXT: fcvtzs x8, s2 -; CHECK-NEXT: fcvtzs x10, s3 -; CHECK-NEXT: fcvtzs x11, s4 -; CHECK-NEXT: fcvtzs x15, s7 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fmov d2, x12 -; CHECK-NEXT: fcvtzs x13, s5 -; CHECK-NEXT: fcvtzs x14, s6 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fmov d3, x10 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: mov v2.d[1], x15 -; CHECK-NEXT: mov v1.d[1], x13 -; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: frintx v0.2s, v0.2s +; CHECK-NEXT: frintx v1.2s, v1.2s +; CHECK-NEXT: frintx v2.2s, v2.2s +; CHECK-NEXT: frintx v3.2s, v3.2s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtl v1.2d, v1.2s +; CHECK-NEXT: fcvtl v4.2d, v2.2s +; CHECK-NEXT: fcvtl v3.2d, v3.2s +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; 
CHECK-NEXT: fcvtzs v2.2d, v1.2d +; CHECK-NEXT: fcvtzs v1.2d, v4.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x) ret <8 x i64> %a @@ -466,66 +460,34 @@ declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { ; CHECK-LABEL: llrint_v16i64_v16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v5.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v5.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: frintx s7, s0 -; CHECK-NEXT: ext v16.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: frintx s17, s4 -; CHECK-NEXT: mov s4, v4.s[1] -; CHECK-NEXT: mov s18, v5.s[1] -; CHECK-NEXT: frintx s5, s5 -; CHECK-NEXT: frintx s19, s6 -; CHECK-NEXT: fcvtzs x8, s7 -; CHECK-NEXT: frintx s7, s16 -; CHECK-NEXT: mov s6, v6.s[1] -; CHECK-NEXT: mov s16, v16.s[1] -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: fcvtzs x9, s17 -; CHECK-NEXT: frintx s17, s1 -; CHECK-NEXT: mov s1, v1.s[1] -; CHECK-NEXT: frintx s18, s18 -; CHECK-NEXT: fcvtzs x10, s5 -; CHECK-NEXT: mov s5, v2.s[1] -; CHECK-NEXT: fcvtzs x11, s19 -; CHECK-NEXT: mov s19, v3.s[1] -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: fcvtzs x12, s7 -; CHECK-NEXT: frintx s6, s6 -; CHECK-NEXT: fcvtzs x13, s4 -; CHECK-NEXT: frintx s4, s3 -; CHECK-NEXT: frintx s16, s16 -; CHECK-NEXT: fcvtzs x14, s18 -; CHECK-NEXT: frintx s18, s1 -; CHECK-NEXT: fcvtzs x15, s17 -; CHECK-NEXT: frintx s20, s5 -; CHECK-NEXT: frintx s17, s19 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fcvtzs x9, s2 -; CHECK-NEXT: fmov d5, x11 -; CHECK-NEXT: fmov d3, x10 -; CHECK-NEXT: fcvtzs x11, s4 -; CHECK-NEXT: fcvtzs x10, s0 -; CHECK-NEXT: fmov d7, x12 -; CHECK-NEXT: fcvtzs x12, s18 -; CHECK-NEXT: fcvtzs x17, s6 -; CHECK-NEXT: fcvtzs x18, s16 -; CHECK-NEXT: fcvtzs x16, s20 -; CHECK-NEXT: fcvtzs x0, s17 -; 
CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d2, x15 -; CHECK-NEXT: fmov d4, x9 -; CHECK-NEXT: mov v1.d[1], x13 -; CHECK-NEXT: fmov d6, x11 -; CHECK-NEXT: mov v3.d[1], x14 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: mov v5.d[1], x17 -; CHECK-NEXT: mov v7.d[1], x18 -; CHECK-NEXT: mov v2.d[1], x12 -; CHECK-NEXT: mov v4.d[1], x16 -; CHECK-NEXT: mov v6.d[1], x0 +; CHECK-NEXT: ext v7.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: frintx v0.2s, v0.2s +; CHECK-NEXT: frintx v1.2s, v1.2s +; CHECK-NEXT: frintx v2.2s, v2.2s +; CHECK-NEXT: frintx v3.2s, v3.2s +; CHECK-NEXT: frintx v5.2s, v5.2s +; CHECK-NEXT: frintx v4.2s, v4.2s +; CHECK-NEXT: frintx v6.2s, v6.2s +; CHECK-NEXT: frintx v7.2s, v7.2s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtl v1.2d, v1.2s +; CHECK-NEXT: fcvtl v16.2d, v2.2s +; CHECK-NEXT: fcvtl v18.2d, v3.2s +; CHECK-NEXT: fcvtl v5.2d, v5.2s +; CHECK-NEXT: fcvtl v17.2d, v4.2s +; CHECK-NEXT: fcvtl v19.2d, v6.2s +; CHECK-NEXT: fcvtl v7.2d, v7.2s +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v2.2d, v1.2d +; CHECK-NEXT: fcvtzs v4.2d, v16.2d +; CHECK-NEXT: fcvtzs v6.2d, v18.2d +; CHECK-NEXT: fcvtzs v1.2d, v5.2d +; CHECK-NEXT: fcvtzs v3.2d, v17.2d +; CHECK-NEXT: fcvtzs v5.2d, v19.2d +; CHECK-NEXT: fcvtzs v7.2d, v7.2d ; CHECK-NEXT: ret %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x) ret <16 x i64> %a @@ -535,134 +497,70 @@ declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) { ; CHECK-LABEL: llrint_v32i64_v32f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v16.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: ext v20.16b, v5.16b, v5.16b, #8 -; CHECK-NEXT: ext v17.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: ext v18.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v19.16b, v4.16b, v4.16b, #8 -; CHECK-NEXT: ext v21.16b, v6.16b, v6.16b, #8 -; CHECK-NEXT: ext v22.16b, v7.16b, v7.16b, #8 -; CHECK-NEXT: frintx s24, s16 -; CHECK-NEXT: mov s28, v20.s[1] -; CHECK-NEXT: frintx s25, s17 -; CHECK-NEXT: frintx s26, 
s18 -; CHECK-NEXT: frintx s27, s19 -; CHECK-NEXT: frintx s29, s20 -; CHECK-NEXT: mov s30, v21.s[1] -; CHECK-NEXT: frintx s20, s21 -; CHECK-NEXT: frintx s21, s22 -; CHECK-NEXT: mov s23, v22.s[1] -; CHECK-NEXT: mov s19, v19.s[1] -; CHECK-NEXT: mov s17, v17.s[1] -; CHECK-NEXT: fcvtzs x12, s24 -; CHECK-NEXT: frintx s24, s28 -; CHECK-NEXT: fcvtzs x13, s25 -; CHECK-NEXT: mov s25, v7.s[1] -; CHECK-NEXT: fcvtzs x9, s26 -; CHECK-NEXT: fcvtzs x11, s27 -; CHECK-NEXT: fcvtzs x14, s20 -; CHECK-NEXT: fcvtzs x15, s21 -; CHECK-NEXT: frintx s26, s1 -; CHECK-NEXT: frintx s23, s23 -; CHECK-NEXT: frintx s27, s7 -; CHECK-NEXT: frintx s22, s30 -; CHECK-NEXT: fmov d20, x12 -; CHECK-NEXT: fcvtzs x12, s24 -; CHECK-NEXT: mov s24, v6.s[1] -; CHECK-NEXT: frintx s25, s25 -; CHECK-NEXT: frintx s6, s6 -; CHECK-NEXT: fcvtzs x10, s29 -; CHECK-NEXT: fmov d7, x11 -; CHECK-NEXT: fmov d21, x13 -; CHECK-NEXT: frintx s28, s5 -; CHECK-NEXT: fcvtzs x11, s23 -; CHECK-NEXT: fmov d23, x14 -; CHECK-NEXT: fcvtzs x14, s26 -; CHECK-NEXT: fmov d26, x15 -; CHECK-NEXT: fcvtzs x15, s27 -; CHECK-NEXT: frintx s24, s24 -; CHECK-NEXT: mov s27, v5.s[1] -; CHECK-NEXT: fcvtzs x13, s22 -; CHECK-NEXT: fcvtzs x17, s25 -; CHECK-NEXT: frintx s25, s4 -; CHECK-NEXT: fcvtzs x18, s6 -; CHECK-NEXT: fmov d6, x10 -; CHECK-NEXT: frintx s22, s2 -; CHECK-NEXT: mov v26.d[1], x11 -; CHECK-NEXT: fmov d5, x14 -; CHECK-NEXT: fcvtzs x10, s24 -; CHECK-NEXT: fmov d24, x15 -; CHECK-NEXT: fcvtzs x14, s28 -; CHECK-NEXT: frintx s27, s27 -; CHECK-NEXT: mov v23.d[1], x13 -; CHECK-NEXT: mov s4, v4.s[1] -; CHECK-NEXT: fcvtzs x13, s25 -; CHECK-NEXT: fmov d25, x18 -; CHECK-NEXT: mov s16, v16.s[1] -; CHECK-NEXT: mov v24.d[1], x17 -; CHECK-NEXT: fcvtzs x16, s22 -; CHECK-NEXT: frintx s22, s3 -; CHECK-NEXT: mov s3, v3.s[1] -; CHECK-NEXT: frintx s19, s19 -; CHECK-NEXT: mov s2, v2.s[1] -; CHECK-NEXT: mov v25.d[1], x10 -; CHECK-NEXT: fcvtzs x10, s27 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: mov v6.d[1], x12 -; CHECK-NEXT: frintx s17, s17 -; CHECK-NEXT: mov 
s18, v18.s[1] -; CHECK-NEXT: stp q24, q26, [x8, #224] -; CHECK-NEXT: fmov d24, x14 -; CHECK-NEXT: fcvtzs x11, s22 -; CHECK-NEXT: ext v22.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: mov s1, v1.s[1] -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: stp q25, q23, [x8, #192] -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: fcvtzs x12, s4 -; CHECK-NEXT: mov v24.d[1], x10 -; CHECK-NEXT: fcvtzs x10, s19 -; CHECK-NEXT: mov s19, v0.s[1] -; CHECK-NEXT: frintx s16, s16 -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: fmov d4, x11 -; CHECK-NEXT: mov s27, v22.s[1] -; CHECK-NEXT: frintx s22, s22 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fcvtzs x14, s2 -; CHECK-NEXT: frintx s2, s18 -; CHECK-NEXT: stp q24, q6, [x8, #160] -; CHECK-NEXT: fmov d6, x13 -; CHECK-NEXT: fcvtzs x13, s17 -; CHECK-NEXT: frintx s17, s19 -; CHECK-NEXT: fmov d23, x16 -; CHECK-NEXT: mov v7.d[1], x10 -; CHECK-NEXT: frintx s3, s27 -; CHECK-NEXT: fcvtzs x10, s22 -; CHECK-NEXT: fcvtzs x15, s1 -; CHECK-NEXT: mov v6.d[1], x12 -; CHECK-NEXT: fcvtzs x12, s16 -; CHECK-NEXT: mov v4.d[1], x11 -; CHECK-NEXT: mov v21.d[1], x13 -; CHECK-NEXT: fcvtzs x13, s0 -; CHECK-NEXT: mov v23.d[1], x14 -; CHECK-NEXT: fcvtzs x14, s17 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: mov v5.d[1], x15 -; CHECK-NEXT: stp q6, q7, [x8, #128] -; CHECK-NEXT: mov v20.d[1], x12 -; CHECK-NEXT: fcvtzs x12, s2 -; CHECK-NEXT: stp q4, q21, [x8, #96] -; CHECK-NEXT: fmov d1, x13 -; CHECK-NEXT: fmov d2, x9 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: stp q23, q20, [x8, #64] -; CHECK-NEXT: mov v1.d[1], x14 -; CHECK-NEXT: mov v2.d[1], x12 -; CHECK-NEXT: stp q5, q0, [x8, #32] -; CHECK-NEXT: stp q1, q2, [x8] +; CHECK-NEXT: ext v16.16b, v7.16b, v7.16b, #8 +; CHECK-NEXT: ext v17.16b, v6.16b, v6.16b, #8 +; CHECK-NEXT: frintx v7.2s, v7.2s +; CHECK-NEXT: frintx v6.2s, v6.2s +; CHECK-NEXT: ext v18.16b, v5.16b, v5.16b, #8 +; CHECK-NEXT: ext v21.16b, v4.16b, v4.16b, #8 +; CHECK-NEXT: ext v22.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: 
frintx v5.2s, v5.2s +; CHECK-NEXT: ext v23.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: frintx v4.2s, v4.2s +; CHECK-NEXT: ext v19.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v20.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: frintx v16.2s, v16.2s +; CHECK-NEXT: frintx v17.2s, v17.2s +; CHECK-NEXT: fcvtl v7.2d, v7.2s +; CHECK-NEXT: fcvtl v6.2d, v6.2s +; CHECK-NEXT: frintx v18.2s, v18.2s +; CHECK-NEXT: frintx v21.2s, v21.2s +; CHECK-NEXT: frintx v2.2s, v2.2s +; CHECK-NEXT: frintx v3.2s, v3.2s +; CHECK-NEXT: fcvtl v5.2d, v5.2s +; CHECK-NEXT: frintx v23.2s, v23.2s +; CHECK-NEXT: fcvtl v4.2d, v4.2s +; CHECK-NEXT: frintx v1.2s, v1.2s +; CHECK-NEXT: fcvtl v16.2d, v16.2s +; CHECK-NEXT: fcvtl v17.2d, v17.2s +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: fcvtl v18.2d, v18.2s +; CHECK-NEXT: fcvtl v21.2d, v21.2s +; CHECK-NEXT: frintx v20.2s, v20.2s +; CHECK-NEXT: fcvtl v3.2d, v3.2s +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: frintx v0.2s, v0.2s +; CHECK-NEXT: fcvtl v2.2d, v2.2s +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: fcvtzs v16.2d, v16.2d +; CHECK-NEXT: fcvtzs v17.2d, v17.2d +; CHECK-NEXT: fcvtl v1.2d, v1.2s +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: stp q6, q17, [x8, #192] +; CHECK-NEXT: fcvtl v6.2d, v23.2s +; CHECK-NEXT: frintx v17.2s, v19.2s +; CHECK-NEXT: stp q7, q16, [x8, #224] +; CHECK-NEXT: frintx v7.2s, v22.2s +; CHECK-NEXT: fcvtzs v16.2d, v18.2d +; CHECK-NEXT: fcvtzs v18.2d, v21.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: stp q5, q16, [x8, #160] +; CHECK-NEXT: fcvtl v7.2d, v7.2s +; CHECK-NEXT: fcvtl v5.2d, v20.2s +; CHECK-NEXT: stp q4, q18, [x8, #128] +; CHECK-NEXT: fcvtl v4.2d, v17.2s +; CHECK-NEXT: stp q3, q6, [x8, #96] +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: fcvtzs v3.2d, v5.2d +; CHECK-NEXT: stp q1, q3, [x8, #32] +; CHECK-NEXT: stp q2, q7, [x8, #64] +; 
CHECK-NEXT: fcvtzs v2.2d, v4.2d +; CHECK-NEXT: stp q0, q2, [x8] ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float> %x) ret <32 x i64> %a @@ -684,13 +582,8 @@ declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>) define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { ; CHECK-LABEL: llrint_v2i64_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: frintx v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: ret %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x) ret <2 x i64> %a @@ -700,20 +593,10 @@ declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>) define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { ; CHECK-LABEL: llrint_v4i64_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d2, v0.d[1] -; CHECK-NEXT: mov d3, v1.d[1] -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: frintx d2, d2 -; CHECK-NEXT: frintx d3, d3 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: fcvtzs x10, d2 -; CHECK-NEXT: fcvtzs x11, d3 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: frintx v0.2d, v0.2d +; CHECK-NEXT: frintx v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-NEXT: ret %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x) ret <4 x i64> %a @@ -723,34 +606,14 @@ declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ; CHECK-LABEL: llrint_v8i64_v8f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d4, v0.d[1] -; CHECK-NEXT: mov d5, v1.d[1] -; CHECK-NEXT: mov d6, v2.d[1] -; CHECK-NEXT: mov d7, v3.d[1] -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: frintx d2, d2 -; CHECK-NEXT: frintx d3, d3 -; 
CHECK-NEXT: frintx d4, d4 -; CHECK-NEXT: frintx d5, d5 -; CHECK-NEXT: frintx d6, d6 -; CHECK-NEXT: frintx d7, d7 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: fcvtzs x10, d2 -; CHECK-NEXT: fcvtzs x11, d3 -; CHECK-NEXT: fcvtzs x12, d4 -; CHECK-NEXT: fcvtzs x13, d5 -; CHECK-NEXT: fcvtzs x14, d6 -; CHECK-NEXT: fcvtzs x15, d7 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fmov d2, x10 -; CHECK-NEXT: fmov d3, x11 -; CHECK-NEXT: mov v0.d[1], x12 -; CHECK-NEXT: mov v1.d[1], x13 -; CHECK-NEXT: mov v2.d[1], x14 -; CHECK-NEXT: mov v3.d[1], x15 +; CHECK-NEXT: frintx v0.2d, v0.2d +; CHECK-NEXT: frintx v1.2d, v1.2d +; CHECK-NEXT: frintx v2.2d, v2.2d +; CHECK-NEXT: frintx v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x) ret <8 x i64> %a @@ -760,62 +623,22 @@ declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) define <16 x i64> @llrint_v16f64(<16 x double> %x) { ; CHECK-LABEL: llrint_v16f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d16, v0.d[1] -; CHECK-NEXT: mov d17, v1.d[1] -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: frintx d18, d2 -; CHECK-NEXT: mov d2, v2.d[1] -; CHECK-NEXT: frintx d19, d3 -; CHECK-NEXT: mov d3, v3.d[1] -; CHECK-NEXT: frintx d16, d16 -; CHECK-NEXT: frintx d17, d17 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: frintx d0, d4 -; CHECK-NEXT: mov d4, v4.d[1] -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: frintx d1, d5 -; CHECK-NEXT: mov d5, v5.d[1] -; CHECK-NEXT: fcvtzs x12, d18 -; CHECK-NEXT: frintx d2, d2 -; CHECK-NEXT: fcvtzs x13, d19 -; CHECK-NEXT: frintx d18, d3 -; CHECK-NEXT: fcvtzs x10, d16 -; CHECK-NEXT: mov d16, v6.d[1] -; CHECK-NEXT: fcvtzs x11, d17 -; CHECK-NEXT: mov d17, v7.d[1] -; CHECK-NEXT: frintx d6, d6 -; CHECK-NEXT: frintx d7, d7 -; CHECK-NEXT: frintx d4, d4 -; CHECK-NEXT: frintx d5, d5 -; 
CHECK-NEXT: fcvtzs x14, d0 -; CHECK-NEXT: fcvtzs x15, d1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: frintx d16, d16 -; CHECK-NEXT: fcvtzs x9, d2 -; CHECK-NEXT: fmov d2, x12 -; CHECK-NEXT: frintx d17, d17 -; CHECK-NEXT: fcvtzs x8, d6 -; CHECK-NEXT: fcvtzs x12, d7 -; CHECK-NEXT: fmov d3, x13 -; CHECK-NEXT: fcvtzs x13, d18 -; CHECK-NEXT: fcvtzs x16, d4 -; CHECK-NEXT: fcvtzs x17, d5 -; CHECK-NEXT: fmov d4, x14 -; CHECK-NEXT: fmov d5, x15 -; CHECK-NEXT: fcvtzs x18, d16 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: mov v1.d[1], x11 -; CHECK-NEXT: fcvtzs x0, d17 -; CHECK-NEXT: fmov d6, x8 -; CHECK-NEXT: fmov d7, x12 -; CHECK-NEXT: mov v2.d[1], x9 -; CHECK-NEXT: mov v3.d[1], x13 -; CHECK-NEXT: mov v4.d[1], x16 -; CHECK-NEXT: mov v5.d[1], x17 -; CHECK-NEXT: mov v6.d[1], x18 -; CHECK-NEXT: mov v7.d[1], x0 +; CHECK-NEXT: frintx v0.2d, v0.2d +; CHECK-NEXT: frintx v1.2d, v1.2d +; CHECK-NEXT: frintx v2.2d, v2.2d +; CHECK-NEXT: frintx v3.2d, v3.2d +; CHECK-NEXT: frintx v4.2d, v4.2d +; CHECK-NEXT: frintx v5.2d, v5.2d +; CHECK-NEXT: frintx v6.2d, v6.2d +; CHECK-NEXT: frintx v7.2d, v7.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: fcvtzs v7.2d, v7.2d ; CHECK-NEXT: ret %a = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> %x) ret <16 x i64> %a @@ -825,130 +648,50 @@ declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>) define <32 x i64> @llrint_v32f64(<32 x double> %x) { ; CHECK-LABEL: llrint_v32f64: ; CHECK: // %bb.0: -; CHECK-NEXT: frintx d20, d0 -; CHECK-NEXT: frintx d22, d3 -; CHECK-NEXT: frintx d21, d4 +; CHECK-NEXT: ldp q17, q16, [sp, #96] +; CHECK-NEXT: frintx v7.2d, v7.2d ; CHECK-NEXT: ldp q19, q18, [sp, #64] -; CHECK-NEXT: frintx d23, d5 -; CHECK-NEXT: ldp q27, q26, [sp, #96] -; CHECK-NEXT: mov d4, v4.d[1] -; CHECK-NEXT: 
ldp q16, q17, [sp, #32] -; CHECK-NEXT: mov d5, v5.d[1] -; CHECK-NEXT: fcvtzs x9, d20 -; CHECK-NEXT: frintx d20, d6 -; CHECK-NEXT: fcvtzs x11, d22 -; CHECK-NEXT: frintx d22, d19 -; CHECK-NEXT: fcvtzs x12, d21 -; CHECK-NEXT: fcvtzs x10, d23 -; CHECK-NEXT: mov d21, v26.d[1] -; CHECK-NEXT: frintx d23, d27 -; CHECK-NEXT: mov d27, v27.d[1] -; CHECK-NEXT: frintx d24, d16 -; CHECK-NEXT: mov d19, v19.d[1] -; CHECK-NEXT: frintx d25, d17 -; CHECK-NEXT: fcvtzs x13, d20 -; CHECK-NEXT: mov d20, v18.d[1] -; CHECK-NEXT: frintx d18, d18 -; CHECK-NEXT: fcvtzs x16, d22 -; CHECK-NEXT: frintx d22, d26 -; CHECK-NEXT: mov d16, v16.d[1] -; CHECK-NEXT: frintx d21, d21 -; CHECK-NEXT: fcvtzs x17, d23 -; CHECK-NEXT: frintx d23, d27 -; CHECK-NEXT: fcvtzs x14, d24 -; CHECK-NEXT: frintx d26, d19 -; CHECK-NEXT: fmov d19, x11 -; CHECK-NEXT: frintx d20, d20 -; CHECK-NEXT: mov d27, v17.d[1] -; CHECK-NEXT: fcvtzs x15, d25 -; CHECK-NEXT: ldp q25, q24, [sp] -; CHECK-NEXT: fcvtzs x11, d22 -; CHECK-NEXT: fmov d17, x12 -; CHECK-NEXT: fcvtzs x12, d21 -; CHECK-NEXT: fcvtzs x0, d23 -; CHECK-NEXT: fmov d23, x14 -; CHECK-NEXT: fcvtzs x14, d18 -; CHECK-NEXT: fmov d18, x17 -; CHECK-NEXT: fcvtzs x17, d20 -; CHECK-NEXT: frintx d21, d7 -; CHECK-NEXT: fcvtzs x18, d26 -; CHECK-NEXT: fmov d20, x11 -; CHECK-NEXT: frintx d22, d25 -; CHECK-NEXT: frintx d26, d27 -; CHECK-NEXT: frintx d16, d16 -; CHECK-NEXT: mov v18.d[1], x0 -; CHECK-NEXT: mov d25, v25.d[1] -; CHECK-NEXT: mov d7, v7.d[1] -; CHECK-NEXT: mov d6, v6.d[1] -; CHECK-NEXT: mov d0, v0.d[1] -; CHECK-NEXT: mov v20.d[1], x12 -; CHECK-NEXT: fcvtzs x11, d21 -; CHECK-NEXT: fmov d21, x15 -; CHECK-NEXT: fcvtzs x12, d22 -; CHECK-NEXT: fmov d22, x16 -; CHECK-NEXT: fcvtzs x15, d26 -; CHECK-NEXT: fmov d26, x14 -; CHECK-NEXT: fcvtzs x14, d16 -; CHECK-NEXT: frintx d25, d25 -; CHECK-NEXT: frintx d7, d7 -; CHECK-NEXT: mov d16, v1.d[1] -; CHECK-NEXT: mov d3, v3.d[1] -; CHECK-NEXT: stp q18, q20, [x8, #224] -; CHECK-NEXT: mov d18, v24.d[1] -; CHECK-NEXT: mov v22.d[1], x18 -; 
CHECK-NEXT: mov v26.d[1], x17 -; CHECK-NEXT: frintx d24, d24 -; CHECK-NEXT: mov v21.d[1], x15 -; CHECK-NEXT: mov v23.d[1], x14 -; CHECK-NEXT: frintx d20, d2 -; CHECK-NEXT: mov d2, v2.d[1] -; CHECK-NEXT: frintx d6, d6 -; CHECK-NEXT: frintx d5, d5 -; CHECK-NEXT: frintx d4, d4 -; CHECK-NEXT: frintx d18, d18 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: frintx d3, d3 -; CHECK-NEXT: stp q22, q26, [x8, #192] -; CHECK-NEXT: fmov d22, x10 -; CHECK-NEXT: fcvtzs x10, d24 -; CHECK-NEXT: stp q23, q21, [x8, #160] -; CHECK-NEXT: fmov d21, x11 -; CHECK-NEXT: fmov d24, x13 -; CHECK-NEXT: frintx d2, d2 -; CHECK-NEXT: fcvtzs x13, d6 -; CHECK-NEXT: frintx d6, d16 -; CHECK-NEXT: fcvtzs x11, d18 -; CHECK-NEXT: fmov d18, x12 -; CHECK-NEXT: fcvtzs x12, d25 -; CHECK-NEXT: fmov d23, x10 -; CHECK-NEXT: fcvtzs x10, d7 -; CHECK-NEXT: fcvtzs x14, d5 -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: fcvtzs x15, d3 -; CHECK-NEXT: mov v24.d[1], x13 -; CHECK-NEXT: fcvtzs x13, d2 -; CHECK-NEXT: fmov d2, x9 -; CHECK-NEXT: mov v23.d[1], x11 -; CHECK-NEXT: fcvtzs x11, d4 -; CHECK-NEXT: mov v18.d[1], x12 -; CHECK-NEXT: fcvtzs x12, d20 -; CHECK-NEXT: mov v21.d[1], x10 -; CHECK-NEXT: fcvtzs x10, d1 -; CHECK-NEXT: mov v22.d[1], x14 -; CHECK-NEXT: fcvtzs x14, d6 -; CHECK-NEXT: mov v19.d[1], x15 -; CHECK-NEXT: stp q18, q23, [x8, #128] -; CHECK-NEXT: mov v17.d[1], x11 -; CHECK-NEXT: fcvtzs x11, d0 -; CHECK-NEXT: stp q24, q21, [x8, #96] -; CHECK-NEXT: fmov d0, x12 -; CHECK-NEXT: fmov d1, x10 -; CHECK-NEXT: stp q17, q22, [x8, #64] -; CHECK-NEXT: mov v0.d[1], x13 -; CHECK-NEXT: mov v1.d[1], x14 -; CHECK-NEXT: mov v2.d[1], x11 -; CHECK-NEXT: stp q0, q19, [x8, #32] -; CHECK-NEXT: stp q2, q1, [x8] +; CHECK-NEXT: frintx v6.2d, v6.2d +; CHECK-NEXT: ldp q21, q20, [sp, #32] +; CHECK-NEXT: frintx v5.2d, v5.2d +; CHECK-NEXT: frintx v16.2d, v16.2d +; CHECK-NEXT: frintx v17.2d, v17.2d +; CHECK-NEXT: frintx v4.2d, v4.2d +; CHECK-NEXT: frintx v18.2d, v18.2d +; CHECK-NEXT: frintx v19.2d, v19.2d +; CHECK-NEXT: frintx v3.2d, v3.2d +; 
CHECK-NEXT: ldp q23, q22, [sp] +; CHECK-NEXT: frintx v20.2d, v20.2d +; CHECK-NEXT: frintx v21.2d, v21.2d +; CHECK-NEXT: frintx v2.2d, v2.2d +; CHECK-NEXT: frintx v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v16.2d, v16.2d +; CHECK-NEXT: fcvtzs v17.2d, v17.2d +; CHECK-NEXT: frintx v0.2d, v0.2d +; CHECK-NEXT: frintx v22.2d, v22.2d +; CHECK-NEXT: fcvtzs v18.2d, v18.2d +; CHECK-NEXT: frintx v23.2d, v23.2d +; CHECK-NEXT: fcvtzs v19.2d, v19.2d +; CHECK-NEXT: fcvtzs v20.2d, v20.2d +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: stp q17, q16, [x8, #224] +; CHECK-NEXT: fcvtzs v16.2d, v21.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v17.2d, v22.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: stp q19, q18, [x8, #192] +; CHECK-NEXT: fcvtzs v18.2d, v23.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: stp q4, q5, [x8, #64] +; CHECK-NEXT: stp q6, q7, [x8, #96] +; CHECK-NEXT: stp q2, q3, [x8, #32] +; CHECK-NEXT: stp q0, q1, [x8] +; CHECK-NEXT: stp q18, q17, [x8, #128] +; CHECK-NEXT: stp q16, q20, [x8, #160] ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.llrint.v32i64.v16f64(<32 x double> %x) ret <32 x i64> %a diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll index a58be8dcb7455..41ba13a863d97 100644 --- a/llvm/test/CodeGen/AArch64/vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll @@ -1,5 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel -global-isel-abort=2 2>&1 |\ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for lrint_v2f16 +; CHECK-GI-NEXT: 
warning: Instruction selection used fallback path for lrint_v4f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32i64_v32f16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32i64_v32f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32f64 define <1 x i64> @lrint_v1f16(<1 x half> %x) { ; CHECK-LABEL: lrint_v1f16: @@ -17,12 +36,12 @@ declare <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half>) define <2 x i64> @lrint_v2f16(<2 x half> %x) { ; CHECK-LABEL: lrint_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NEXT: mov h1, v0.h[1] ; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s1, s1 ; CHECK-NEXT: fcvtzs x8, s0 ; CHECK-NEXT: fcvtzs x9, s1 ; CHECK-NEXT: fmov d0, x8 @@ -37,22 +56,24 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) { ; CHECK-LABEL: lrint_v4f16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, 
v0.h[2] +; CHECK-NEXT: dup v1.2s, v0.s[1] +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: frintx v1.4s, v1.4s +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: fcvtn v1.4h, v1.4s ; CHECK-NEXT: mov h2, v0.h[1] -; CHECK-NEXT: mov h3, v0.h[3] ; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: mov h3, v1.h[1] ; CHECK-NEXT: fcvt s1, h1 ; CHECK-NEXT: fcvt s2, h2 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: frintx s3, s3 ; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fcvt s3, h3 ; CHECK-NEXT: fcvtzs x9, s1 ; CHECK-NEXT: fcvtzs x10, s2 -; CHECK-NEXT: fcvtzs x11, s3 ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fcvtzs x11, s3 ; CHECK-NEXT: fmov d1, x9 ; CHECK-NEXT: mov v0.d[1], x10 ; CHECK-NEXT: mov v1.d[1], x11 @@ -65,45 +86,48 @@ declare <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half>) define <8 x i64> @lrint_v8f16(<8 x half> %x) { ; CHECK-LABEL: lrint_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov h4, v0.h[2] -; CHECK-NEXT: mov h3, v0.h[1] -; CHECK-NEXT: mov h7, v0.h[3] +; CHECK-NEXT: dup v1.2s, v0.s[1] +; CHECK-NEXT: dup v2.2s, v0.s[3] +; CHECK-NEXT: fcvtl v3.4s, v0.4h +; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NEXT: fcvtl v2.4s, v2.4h +; CHECK-NEXT: frintx v3.4s, v3.4s +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: frintx v1.4s, v1.4s +; CHECK-NEXT: frintx v2.4s, v2.4s +; CHECK-NEXT: fcvtn v3.4h, v3.4s +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NEXT: fcvtn v2.4h, v2.4s +; CHECK-NEXT: mov h4, v3.h[1] +; CHECK-NEXT: mov h5, v0.h[1] ; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: mov h2, v1.h[2] -; CHECK-NEXT: mov h5, v1.h[1] -; CHECK-NEXT: mov h6, v1.h[3] -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fcvt s4, h4 ; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvt s7, h7 -; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: mov h6, v1.h[1] +; CHECK-NEXT: 
mov h7, v2.h[1] +; CHECK-NEXT: fcvt s1, h1 ; CHECK-NEXT: fcvt s2, h2 +; CHECK-NEXT: fcvt s4, h4 ; CHECK-NEXT: fcvt s5, h5 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fcvtzs x9, s3 ; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: frintx s7, s7 -; CHECK-NEXT: fcvtzs x9, s0 -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: frintx s5, s5 -; CHECK-NEXT: frintx s6, s6 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: fcvtzs x12, s4 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fcvtzs x15, s7 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: fcvt s7, h7 +; CHECK-NEXT: fcvtzs x11, s1 +; CHECK-NEXT: fcvtzs x12, s2 +; CHECK-NEXT: fcvtzs x10, s4 ; CHECK-NEXT: fcvtzs x13, s5 -; CHECK-NEXT: fcvtzs x14, s6 ; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fmov d1, x12 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: fmov d3, x10 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fcvtzs x14, s6 +; CHECK-NEXT: fcvtzs x15, s7 +; CHECK-NEXT: fmov d1, x11 +; CHECK-NEXT: fmov d3, x12 +; CHECK-NEXT: mov v0.d[1], x10 ; CHECK-NEXT: mov v2.d[1], x13 -; CHECK-NEXT: mov v1.d[1], x15 -; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: mov v3.d[1], x15 ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> %x) ret <8 x i64> %a @@ -113,84 +137,90 @@ declare <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half>) define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-LABEL: lrint_v16i64_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: mov h17, v0.h[1] -; CHECK-NEXT: mov h19, v0.h[2] -; CHECK-NEXT: fcvt s18, h0 -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: mov h4, v2.h[1] -; CHECK-NEXT: mov h5, v2.h[2] -; CHECK-NEXT: fcvt s7, h3 -; CHECK-NEXT: fcvt s6, h2 -; CHECK-NEXT: mov h16, v3.h[2] -; CHECK-NEXT: mov h2, v2.h[3] -; CHECK-NEXT: fcvt s17, h17 -; CHECK-NEXT: fcvt s19, h19 -; CHECK-NEXT: frintx s18, 
s18 -; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvtl2 v4.4s, v0.8h +; CHECK-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-NEXT: fcvtl v3.4s, v0.4h +; CHECK-NEXT: dup v5.2s, v0.s[1] +; CHECK-NEXT: dup v0.2s, v0.s[3] +; CHECK-NEXT: dup v6.2s, v1.s[1] +; CHECK-NEXT: dup v7.2s, v1.s[3] +; CHECK-NEXT: fcvtl v1.4s, v1.4h +; CHECK-NEXT: frintx v4.4s, v4.4s +; CHECK-NEXT: frintx v2.4s, v2.4s +; CHECK-NEXT: frintx v3.4s, v3.4s +; CHECK-NEXT: fcvtl v5.4s, v5.4h +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtl v6.4s, v6.4h +; CHECK-NEXT: fcvtl v7.4s, v7.4h +; CHECK-NEXT: frintx v1.4s, v1.4s +; CHECK-NEXT: fcvtn v4.4h, v4.4s +; CHECK-NEXT: fcvtn v2.4h, v2.4s +; CHECK-NEXT: fcvtn v3.4h, v3.4s +; CHECK-NEXT: frintx v5.4s, v5.4s +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: frintx v6.4s, v6.4s +; CHECK-NEXT: frintx v7.4s, v7.4s +; CHECK-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NEXT: mov h16, v4.h[1] ; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: frintx s7, s7 -; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvt s17, h2 +; CHECK-NEXT: mov h18, v3.h[1] +; CHECK-NEXT: fcvtn v5.4h, v5.4s +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: fcvtn v6.4h, v6.4s +; CHECK-NEXT: fcvtn v7.4h, v7.4s +; CHECK-NEXT: mov h2, v2.h[1] ; CHECK-NEXT: fcvt s16, h16 -; CHECK-NEXT: fcvt s2, h2 -; CHECK-NEXT: frintx s17, s17 -; CHECK-NEXT: frintx s19, s19 -; CHECK-NEXT: fcvtzs x13, s18 -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: frintx s5, s5 -; CHECK-NEXT: fcvtzs x9, s7 -; CHECK-NEXT: mov h7, v1.h[2] -; CHECK-NEXT: fcvtzs x8, s6 -; CHECK-NEXT: mov h6, v1.h[1] -; CHECK-NEXT: frintx s16, s16 -; CHECK-NEXT: fcvtzs x14, s17 -; CHECK-NEXT: fcvtzs x15, s19 -; CHECK-NEXT: fcvtzs x10, s4 -; CHECK-NEXT: mov h4, v3.h[1] -; CHECK-NEXT: fcvtzs x11, s5 -; CHECK-NEXT: mov h5, v1.h[3] -; CHECK-NEXT: mov h3, v3.h[3] +; CHECK-NEXT: fcvtzs x8, s4 +; CHECK-NEXT: fcvtzs x9, s17 +; CHECK-NEXT: fcvt s4, h18 +; CHECK-NEXT: fcvt s17, h5 +; CHECK-NEXT: fcvtzs x10, s3 
+; CHECK-NEXT: mov h3, v5.h[1] +; CHECK-NEXT: fcvt s5, h0 +; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: mov h18, v6.h[1] +; CHECK-NEXT: mov h19, v7.h[1] +; CHECK-NEXT: fcvtzs x11, s16 +; CHECK-NEXT: mov h16, v1.h[1] ; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fcvt s7, h7 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fcvtzs x12, s16 -; CHECK-NEXT: frintx s16, s2 -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fcvt s4, h4 +; CHECK-NEXT: fcvtzs x12, s4 +; CHECK-NEXT: fcvt s4, h6 +; CHECK-NEXT: fcvtzs x13, s17 +; CHECK-NEXT: fcvtzs x14, s5 +; CHECK-NEXT: fcvt s5, h7 ; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: frintx s7, s7 -; CHECK-NEXT: frintx s17, s6 +; CHECK-NEXT: fcvt s7, h2 +; CHECK-NEXT: fcvt s17, h0 +; CHECK-NEXT: fcvt s18, h18 +; CHECK-NEXT: fcvt s16, h16 +; CHECK-NEXT: fcvt s19, h19 +; CHECK-NEXT: fcvtzs x15, s1 +; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: fcvtzs x8, s4 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fcvtzs x10, s5 ; CHECK-NEXT: fmov d6, x9 -; CHECK-NEXT: mov v2.d[1], x10 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: frintx s18, s3 -; CHECK-NEXT: frintx s5, s5 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: fcvtzs x9, s7 -; CHECK-NEXT: fmov d3, x11 -; CHECK-NEXT: fcvtzs x11, s0 -; CHECK-NEXT: fmov d7, x12 -; CHECK-NEXT: fcvtzs x12, s16 -; CHECK-NEXT: fcvtzs x16, s17 -; CHECK-NEXT: fcvtzs x17, s4 -; CHECK-NEXT: fmov d0, x13 -; CHECK-NEXT: fmov d1, x15 +; CHECK-NEXT: fcvtzs x9, s3 +; CHECK-NEXT: fmov d1, x13 +; CHECK-NEXT: fcvtzs x13, s17 +; CHECK-NEXT: fcvtzs x17, s7 +; CHECK-NEXT: fcvtzs x16, s16 ; CHECK-NEXT: fcvtzs x18, s18 -; CHECK-NEXT: fcvtzs x0, s5 -; CHECK-NEXT: fmov d4, x8 -; CHECK-NEXT: fmov d5, x9 -; CHECK-NEXT: mov v0.d[1], x14 -; CHECK-NEXT: mov v1.d[1], x11 -; CHECK-NEXT: mov v3.d[1], x12 -; CHECK-NEXT: mov v4.d[1], x16 +; CHECK-NEXT: fcvtzs x0, s19 +; CHECK-NEXT: fmov d3, x14 +; CHECK-NEXT: fmov d4, x15 +; CHECK-NEXT: fmov d5, x8 +; CHECK-NEXT: fmov d7, x10 +; CHECK-NEXT: mov v0.d[1], x12 +; CHECK-NEXT: 
mov v1.d[1], x9 +; CHECK-NEXT: mov v2.d[1], x11 ; CHECK-NEXT: mov v6.d[1], x17 -; CHECK-NEXT: mov v7.d[1], x18 -; CHECK-NEXT: mov v5.d[1], x0 +; CHECK-NEXT: mov v3.d[1], x13 +; CHECK-NEXT: mov v4.d[1], x16 +; CHECK-NEXT: mov v5.d[1], x18 +; CHECK-NEXT: mov v7.d[1], x0 ; CHECK-NEXT: ret %a = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> %x) ret <16 x i64> %a @@ -200,170 +230,182 @@ declare <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half>) define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-LABEL: lrint_v32i64_v32f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: ext v5.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: ext v6.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov h19, v0.h[1] -; CHECK-NEXT: fcvt s21, h0 -; CHECK-NEXT: mov h23, v1.h[2] -; CHECK-NEXT: fcvt s22, h1 -; CHECK-NEXT: fcvt s26, h2 -; CHECK-NEXT: mov h27, v2.h[1] -; CHECK-NEXT: mov h28, v2.h[2] -; CHECK-NEXT: mov h16, v4.h[2] -; CHECK-NEXT: fcvt s17, h5 -; CHECK-NEXT: mov h18, v5.h[2] -; CHECK-NEXT: mov h20, v6.h[2] -; CHECK-NEXT: fcvt s24, h7 -; CHECK-NEXT: fcvt s25, h6 -; CHECK-NEXT: fcvt s19, h19 -; CHECK-NEXT: frintx s22, s22 -; CHECK-NEXT: fcvt s16, h16 -; CHECK-NEXT: frintx s17, s17 -; CHECK-NEXT: fcvt s18, h18 -; CHECK-NEXT: fcvt s20, h20 -; CHECK-NEXT: frintx s16, s16 -; CHECK-NEXT: fcvtzs x12, s17 -; CHECK-NEXT: frintx s17, s18 -; CHECK-NEXT: frintx s18, s21 -; CHECK-NEXT: fcvt s21, h23 -; CHECK-NEXT: frintx s23, s24 -; CHECK-NEXT: frintx s24, s25 -; CHECK-NEXT: frintx s25, s19 -; CHECK-NEXT: mov h19, v7.h[1] -; CHECK-NEXT: fcvtzs x13, s16 -; CHECK-NEXT: frintx s16, s20 -; CHECK-NEXT: frintx s20, s26 -; CHECK-NEXT: fcvtzs x9, s23 -; CHECK-NEXT: mov h23, v3.h[2] -; CHECK-NEXT: fcvt s26, h27 -; CHECK-NEXT: fcvtzs x15, s24 -; CHECK-NEXT: fcvtzs x10, s25 -; CHECK-NEXT: fcvt s24, h28 -; CHECK-NEXT: mov h25, v3.h[3] -; CHECK-NEXT: fcvtzs x14, s17 -; CHECK-NEXT: frintx s21, s21 -; CHECK-NEXT: fmov d17, x12 -; 
CHECK-NEXT: fcvtzs x12, s16 -; CHECK-NEXT: fmov d16, x13 +; CHECK-NEXT: dup v4.2s, v1.s[1] +; CHECK-NEXT: fcvtl v5.4s, v0.4h +; CHECK-NEXT: dup v6.2s, v1.s[3] +; CHECK-NEXT: fcvtl v7.4s, v1.4h +; CHECK-NEXT: dup v16.2s, v2.s[3] +; CHECK-NEXT: fcvtl v17.4s, v2.4h +; CHECK-NEXT: dup v19.2s, v2.s[1] +; CHECK-NEXT: dup v18.2s, v0.s[1] +; CHECK-NEXT: dup v21.2s, v3.s[1] +; CHECK-NEXT: dup v24.2s, v3.s[3] +; CHECK-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NEXT: fcvtl2 v2.4s, v2.8h +; CHECK-NEXT: fcvtl v4.4s, v4.4h +; CHECK-NEXT: frintx v5.4s, v5.4s +; CHECK-NEXT: fcvtl v6.4s, v6.4h +; CHECK-NEXT: frintx v7.4s, v7.4s +; CHECK-NEXT: fcvtl v16.4s, v16.4h +; CHECK-NEXT: frintx v22.4s, v17.4s +; CHECK-NEXT: fcvtl v19.4s, v19.4h +; CHECK-NEXT: dup v17.2s, v0.s[3] +; CHECK-NEXT: fcvtl v21.4s, v21.4h +; CHECK-NEXT: fcvtl v24.4s, v24.4h +; CHECK-NEXT: frintx v1.4s, v1.4s +; CHECK-NEXT: frintx v2.4s, v2.4s +; CHECK-NEXT: frintx v20.4s, v4.4s +; CHECK-NEXT: fcvtn v4.4h, v5.4s +; CHECK-NEXT: frintx v23.4s, v6.4s +; CHECK-NEXT: fcvtn v5.4h, v7.4s +; CHECK-NEXT: frintx v25.4s, v16.4s +; CHECK-NEXT: fcvtn v16.4h, v22.4s +; CHECK-NEXT: frintx v26.4s, v19.4s +; CHECK-NEXT: fcvtn v6.4h, v20.4s +; CHECK-NEXT: fcvtl v20.4s, v3.4h +; CHECK-NEXT: fcvt s22, h4 +; CHECK-NEXT: fcvtn v7.4h, v23.4s +; CHECK-NEXT: fcvtl2 v23.4s, v3.8h +; CHECK-NEXT: fcvtl v3.4s, v18.4h +; CHECK-NEXT: fcvtn v25.4h, v25.4s +; CHECK-NEXT: fcvt s27, h5 +; CHECK-NEXT: fcvtl v18.4s, v17.4h +; CHECK-NEXT: frintx v17.4s, v21.4s +; CHECK-NEXT: fcvt s29, h16 +; CHECK-NEXT: mov h16, v16.h[1] +; CHECK-NEXT: frintx v20.4s, v20.4s +; CHECK-NEXT: fcvtzs x9, s22 +; CHECK-NEXT: fcvt s28, h6 +; CHECK-NEXT: fcvt s22, h7 +; CHECK-NEXT: frintx v19.4s, v3.4s +; CHECK-NEXT: fcvtn v3.4h, v26.4s +; CHECK-NEXT: mov h21, v25.h[1] +; CHECK-NEXT: frintx v23.4s, v23.4s +; CHECK-NEXT: fcvtzs x10, s27 +; CHECK-NEXT: fcvtl2 v26.4s, v0.8h +; CHECK-NEXT: fcvt s25, h25 +; CHECK-NEXT: fcvtn v17.4h, v17.4s +; CHECK-NEXT: fcvtn v20.4h, v20.4s +; CHECK-NEXT: 
fcvtzs x12, s28 +; CHECK-NEXT: fcvtzs x14, s29 ; CHECK-NEXT: fcvtzs x13, s22 -; CHECK-NEXT: fcvt s22, h3 -; CHECK-NEXT: mov h3, v3.h[1] -; CHECK-NEXT: mov h27, v0.h[2] -; CHECK-NEXT: mov h28, v2.h[3] +; CHECK-NEXT: frintx v22.4s, v24.4s +; CHECK-NEXT: fcvt s24, h3 +; CHECK-NEXT: fcvt s21, h21 +; CHECK-NEXT: fcvtn v23.4h, v23.4s +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fcvtzs x15, s25 +; CHECK-NEXT: mov h25, v17.h[1] +; CHECK-NEXT: fcvt s17, h17 +; CHECK-NEXT: mov h27, v20.h[1] +; CHECK-NEXT: fcvt s20, h20 +; CHECK-NEXT: fcvtn v28.4h, v2.4s +; CHECK-NEXT: fcvtn v22.4h, v22.4s +; CHECK-NEXT: fcvtzs x10, s24 +; CHECK-NEXT: frintx v24.4s, v26.4s +; CHECK-NEXT: fcvtzs x11, s21 +; CHECK-NEXT: mov h26, v23.h[1] ; CHECK-NEXT: fcvt s23, h23 -; CHECK-NEXT: frintx s26, s26 +; CHECK-NEXT: fcvt s25, h25 +; CHECK-NEXT: fmov d2, x13 +; CHECK-NEXT: fcvtzs x13, s17 +; CHECK-NEXT: fcvt s21, h27 ; CHECK-NEXT: fcvtzs x16, s20 -; CHECK-NEXT: frintx s20, s24 -; CHECK-NEXT: fcvt s24, h25 -; CHECK-NEXT: fcvtzs x11, s18 -; CHECK-NEXT: fmov d18, x14 +; CHECK-NEXT: fcvtn v27.4h, v1.4s +; CHECK-NEXT: mov h20, v22.h[1] +; CHECK-NEXT: fcvt s22, h22 +; CHECK-NEXT: fcvtn v24.4h, v24.4s +; CHECK-NEXT: fmov d1, x12 +; CHECK-NEXT: fcvtzs x0, s23 +; CHECK-NEXT: fmov d17, x14 +; CHECK-NEXT: fcvtzs x18, s25 +; CHECK-NEXT: mov h25, v28.h[1] +; CHECK-NEXT: fcvt s23, h28 +; CHECK-NEXT: fcvtzs x12, s21 +; CHECK-NEXT: fcvt s21, h26 +; CHECK-NEXT: fcvt s26, h27 +; CHECK-NEXT: fcvt s20, h20 +; CHECK-NEXT: fcvtzs x17, s22 +; CHECK-NEXT: fcvt s22, h24 +; CHECK-NEXT: frintx v18.4s, v18.4s +; CHECK-NEXT: mov h3, v3.h[1] +; CHECK-NEXT: mov h7, v7.h[1] +; CHECK-NEXT: fcvt s25, h25 +; CHECK-NEXT: fcvtn v19.4h, v19.4s +; CHECK-NEXT: fcvt s16, h16 ; CHECK-NEXT: fcvtzs x14, s21 -; CHECK-NEXT: frintx s22, s22 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvt s25, h27 -; CHECK-NEXT: fcvt s27, h28 -; CHECK-NEXT: frintx s23, s23 -; CHECK-NEXT: mov h21, v1.h[3] -; CHECK-NEXT: fmov d2, x15 -; CHECK-NEXT: fcvtzs x15, s26 -; 
CHECK-NEXT: fmov d26, x13 -; CHECK-NEXT: mov h1, v1.h[1] -; CHECK-NEXT: fcvtzs x13, s20 -; CHECK-NEXT: frintx s20, s24 -; CHECK-NEXT: fmov d24, x14 -; CHECK-NEXT: fcvtzs x14, s22 -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: fmov d22, x16 -; CHECK-NEXT: frintx s27, s27 -; CHECK-NEXT: fcvtzs x16, s23 -; CHECK-NEXT: fcvt s21, h21 -; CHECK-NEXT: frintx s25, s25 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: mov h23, v7.h[2] -; CHECK-NEXT: mov v22.d[1], x15 +; CHECK-NEXT: fmov d21, x15 +; CHECK-NEXT: mov h5, v5.h[1] ; CHECK-NEXT: fcvtzs x15, s20 -; CHECK-NEXT: fmov d20, x13 -; CHECK-NEXT: fcvtzs x13, s3 -; CHECK-NEXT: fmov d3, x14 -; CHECK-NEXT: fcvtzs x14, s27 -; CHECK-NEXT: fmov d27, x16 -; CHECK-NEXT: frintx s21, s21 -; CHECK-NEXT: mov h7, v7.h[3] -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fcvt s23, h23 -; CHECK-NEXT: fcvt s19, h19 -; CHECK-NEXT: mov v27.d[1], x15 -; CHECK-NEXT: fcvtzs x15, s25 -; CHECK-NEXT: mov h25, v6.h[3] +; CHECK-NEXT: fmov d20, x16 +; CHECK-NEXT: fcvtzs x16, s22 +; CHECK-NEXT: fmov d22, x17 +; CHECK-NEXT: fcvtzs x17, s26 +; CHECK-NEXT: fmov d26, x0 +; CHECK-NEXT: fcvtn v18.4h, v18.4s ; CHECK-NEXT: mov h6, v6.h[1] -; CHECK-NEXT: mov v3.d[1], x13 -; CHECK-NEXT: fcvtzs x13, s21 -; CHECK-NEXT: mov h21, v5.h[1] -; CHECK-NEXT: mov h5, v5.h[3] -; CHECK-NEXT: mov v20.d[1], x14 -; CHECK-NEXT: fcvtzs x14, s1 -; CHECK-NEXT: mov h1, v4.h[1] -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: fcvt s25, h25 +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: mov v20.d[1], x12 +; CHECK-NEXT: fcvtzs x12, s25 ; CHECK-NEXT: fcvt s7, h7 -; CHECK-NEXT: stp q3, q27, [x8, #192] +; CHECK-NEXT: mov v26.d[1], x14 +; CHECK-NEXT: mov v22.d[1], x15 +; CHECK-NEXT: fcvtzs x14, s23 +; CHECK-NEXT: fmov d23, x13 +; CHECK-NEXT: mov v21.d[1], x11 +; CHECK-NEXT: mov h4, v4.h[1] +; CHECK-NEXT: mov h25, v19.h[1] ; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: mov h3, v4.h[3] -; CHECK-NEXT: stp q22, q20, [x8, #128] -; CHECK-NEXT: fcvt s21, h21 +; 
CHECK-NEXT: fcvtzs x11, s3 ; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: mov v24.d[1], x13 -; CHECK-NEXT: mov v26.d[1], x14 +; CHECK-NEXT: fcvt s19, h19 +; CHECK-NEXT: fcvtzs x13, s7 +; CHECK-NEXT: stp q26, q22, [x8, #224] +; CHECK-NEXT: mov v23.d[1], x18 +; CHECK-NEXT: mov h26, v27.h[1] +; CHECK-NEXT: fmov d22, x14 ; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: frintx s22, s25 -; CHECK-NEXT: fmov d20, x12 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: frintx s6, s6 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvtzs x12, s0 -; CHECK-NEXT: frintx s5, s5 -; CHECK-NEXT: frintx s21, s21 -; CHECK-NEXT: fmov d0, x11 -; CHECK-NEXT: stp q26, q24, [x8, #64] -; CHECK-NEXT: fmov d24, x15 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: fcvtzs x11, s22 -; CHECK-NEXT: frintx s22, s23 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: fcvtzs x13, s6 -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: frintx s6, s7 -; CHECK-NEXT: fcvtzs x14, s5 -; CHECK-NEXT: mov v24.d[1], x12 -; CHECK-NEXT: frintx s5, s19 -; CHECK-NEXT: fcvtzs x12, s21 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: fcvtzs x10, s4 -; CHECK-NEXT: mov v20.d[1], x11 -; CHECK-NEXT: fcvtzs x11, s22 -; CHECK-NEXT: mov v2.d[1], x13 -; CHECK-NEXT: fcvtzs x15, s3 -; CHECK-NEXT: fcvtzs x13, s1 -; CHECK-NEXT: mov v18.d[1], x14 +; CHECK-NEXT: fmov d3, x16 +; CHECK-NEXT: fcvt s7, h25 ; CHECK-NEXT: fcvtzs x14, s6 -; CHECK-NEXT: stp q0, q24, [x8] -; CHECK-NEXT: mov v17.d[1], x12 -; CHECK-NEXT: fcvtzs x12, s5 -; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: fmov d1, x11 -; CHECK-NEXT: stp q2, q20, [x8, #224] -; CHECK-NEXT: fmov d2, x9 -; CHECK-NEXT: mov v16.d[1], x15 -; CHECK-NEXT: stp q17, q18, [x8, #160] -; CHECK-NEXT: mov v0.d[1], x13 +; CHECK-NEXT: mov v2.d[1], x13 +; CHECK-NEXT: stp q20, q23, [x8, #192] +; CHECK-NEXT: fcvt s23, h26 +; CHECK-NEXT: mov v22.d[1], x12 +; CHECK-NEXT: fmov d20, x10 +; CHECK-NEXT: fcvtzs x10, s16 +; CHECK-NEXT: mov h16, v24.h[1] +; CHECK-NEXT: mov h24, v18.h[1] +; CHECK-NEXT: fcvt s18, h18 ; CHECK-NEXT: mov v1.d[1], x14 -; CHECK-NEXT: mov 
v2.d[1], x12 -; CHECK-NEXT: stp q0, q16, [x8, #96] -; CHECK-NEXT: stp q2, q1, [x8, #32] +; CHECK-NEXT: fcvtzs x14, s7 +; CHECK-NEXT: stp q22, q21, [x8, #160] +; CHECK-NEXT: fcvtzs x12, s23 +; CHECK-NEXT: fmov d21, x17 +; CHECK-NEXT: fcvt s16, h16 +; CHECK-NEXT: mov v20.d[1], x11 +; CHECK-NEXT: fcvtzs x11, s5 +; CHECK-NEXT: fcvt s22, h24 +; CHECK-NEXT: mov v17.d[1], x10 +; CHECK-NEXT: fcvtzs x10, s18 +; CHECK-NEXT: mov v21.d[1], x12 +; CHECK-NEXT: fcvtzs x12, s19 +; CHECK-NEXT: fcvtzs x15, s16 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: fcvtzs x11, s4 +; CHECK-NEXT: stp q17, q20, [x8, #128] +; CHECK-NEXT: fcvtzs x13, s22 +; CHECK-NEXT: fmov d4, x10 +; CHECK-NEXT: stp q21, q2, [x8, #96] +; CHECK-NEXT: fmov d5, x12 +; CHECK-NEXT: fmov d2, x9 +; CHECK-NEXT: stp q0, q1, [x8, #64] +; CHECK-NEXT: mov v3.d[1], x15 +; CHECK-NEXT: mov v4.d[1], x13 +; CHECK-NEXT: mov v5.d[1], x14 +; CHECK-NEXT: mov v2.d[1], x11 +; CHECK-NEXT: stp q3, q4, [x8, #32] +; CHECK-NEXT: stp q2, q5, [x8] ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half> %x) ret <32 x i64> %a @@ -371,13 +413,20 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { declare <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half>) define <1 x i64> @lrint_v1f32(<1 x float> %x) { -; CHECK-LABEL: lrint_v1f32: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: lrint_v1f32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: frintx v0.2s, v0.2s +; CHECK-SD-NEXT: fcvtl v0.2d, v0.2s +; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: lrint_v1f32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintx s0, s0 +; CHECK-GI-NEXT: fcvtzs x8, s0 +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: ret %a = call <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float> %x) ret <1 x i64> %a } @@ -386,14 +435,9 @@ 
declare <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float>) define <2 x i64> @lrint_v2f32(<2 x float> %x) { ; CHECK-LABEL: lrint_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0.s[1] -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: fcvtzs x9, s1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: frintx v0.2s, v0.2s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: ret %a = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %x) ret <2 x i64> %a @@ -404,20 +448,12 @@ define <4 x i64> @lrint_v4f32(<4 x float> %x) { ; CHECK-LABEL: lrint_v4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov s3, v0.s[1] -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: mov s2, v1.s[1] -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: fcvtzs x9, s0 -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fcvtzs x10, s2 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: mov v1.d[1], x10 +; CHECK-NEXT: frintx v0.2s, v0.2s +; CHECK-NEXT: frintx v1.2s, v1.2s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtl v1.2d, v1.2s +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-NEXT: ret %a = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> %x) ret <4 x i64> %a @@ -429,34 +465,18 @@ define <8 x i64> @lrint_v8f32(<8 x float> %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: mov s4, v0.s[1] -; CHECK-NEXT: mov s7, v1.s[1] -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: mov s5, v2.s[1] -; CHECK-NEXT: mov s6, v3.s[1] -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: frintx s7, s7 -; CHECK-NEXT: fcvtzs x9, s0 -; 
CHECK-NEXT: fcvtzs x12, s1 -; CHECK-NEXT: frintx s5, s5 -; CHECK-NEXT: frintx s6, s6 -; CHECK-NEXT: fcvtzs x8, s2 -; CHECK-NEXT: fcvtzs x10, s3 -; CHECK-NEXT: fcvtzs x11, s4 -; CHECK-NEXT: fcvtzs x15, s7 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fmov d2, x12 -; CHECK-NEXT: fcvtzs x13, s5 -; CHECK-NEXT: fcvtzs x14, s6 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fmov d3, x10 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: mov v2.d[1], x15 -; CHECK-NEXT: mov v1.d[1], x13 -; CHECK-NEXT: mov v3.d[1], x14 +; CHECK-NEXT: frintx v0.2s, v0.2s +; CHECK-NEXT: frintx v1.2s, v1.2s +; CHECK-NEXT: frintx v2.2s, v2.2s +; CHECK-NEXT: frintx v3.2s, v3.2s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtl v1.2d, v1.2s +; CHECK-NEXT: fcvtl v4.2d, v2.2s +; CHECK-NEXT: fcvtl v3.2d, v3.2s +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v2.2d, v1.2d +; CHECK-NEXT: fcvtzs v1.2d, v4.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> %x) ret <8 x i64> %a @@ -466,66 +486,34 @@ declare <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float>) define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) { ; CHECK-LABEL: lrint_v16i64_v16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v5.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v5.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: frintx s7, s0 -; CHECK-NEXT: ext v16.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: frintx s17, s4 -; CHECK-NEXT: mov s4, v4.s[1] -; CHECK-NEXT: mov s18, v5.s[1] -; CHECK-NEXT: frintx s5, s5 -; CHECK-NEXT: frintx s19, s6 -; CHECK-NEXT: fcvtzs x8, s7 -; CHECK-NEXT: frintx s7, s16 -; CHECK-NEXT: mov s6, v6.s[1] -; CHECK-NEXT: mov s16, v16.s[1] -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: fcvtzs x9, s17 -; CHECK-NEXT: frintx s17, s1 -; CHECK-NEXT: mov s1, v1.s[1] -; CHECK-NEXT: frintx s18, s18 -; 
CHECK-NEXT: fcvtzs x10, s5 -; CHECK-NEXT: mov s5, v2.s[1] -; CHECK-NEXT: fcvtzs x11, s19 -; CHECK-NEXT: mov s19, v3.s[1] -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: fcvtzs x12, s7 -; CHECK-NEXT: frintx s6, s6 -; CHECK-NEXT: fcvtzs x13, s4 -; CHECK-NEXT: frintx s4, s3 -; CHECK-NEXT: frintx s16, s16 -; CHECK-NEXT: fcvtzs x14, s18 -; CHECK-NEXT: frintx s18, s1 -; CHECK-NEXT: fcvtzs x15, s17 -; CHECK-NEXT: frintx s20, s5 -; CHECK-NEXT: frintx s17, s19 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fcvtzs x9, s2 -; CHECK-NEXT: fmov d5, x11 -; CHECK-NEXT: fmov d3, x10 -; CHECK-NEXT: fcvtzs x11, s4 -; CHECK-NEXT: fcvtzs x10, s0 -; CHECK-NEXT: fmov d7, x12 -; CHECK-NEXT: fcvtzs x12, s18 -; CHECK-NEXT: fcvtzs x17, s6 -; CHECK-NEXT: fcvtzs x18, s16 -; CHECK-NEXT: fcvtzs x16, s20 -; CHECK-NEXT: fcvtzs x0, s17 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d2, x15 -; CHECK-NEXT: fmov d4, x9 -; CHECK-NEXT: mov v1.d[1], x13 -; CHECK-NEXT: fmov d6, x11 -; CHECK-NEXT: mov v3.d[1], x14 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: mov v5.d[1], x17 -; CHECK-NEXT: mov v7.d[1], x18 -; CHECK-NEXT: mov v2.d[1], x12 -; CHECK-NEXT: mov v4.d[1], x16 -; CHECK-NEXT: mov v6.d[1], x0 +; CHECK-NEXT: ext v7.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: frintx v0.2s, v0.2s +; CHECK-NEXT: frintx v1.2s, v1.2s +; CHECK-NEXT: frintx v2.2s, v2.2s +; CHECK-NEXT: frintx v3.2s, v3.2s +; CHECK-NEXT: frintx v5.2s, v5.2s +; CHECK-NEXT: frintx v4.2s, v4.2s +; CHECK-NEXT: frintx v6.2s, v6.2s +; CHECK-NEXT: frintx v7.2s, v7.2s +; CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtl v1.2d, v1.2s +; CHECK-NEXT: fcvtl v16.2d, v2.2s +; CHECK-NEXT: fcvtl v18.2d, v3.2s +; CHECK-NEXT: fcvtl v5.2d, v5.2s +; CHECK-NEXT: fcvtl v17.2d, v4.2s +; CHECK-NEXT: fcvtl v19.2d, v6.2s +; CHECK-NEXT: fcvtl v7.2d, v7.2s +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v2.2d, v1.2d +; CHECK-NEXT: fcvtzs v4.2d, v16.2d +; CHECK-NEXT: fcvtzs v6.2d, v18.2d +; CHECK-NEXT: fcvtzs v1.2d, v5.2d +; CHECK-NEXT: fcvtzs v3.2d, v17.2d +; CHECK-NEXT: 
fcvtzs v5.2d, v19.2d +; CHECK-NEXT: fcvtzs v7.2d, v7.2d ; CHECK-NEXT: ret %a = call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> %x) ret <16 x i64> %a @@ -535,134 +523,70 @@ declare <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float>) define <32 x i64> @lrint_v32i64_v32f32(<32 x float> %x) { ; CHECK-LABEL: lrint_v32i64_v32f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v16.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: ext v20.16b, v5.16b, v5.16b, #8 -; CHECK-NEXT: ext v17.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: ext v18.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v19.16b, v4.16b, v4.16b, #8 -; CHECK-NEXT: ext v21.16b, v6.16b, v6.16b, #8 -; CHECK-NEXT: ext v22.16b, v7.16b, v7.16b, #8 -; CHECK-NEXT: frintx s24, s16 -; CHECK-NEXT: mov s28, v20.s[1] -; CHECK-NEXT: frintx s25, s17 -; CHECK-NEXT: frintx s26, s18 -; CHECK-NEXT: frintx s27, s19 -; CHECK-NEXT: frintx s29, s20 -; CHECK-NEXT: mov s30, v21.s[1] -; CHECK-NEXT: frintx s20, s21 -; CHECK-NEXT: frintx s21, s22 -; CHECK-NEXT: mov s23, v22.s[1] -; CHECK-NEXT: mov s19, v19.s[1] -; CHECK-NEXT: mov s17, v17.s[1] -; CHECK-NEXT: fcvtzs x12, s24 -; CHECK-NEXT: frintx s24, s28 -; CHECK-NEXT: fcvtzs x13, s25 -; CHECK-NEXT: mov s25, v7.s[1] -; CHECK-NEXT: fcvtzs x9, s26 -; CHECK-NEXT: fcvtzs x11, s27 -; CHECK-NEXT: fcvtzs x14, s20 -; CHECK-NEXT: fcvtzs x15, s21 -; CHECK-NEXT: frintx s26, s1 -; CHECK-NEXT: frintx s23, s23 -; CHECK-NEXT: frintx s27, s7 -; CHECK-NEXT: frintx s22, s30 -; CHECK-NEXT: fmov d20, x12 -; CHECK-NEXT: fcvtzs x12, s24 -; CHECK-NEXT: mov s24, v6.s[1] -; CHECK-NEXT: frintx s25, s25 -; CHECK-NEXT: frintx s6, s6 -; CHECK-NEXT: fcvtzs x10, s29 -; CHECK-NEXT: fmov d7, x11 -; CHECK-NEXT: fmov d21, x13 -; CHECK-NEXT: frintx s28, s5 -; CHECK-NEXT: fcvtzs x11, s23 -; CHECK-NEXT: fmov d23, x14 -; CHECK-NEXT: fcvtzs x14, s26 -; CHECK-NEXT: fmov d26, x15 -; CHECK-NEXT: fcvtzs x15, s27 -; CHECK-NEXT: frintx s24, s24 -; CHECK-NEXT: mov s27, v5.s[1] -; CHECK-NEXT: fcvtzs x13, s22 -; CHECK-NEXT: fcvtzs x17, s25 -; CHECK-NEXT: frintx 
s25, s4 -; CHECK-NEXT: fcvtzs x18, s6 -; CHECK-NEXT: fmov d6, x10 -; CHECK-NEXT: frintx s22, s2 -; CHECK-NEXT: mov v26.d[1], x11 -; CHECK-NEXT: fmov d5, x14 -; CHECK-NEXT: fcvtzs x10, s24 -; CHECK-NEXT: fmov d24, x15 -; CHECK-NEXT: fcvtzs x14, s28 -; CHECK-NEXT: frintx s27, s27 -; CHECK-NEXT: mov v23.d[1], x13 -; CHECK-NEXT: mov s4, v4.s[1] -; CHECK-NEXT: fcvtzs x13, s25 -; CHECK-NEXT: fmov d25, x18 -; CHECK-NEXT: mov s16, v16.s[1] -; CHECK-NEXT: mov v24.d[1], x17 -; CHECK-NEXT: fcvtzs x16, s22 -; CHECK-NEXT: frintx s22, s3 -; CHECK-NEXT: mov s3, v3.s[1] -; CHECK-NEXT: frintx s19, s19 -; CHECK-NEXT: mov s2, v2.s[1] -; CHECK-NEXT: mov v25.d[1], x10 -; CHECK-NEXT: fcvtzs x10, s27 -; CHECK-NEXT: frintx s4, s4 -; CHECK-NEXT: mov v6.d[1], x12 -; CHECK-NEXT: frintx s17, s17 -; CHECK-NEXT: mov s18, v18.s[1] -; CHECK-NEXT: stp q24, q26, [x8, #224] -; CHECK-NEXT: fmov d24, x14 -; CHECK-NEXT: fcvtzs x11, s22 -; CHECK-NEXT: ext v22.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: mov s1, v1.s[1] -; CHECK-NEXT: frintx s3, s3 -; CHECK-NEXT: stp q25, q23, [x8, #192] -; CHECK-NEXT: frintx s2, s2 -; CHECK-NEXT: fcvtzs x12, s4 -; CHECK-NEXT: mov v24.d[1], x10 -; CHECK-NEXT: fcvtzs x10, s19 -; CHECK-NEXT: mov s19, v0.s[1] -; CHECK-NEXT: frintx s16, s16 -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: fmov d4, x11 -; CHECK-NEXT: mov s27, v22.s[1] -; CHECK-NEXT: frintx s22, s22 -; CHECK-NEXT: frintx s1, s1 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fcvtzs x14, s2 -; CHECK-NEXT: frintx s2, s18 -; CHECK-NEXT: stp q24, q6, [x8, #160] -; CHECK-NEXT: fmov d6, x13 -; CHECK-NEXT: fcvtzs x13, s17 -; CHECK-NEXT: frintx s17, s19 -; CHECK-NEXT: fmov d23, x16 -; CHECK-NEXT: mov v7.d[1], x10 -; CHECK-NEXT: frintx s3, s27 -; CHECK-NEXT: fcvtzs x10, s22 -; CHECK-NEXT: fcvtzs x15, s1 -; CHECK-NEXT: mov v6.d[1], x12 -; CHECK-NEXT: fcvtzs x12, s16 -; CHECK-NEXT: mov v4.d[1], x11 -; CHECK-NEXT: mov v21.d[1], x13 -; CHECK-NEXT: fcvtzs x13, s0 -; CHECK-NEXT: mov v23.d[1], x14 -; CHECK-NEXT: fcvtzs x14, s17 -; 
CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: mov v5.d[1], x15 -; CHECK-NEXT: stp q6, q7, [x8, #128] -; CHECK-NEXT: mov v20.d[1], x12 -; CHECK-NEXT: fcvtzs x12, s2 -; CHECK-NEXT: stp q4, q21, [x8, #96] -; CHECK-NEXT: fmov d1, x13 -; CHECK-NEXT: fmov d2, x9 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: stp q23, q20, [x8, #64] -; CHECK-NEXT: mov v1.d[1], x14 -; CHECK-NEXT: mov v2.d[1], x12 -; CHECK-NEXT: stp q5, q0, [x8, #32] -; CHECK-NEXT: stp q1, q2, [x8] +; CHECK-NEXT: ext v16.16b, v7.16b, v7.16b, #8 +; CHECK-NEXT: ext v17.16b, v6.16b, v6.16b, #8 +; CHECK-NEXT: frintx v7.2s, v7.2s +; CHECK-NEXT: frintx v6.2s, v6.2s +; CHECK-NEXT: ext v18.16b, v5.16b, v5.16b, #8 +; CHECK-NEXT: ext v21.16b, v4.16b, v4.16b, #8 +; CHECK-NEXT: ext v22.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: frintx v5.2s, v5.2s +; CHECK-NEXT: ext v23.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: frintx v4.2s, v4.2s +; CHECK-NEXT: ext v19.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v20.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: frintx v16.2s, v16.2s +; CHECK-NEXT: frintx v17.2s, v17.2s +; CHECK-NEXT: fcvtl v7.2d, v7.2s +; CHECK-NEXT: fcvtl v6.2d, v6.2s +; CHECK-NEXT: frintx v18.2s, v18.2s +; CHECK-NEXT: frintx v21.2s, v21.2s +; CHECK-NEXT: frintx v2.2s, v2.2s +; CHECK-NEXT: frintx v3.2s, v3.2s +; CHECK-NEXT: fcvtl v5.2d, v5.2s +; CHECK-NEXT: frintx v23.2s, v23.2s +; CHECK-NEXT: fcvtl v4.2d, v4.2s +; CHECK-NEXT: frintx v1.2s, v1.2s +; CHECK-NEXT: fcvtl v16.2d, v16.2s +; CHECK-NEXT: fcvtl v17.2d, v17.2s +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: fcvtl v18.2d, v18.2s +; CHECK-NEXT: fcvtl v21.2d, v21.2s +; CHECK-NEXT: frintx v20.2s, v20.2s +; CHECK-NEXT: fcvtl v3.2d, v3.2s +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: frintx v0.2s, v0.2s +; CHECK-NEXT: fcvtl v2.2d, v2.2s +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: fcvtzs v16.2d, v16.2d +; CHECK-NEXT: fcvtzs v17.2d, v17.2d +; CHECK-NEXT: fcvtl v1.2d, v1.2s +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; 
CHECK-NEXT: fcvtl v0.2d, v0.2s +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: stp q6, q17, [x8, #192] +; CHECK-NEXT: fcvtl v6.2d, v23.2s +; CHECK-NEXT: frintx v17.2s, v19.2s +; CHECK-NEXT: stp q7, q16, [x8, #224] +; CHECK-NEXT: frintx v7.2s, v22.2s +; CHECK-NEXT: fcvtzs v16.2d, v18.2d +; CHECK-NEXT: fcvtzs v18.2d, v21.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: stp q5, q16, [x8, #160] +; CHECK-NEXT: fcvtl v7.2d, v7.2s +; CHECK-NEXT: fcvtl v5.2d, v20.2s +; CHECK-NEXT: stp q4, q18, [x8, #128] +; CHECK-NEXT: fcvtl v4.2d, v17.2s +; CHECK-NEXT: stp q3, q6, [x8, #96] +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: fcvtzs v3.2d, v5.2d +; CHECK-NEXT: stp q1, q3, [x8, #32] +; CHECK-NEXT: stp q2, q7, [x8, #64] +; CHECK-NEXT: fcvtzs v2.2d, v4.2d +; CHECK-NEXT: stp q0, q2, [x8] ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.lrint.v32i64.v32f32(<32 x float> %x) ret <32 x i64> %a @@ -684,13 +608,8 @@ declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>) define <2 x i64> @lrint_v2f64(<2 x double> %x) { ; CHECK-LABEL: lrint_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: frintx v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: ret %a = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> %x) ret <2 x i64> %a @@ -700,20 +619,10 @@ declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>) define <4 x i64> @lrint_v4f64(<4 x double> %x) { ; CHECK-LABEL: lrint_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d2, v0.d[1] -; CHECK-NEXT: mov d3, v1.d[1] -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: frintx d2, d2 -; CHECK-NEXT: frintx d3, d3 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: fcvtzs x10, d2 -; CHECK-NEXT: fcvtzs x11, d3 -; CHECK-NEXT: fmov d0, 
x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: frintx v0.2d, v0.2d +; CHECK-NEXT: frintx v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-NEXT: ret %a = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> %x) ret <4 x i64> %a @@ -723,34 +632,14 @@ declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>) define <8 x i64> @lrint_v8f64(<8 x double> %x) { ; CHECK-LABEL: lrint_v8f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d4, v0.d[1] -; CHECK-NEXT: mov d5, v1.d[1] -; CHECK-NEXT: mov d6, v2.d[1] -; CHECK-NEXT: mov d7, v3.d[1] -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: frintx d2, d2 -; CHECK-NEXT: frintx d3, d3 -; CHECK-NEXT: frintx d4, d4 -; CHECK-NEXT: frintx d5, d5 -; CHECK-NEXT: frintx d6, d6 -; CHECK-NEXT: frintx d7, d7 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: fcvtzs x10, d2 -; CHECK-NEXT: fcvtzs x11, d3 -; CHECK-NEXT: fcvtzs x12, d4 -; CHECK-NEXT: fcvtzs x13, d5 -; CHECK-NEXT: fcvtzs x14, d6 -; CHECK-NEXT: fcvtzs x15, d7 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: fmov d2, x10 -; CHECK-NEXT: fmov d3, x11 -; CHECK-NEXT: mov v0.d[1], x12 -; CHECK-NEXT: mov v1.d[1], x13 -; CHECK-NEXT: mov v2.d[1], x14 -; CHECK-NEXT: mov v3.d[1], x15 +; CHECK-NEXT: frintx v0.2d, v0.2d +; CHECK-NEXT: frintx v1.2d, v1.2d +; CHECK-NEXT: frintx v2.2d, v2.2d +; CHECK-NEXT: frintx v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> %x) ret <8 x i64> %a @@ -760,62 +649,22 @@ declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>) define <16 x i64> @lrint_v16f64(<16 x double> %x) { ; CHECK-LABEL: lrint_v16f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d16, v0.d[1] -; CHECK-NEXT: mov d17, v1.d[1] -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: 
frintx d1, d1 -; CHECK-NEXT: frintx d18, d2 -; CHECK-NEXT: mov d2, v2.d[1] -; CHECK-NEXT: frintx d19, d3 -; CHECK-NEXT: mov d3, v3.d[1] -; CHECK-NEXT: frintx d16, d16 -; CHECK-NEXT: frintx d17, d17 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: frintx d0, d4 -; CHECK-NEXT: mov d4, v4.d[1] -; CHECK-NEXT: fcvtzs x9, d1 -; CHECK-NEXT: frintx d1, d5 -; CHECK-NEXT: mov d5, v5.d[1] -; CHECK-NEXT: fcvtzs x12, d18 -; CHECK-NEXT: frintx d2, d2 -; CHECK-NEXT: fcvtzs x13, d19 -; CHECK-NEXT: frintx d18, d3 -; CHECK-NEXT: fcvtzs x10, d16 -; CHECK-NEXT: mov d16, v6.d[1] -; CHECK-NEXT: fcvtzs x11, d17 -; CHECK-NEXT: mov d17, v7.d[1] -; CHECK-NEXT: frintx d6, d6 -; CHECK-NEXT: frintx d7, d7 -; CHECK-NEXT: frintx d4, d4 -; CHECK-NEXT: frintx d5, d5 -; CHECK-NEXT: fcvtzs x14, d0 -; CHECK-NEXT: fcvtzs x15, d1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: frintx d16, d16 -; CHECK-NEXT: fcvtzs x9, d2 -; CHECK-NEXT: fmov d2, x12 -; CHECK-NEXT: frintx d17, d17 -; CHECK-NEXT: fcvtzs x8, d6 -; CHECK-NEXT: fcvtzs x12, d7 -; CHECK-NEXT: fmov d3, x13 -; CHECK-NEXT: fcvtzs x13, d18 -; CHECK-NEXT: fcvtzs x16, d4 -; CHECK-NEXT: fcvtzs x17, d5 -; CHECK-NEXT: fmov d4, x14 -; CHECK-NEXT: fmov d5, x15 -; CHECK-NEXT: fcvtzs x18, d16 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: mov v1.d[1], x11 -; CHECK-NEXT: fcvtzs x0, d17 -; CHECK-NEXT: fmov d6, x8 -; CHECK-NEXT: fmov d7, x12 -; CHECK-NEXT: mov v2.d[1], x9 -; CHECK-NEXT: mov v3.d[1], x13 -; CHECK-NEXT: mov v4.d[1], x16 -; CHECK-NEXT: mov v5.d[1], x17 -; CHECK-NEXT: mov v6.d[1], x18 -; CHECK-NEXT: mov v7.d[1], x0 +; CHECK-NEXT: frintx v0.2d, v0.2d +; CHECK-NEXT: frintx v1.2d, v1.2d +; CHECK-NEXT: frintx v2.2d, v2.2d +; CHECK-NEXT: frintx v3.2d, v3.2d +; CHECK-NEXT: frintx v4.2d, v4.2d +; CHECK-NEXT: frintx v5.2d, v5.2d +; CHECK-NEXT: frintx v6.2d, v6.2d +; CHECK-NEXT: frintx v7.2d, v7.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; 
CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: fcvtzs v7.2d, v7.2d ; CHECK-NEXT: ret %a = call <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double> %x) ret <16 x i64> %a @@ -825,130 +674,50 @@ declare <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double>) define <32 x i64> @lrint_v32f64(<32 x double> %x) { ; CHECK-LABEL: lrint_v32f64: ; CHECK: // %bb.0: -; CHECK-NEXT: frintx d20, d0 -; CHECK-NEXT: frintx d22, d3 -; CHECK-NEXT: frintx d21, d4 +; CHECK-NEXT: ldp q17, q16, [sp, #96] +; CHECK-NEXT: frintx v7.2d, v7.2d ; CHECK-NEXT: ldp q19, q18, [sp, #64] -; CHECK-NEXT: frintx d23, d5 -; CHECK-NEXT: ldp q27, q26, [sp, #96] -; CHECK-NEXT: mov d4, v4.d[1] -; CHECK-NEXT: ldp q16, q17, [sp, #32] -; CHECK-NEXT: mov d5, v5.d[1] -; CHECK-NEXT: fcvtzs x9, d20 -; CHECK-NEXT: frintx d20, d6 -; CHECK-NEXT: fcvtzs x11, d22 -; CHECK-NEXT: frintx d22, d19 -; CHECK-NEXT: fcvtzs x12, d21 -; CHECK-NEXT: fcvtzs x10, d23 -; CHECK-NEXT: mov d21, v26.d[1] -; CHECK-NEXT: frintx d23, d27 -; CHECK-NEXT: mov d27, v27.d[1] -; CHECK-NEXT: frintx d24, d16 -; CHECK-NEXT: mov d19, v19.d[1] -; CHECK-NEXT: frintx d25, d17 -; CHECK-NEXT: fcvtzs x13, d20 -; CHECK-NEXT: mov d20, v18.d[1] -; CHECK-NEXT: frintx d18, d18 -; CHECK-NEXT: fcvtzs x16, d22 -; CHECK-NEXT: frintx d22, d26 -; CHECK-NEXT: mov d16, v16.d[1] -; CHECK-NEXT: frintx d21, d21 -; CHECK-NEXT: fcvtzs x17, d23 -; CHECK-NEXT: frintx d23, d27 -; CHECK-NEXT: fcvtzs x14, d24 -; CHECK-NEXT: frintx d26, d19 -; CHECK-NEXT: fmov d19, x11 -; CHECK-NEXT: frintx d20, d20 -; CHECK-NEXT: mov d27, v17.d[1] -; CHECK-NEXT: fcvtzs x15, d25 -; CHECK-NEXT: ldp q25, q24, [sp] -; CHECK-NEXT: fcvtzs x11, d22 -; CHECK-NEXT: fmov d17, x12 -; CHECK-NEXT: fcvtzs x12, d21 -; CHECK-NEXT: fcvtzs x0, d23 -; CHECK-NEXT: fmov d23, x14 -; CHECK-NEXT: fcvtzs x14, d18 -; CHECK-NEXT: fmov d18, x17 -; CHECK-NEXT: fcvtzs x17, d20 -; CHECK-NEXT: frintx d21, d7 -; CHECK-NEXT: fcvtzs x18, d26 -; CHECK-NEXT: fmov d20, 
x11 -; CHECK-NEXT: frintx d22, d25 -; CHECK-NEXT: frintx d26, d27 -; CHECK-NEXT: frintx d16, d16 -; CHECK-NEXT: mov v18.d[1], x0 -; CHECK-NEXT: mov d25, v25.d[1] -; CHECK-NEXT: mov d7, v7.d[1] -; CHECK-NEXT: mov d6, v6.d[1] -; CHECK-NEXT: mov d0, v0.d[1] -; CHECK-NEXT: mov v20.d[1], x12 -; CHECK-NEXT: fcvtzs x11, d21 -; CHECK-NEXT: fmov d21, x15 -; CHECK-NEXT: fcvtzs x12, d22 -; CHECK-NEXT: fmov d22, x16 -; CHECK-NEXT: fcvtzs x15, d26 -; CHECK-NEXT: fmov d26, x14 -; CHECK-NEXT: fcvtzs x14, d16 -; CHECK-NEXT: frintx d25, d25 -; CHECK-NEXT: frintx d7, d7 -; CHECK-NEXT: mov d16, v1.d[1] -; CHECK-NEXT: mov d3, v3.d[1] -; CHECK-NEXT: stp q18, q20, [x8, #224] -; CHECK-NEXT: mov d18, v24.d[1] -; CHECK-NEXT: mov v22.d[1], x18 -; CHECK-NEXT: mov v26.d[1], x17 -; CHECK-NEXT: frintx d24, d24 -; CHECK-NEXT: mov v21.d[1], x15 -; CHECK-NEXT: mov v23.d[1], x14 -; CHECK-NEXT: frintx d20, d2 -; CHECK-NEXT: mov d2, v2.d[1] -; CHECK-NEXT: frintx d6, d6 -; CHECK-NEXT: frintx d5, d5 -; CHECK-NEXT: frintx d4, d4 -; CHECK-NEXT: frintx d18, d18 -; CHECK-NEXT: frintx d1, d1 -; CHECK-NEXT: frintx d3, d3 -; CHECK-NEXT: stp q22, q26, [x8, #192] -; CHECK-NEXT: fmov d22, x10 -; CHECK-NEXT: fcvtzs x10, d24 -; CHECK-NEXT: stp q23, q21, [x8, #160] -; CHECK-NEXT: fmov d21, x11 -; CHECK-NEXT: fmov d24, x13 -; CHECK-NEXT: frintx d2, d2 -; CHECK-NEXT: fcvtzs x13, d6 -; CHECK-NEXT: frintx d6, d16 -; CHECK-NEXT: fcvtzs x11, d18 -; CHECK-NEXT: fmov d18, x12 -; CHECK-NEXT: fcvtzs x12, d25 -; CHECK-NEXT: fmov d23, x10 -; CHECK-NEXT: fcvtzs x10, d7 -; CHECK-NEXT: fcvtzs x14, d5 -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: fcvtzs x15, d3 -; CHECK-NEXT: mov v24.d[1], x13 -; CHECK-NEXT: fcvtzs x13, d2 -; CHECK-NEXT: fmov d2, x9 -; CHECK-NEXT: mov v23.d[1], x11 -; CHECK-NEXT: fcvtzs x11, d4 -; CHECK-NEXT: mov v18.d[1], x12 -; CHECK-NEXT: fcvtzs x12, d20 -; CHECK-NEXT: mov v21.d[1], x10 -; CHECK-NEXT: fcvtzs x10, d1 -; CHECK-NEXT: mov v22.d[1], x14 -; CHECK-NEXT: fcvtzs x14, d6 -; CHECK-NEXT: mov v19.d[1], x15 -; 
CHECK-NEXT: stp q18, q23, [x8, #128] -; CHECK-NEXT: mov v17.d[1], x11 -; CHECK-NEXT: fcvtzs x11, d0 -; CHECK-NEXT: stp q24, q21, [x8, #96] -; CHECK-NEXT: fmov d0, x12 -; CHECK-NEXT: fmov d1, x10 -; CHECK-NEXT: stp q17, q22, [x8, #64] -; CHECK-NEXT: mov v0.d[1], x13 -; CHECK-NEXT: mov v1.d[1], x14 -; CHECK-NEXT: mov v2.d[1], x11 -; CHECK-NEXT: stp q0, q19, [x8, #32] -; CHECK-NEXT: stp q2, q1, [x8] +; CHECK-NEXT: frintx v6.2d, v6.2d +; CHECK-NEXT: ldp q21, q20, [sp, #32] +; CHECK-NEXT: frintx v5.2d, v5.2d +; CHECK-NEXT: frintx v16.2d, v16.2d +; CHECK-NEXT: frintx v17.2d, v17.2d +; CHECK-NEXT: frintx v4.2d, v4.2d +; CHECK-NEXT: frintx v18.2d, v18.2d +; CHECK-NEXT: frintx v19.2d, v19.2d +; CHECK-NEXT: frintx v3.2d, v3.2d +; CHECK-NEXT: ldp q23, q22, [sp] +; CHECK-NEXT: frintx v20.2d, v20.2d +; CHECK-NEXT: frintx v21.2d, v21.2d +; CHECK-NEXT: frintx v2.2d, v2.2d +; CHECK-NEXT: frintx v1.2d, v1.2d +; CHECK-NEXT: fcvtzs v16.2d, v16.2d +; CHECK-NEXT: fcvtzs v17.2d, v17.2d +; CHECK-NEXT: frintx v0.2d, v0.2d +; CHECK-NEXT: frintx v22.2d, v22.2d +; CHECK-NEXT: fcvtzs v18.2d, v18.2d +; CHECK-NEXT: frintx v23.2d, v23.2d +; CHECK-NEXT: fcvtzs v19.2d, v19.2d +; CHECK-NEXT: fcvtzs v20.2d, v20.2d +; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-NEXT: stp q17, q16, [x8, #224] +; CHECK-NEXT: fcvtzs v16.2d, v21.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: fcvtzs v17.2d, v22.2d +; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: stp q19, q18, [x8, #192] +; CHECK-NEXT: fcvtzs v18.2d, v23.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: stp q4, q5, [x8, #64] +; CHECK-NEXT: stp q6, q7, [x8, #96] +; CHECK-NEXT: stp q2, q3, [x8, #32] +; CHECK-NEXT: stp q0, q1, [x8] +; CHECK-NEXT: stp q18, q17, [x8, #128] +; CHECK-NEXT: stp q16, q20, [x8, #160] ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.lrint.v32i64.v16f64(<32 x double> %x) ret <32 x i64> %a From 
1c94c8b36f1c5faa350d5e31ddaf406e215bbbc5 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 9 May 2024 19:52:30 +0100 Subject: [PATCH 5/6] ISel/AArch64: address review --- .../Target/AArch64/AArch64ISelLowering.cpp | 30 +- .../AArch64/sve-fixed-vector-llrint.ll | 920 +++++--- .../CodeGen/AArch64/sve-fixed-vector-lrint.ll | 1354 +++++++---- llvm/test/CodeGen/AArch64/sve-llrint.ll | 1857 ++++++++++++--- llvm/test/CodeGen/AArch64/sve-lrint.ll | 1282 ++++++---- llvm/test/CodeGen/AArch64/vector-llrint.ll | 844 ++++--- llvm/test/CodeGen/AArch64/vector-lrint.ll | 2077 +++++++++++------ 7 files changed, 5662 insertions(+), 2702 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index dcfe07dc330d6..a4a7e1108df33 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1305,9 +1305,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, } // LRINT and LLRINT. - for (auto VT : MVT::fp_fixedlen_vector_valuetypes()) { - setOperationAction(ISD::LRINT, VT, Custom); - setOperationAction(ISD::LLRINT, VT, Custom); + for (auto Op : {ISD::LRINT, ISD::LLRINT}) { + for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) + setOperationAction(Op, Ty, Custom); + if (Subtarget->hasFullFP16()) + for (MVT Ty : {MVT::v4f16, MVT::v8f16}) + setOperationAction(Op, Ty, Custom); } setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom); @@ -1425,12 +1428,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::OR, VT, Custom); } - // LRINT and LLRINT. - for (auto VT : MVT::fp_scalable_vector_valuetypes()) { - setOperationAction(ISD::LRINT, VT, Custom); - setOperationAction(ISD::LLRINT, VT, Custom); - } - // Illegal unpacked integer vector types. 
for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) { setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); @@ -1537,6 +1534,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FFLOOR, VT, Custom); setOperationAction(ISD::FNEARBYINT, VT, Custom); setOperationAction(ISD::FRINT, VT, Custom); + setOperationAction(ISD::LRINT, VT, Custom); + setOperationAction(ISD::LLRINT, VT, Custom); setOperationAction(ISD::FROUND, VT, Custom); setOperationAction(ISD::FROUNDEVEN, VT, Custom); setOperationAction(ISD::FTRUNC, VT, Custom); @@ -1677,12 +1676,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::MULHU, VT, Custom); } - // LRINT and LLRINT. - for (auto VT : MVT::fp_fixedlen_vector_valuetypes()) { - setOperationAction(ISD::LRINT, VT, Custom); - setOperationAction(ISD::LLRINT, VT, Custom); - } - // Use SVE for vectors with more than 2 elements. for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32}) setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); @@ -1956,6 +1949,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { setOperationAction(ISD::FP_TO_SINT, VT, Default); setOperationAction(ISD::FP_TO_UINT, VT, Default); setOperationAction(ISD::FRINT, VT, Default); + setOperationAction(ISD::LRINT, VT, Default); + setOperationAction(ISD::LLRINT, VT, Default); setOperationAction(ISD::FROUND, VT, Default); setOperationAction(ISD::FROUNDEVEN, VT, Default); setOperationAction(ISD::FSQRT, VT, Default); @@ -4394,8 +4389,9 @@ SDValue AArch64TargetLowering::LowerVectorXRINT(SDValue Op, // current rounding mode. SDValue FOp = DAG.getNode(ISD::FRINT, DL, CastVT, Src); - // Truncate the rounded floating point to an integer, rounding to zero. - return DAG.getNode(ISD::FP_TO_SINT, DL, VT, FOp); + // Truncate the rounded floating point to an integer. 
+ return DAG.getNode(ISD::FP_TO_SINT_SAT, DL, VT, FOp, + DAG.getValueType(VT.getVectorElementType())); } SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op, diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll index 89ef30e38849f..9137eae269d91 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll @@ -16,12 +16,14 @@ declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>) define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { ; CHECK-LABEL: llrint_v1i64_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: frintx v0.4h, v0.4h -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov h1, v0.h[1] +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: frintx h1, h1 +; CHECK-NEXT: fcvtzs x8, h0 +; CHECK-NEXT: fcvtzs x9, h1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x) ret <2 x i64> %a @@ -32,14 +34,17 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { ; CHECK-LABEL: llrint_v4i64_v4f16: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx v0.4h, v0.4h -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: mov h1, v0.h[2] +; CHECK-NEXT: mov h2, v0.h[3] +; CHECK-NEXT: mov h3, v0.h[1] +; CHECK-NEXT: fcvtzs x9, h0 +; CHECK-NEXT: fcvtzs x8, h1 +; CHECK-NEXT: fcvtzs x10, h2 +; CHECK-NEXT: fcvtzs x11, h3 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov v0.d[1], x11 +; 
CHECK-NEXT: mov v1.d[1], x10 ; CHECK-NEXT: ret %a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x) ret <4 x i64> %a @@ -51,23 +56,29 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: frintx v0.4h, v0.4h -; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: frintx v1.4h, v1.4h -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h -; CHECK-NEXT: movprfx z2, z1 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.h -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 -; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 +; CHECK-NEXT: mov h4, v0.h[2] +; CHECK-NEXT: mov h2, v0.h[1] +; CHECK-NEXT: mov h7, v0.h[3] +; CHECK-NEXT: fcvtzs x8, h0 +; CHECK-NEXT: mov h3, v1.h[2] +; CHECK-NEXT: mov h5, v1.h[3] +; CHECK-NEXT: mov h6, v1.h[1] +; CHECK-NEXT: fcvtzs x11, h1 +; CHECK-NEXT: fcvtzs x12, h4 +; CHECK-NEXT: fcvtzs x9, h2 +; CHECK-NEXT: fcvtzs x15, h7 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fcvtzs x10, h3 +; CHECK-NEXT: fcvtzs x13, h5 +; CHECK-NEXT: fcvtzs x14, h6 +; CHECK-NEXT: fmov d1, x12 +; CHECK-NEXT: fmov d2, x11 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: fmov d3, x10 +; CHECK-NEXT: mov v1.d[1], x15 +; CHECK-NEXT: mov v2.d[1], x14 +; CHECK-NEXT: mov v3.d[1], x13 ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x) ret <8 x i64> %a @@ -77,43 +88,56 @@ declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>) define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-LABEL: llrint_v16i64_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, 
#8 -; CHECK-NEXT: frintx v0.4h, v0.4h +; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #8 ; CHECK-NEXT: frintx v1.4h, v1.4h -; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: frintx v3.4h, v0.4h +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: frintx v2.4h, v2.4h -; CHECK-NEXT: frintx v3.4h, v3.4h -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z2.s, z2.h -; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.h -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h -; CHECK-NEXT: movprfx z6, z3 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z3.h -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z5.d, z4.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: mov z7.d, z6.d -; CHECK-NEXT: ext z5.b, z5.b, z4.b, #16 -; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 -; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5 -; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 -; CHECK-NEXT: ext z7.b, z7.b, z6.b, #16 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 -; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 -; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7 +; CHECK-NEXT: mov h4, v1.h[2] +; CHECK-NEXT: mov h5, v3.h[2] +; CHECK-NEXT: frintx v0.4h, v0.4h +; CHECK-NEXT: mov h6, v3.h[1] +; CHECK-NEXT: fcvtzs x9, h3 +; CHECK-NEXT: mov h16, v1.h[1] +; CHECK-NEXT: fcvtzs x12, h1 +; CHECK-NEXT: mov h3, v3.h[3] +; CHECK-NEXT: mov h17, v1.h[3] +; CHECK-NEXT: mov h7, v2.h[3] +; CHECK-NEXT: fcvtzs x8, h4 +; CHECK-NEXT: fcvtzs x10, h5 +; CHECK-NEXT: mov h4, v2.h[2] +; CHECK-NEXT: mov h5, v0.h[2] +; CHECK-NEXT: fcvtzs x11, h6 +; CHECK-NEXT: mov h6, v0.h[3] 
+; CHECK-NEXT: fcvtzs x15, h2 +; CHECK-NEXT: mov h2, v2.h[1] +; CHECK-NEXT: fcvtzs x14, h0 +; CHECK-NEXT: fcvtzs x17, h3 +; CHECK-NEXT: fcvtzs x0, h17 +; CHECK-NEXT: fcvtzs x13, h7 +; CHECK-NEXT: mov h7, v0.h[1] +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fcvtzs x16, h4 +; CHECK-NEXT: fcvtzs x9, h5 +; CHECK-NEXT: fmov d4, x12 +; CHECK-NEXT: fcvtzs x12, h16 +; CHECK-NEXT: fmov d1, x10 +; CHECK-NEXT: fcvtzs x10, h6 +; CHECK-NEXT: fmov d5, x8 +; CHECK-NEXT: fcvtzs x8, h2 +; CHECK-NEXT: fmov d2, x14 +; CHECK-NEXT: fcvtzs x18, h7 +; CHECK-NEXT: fmov d6, x15 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: fmov d3, x9 +; CHECK-NEXT: fmov d7, x16 +; CHECK-NEXT: mov v1.d[1], x17 +; CHECK-NEXT: mov v4.d[1], x12 +; CHECK-NEXT: mov v5.d[1], x0 +; CHECK-NEXT: mov v6.d[1], x8 +; CHECK-NEXT: mov v2.d[1], x18 +; CHECK-NEXT: mov v3.d[1], x10 +; CHECK-NEXT: mov v7.d[1], x13 ; CHECK-NEXT: ret %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x) ret <16 x i64> %a @@ -123,61 +147,131 @@ declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>) define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-LABEL: llrint_v32i64_v32f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v4.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: ext v5.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: mov x9, #24 // =0x18 -; CHECK-NEXT: frintx v3.4h, v3.4h -; CHECK-NEXT: ext v6.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: frintx v2.4h, v2.4h -; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-NEXT: sub x9, sp, #272 +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: frintx v5.4h, v0.4h +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v17.16b, v2.16b, v2.16b, #8 ; CHECK-NEXT: frintx v1.4h, v1.4h -; CHECK-NEXT: frintx v0.4h, v0.4h +; CHECK-NEXT: frintx v2.4h, v2.4h ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: mov h6, v5.h[3] +; CHECK-NEXT: frintx v0.4h, v0.4h +; CHECK-NEXT: mov h7, v5.h[2] +; CHECK-NEXT: mov h16, v5.h[1] ; CHECK-NEXT: frintx v4.4h, v4.4h -; CHECK-NEXT: frintx v5.4h, v5.4h -; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: frintx v6.4h, v6.4h -; CHECK-NEXT: uunpklo z2.s, z2.h -; CHECK-NEXT: frintx v7.4h, v7.4h -; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z4.s, z4.h -; CHECK-NEXT: uunpklo z5.s, z5.h -; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: uunpklo z6.s, z6.h -; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: uunpklo z7.s, z7.h -; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: uunpklo z4.d, z4.s -; CHECK-NEXT: uunpklo z5.d, z5.s -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h -; CHECK-NEXT: uunpklo z6.d, z6.s -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h -; CHECK-NEXT: uunpklo z7.d, z7.s -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h -; CHECK-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: fcvtzs x12, h5 +; CHECK-NEXT: ext v5.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: frintx v17.4h, v17.4h +; CHECK-NEXT: frintx v3.4h, v3.4h +; CHECK-NEXT: fcvtzs x9, h6 +; CHECK-NEXT: mov h6, v0.h[3] +; CHECK-NEXT: fcvtzs x10, h7 +; CHECK-NEXT: mov h7, v0.h[2] +; CHECK-NEXT: fcvtzs x11, h16 +; CHECK-NEXT: mov h16, v0.h[1] +; CHECK-NEXT: fcvtzs x13, h6 +; CHECK-NEXT: mov h6, v4.h[3] +; 
CHECK-NEXT: stp x10, x9, [sp, #48] +; CHECK-NEXT: fcvtzs x9, h7 +; CHECK-NEXT: mov h7, v4.h[2] +; CHECK-NEXT: fcvtzs x10, h16 +; CHECK-NEXT: mov h16, v4.h[1] +; CHECK-NEXT: stp x12, x11, [sp, #32] +; CHECK-NEXT: fcvtzs x11, h0 +; CHECK-NEXT: frintx v0.4h, v5.4h +; CHECK-NEXT: mov h5, v17.h[3] +; CHECK-NEXT: fcvtzs x12, h6 +; CHECK-NEXT: mov h6, v17.h[2] +; CHECK-NEXT: stp x9, x13, [sp, #16] +; CHECK-NEXT: fcvtzs x13, h7 +; CHECK-NEXT: mov h7, v17.h[1] +; CHECK-NEXT: fcvtzs x9, h16 +; CHECK-NEXT: stp x11, x10, [sp] +; CHECK-NEXT: fcvtzs x10, h4 +; CHECK-NEXT: fcvtzs x11, h5 +; CHECK-NEXT: mov h4, v0.h[3] +; CHECK-NEXT: mov h5, v0.h[2] +; CHECK-NEXT: stp x13, x12, [sp, #80] +; CHECK-NEXT: fcvtzs x12, h6 +; CHECK-NEXT: fcvtzs x13, h7 +; CHECK-NEXT: mov h6, v0.h[1] +; CHECK-NEXT: stp x10, x9, [sp, #64] +; CHECK-NEXT: fcvtzs x9, h17 +; CHECK-NEXT: mov h7, v1.h[3] +; CHECK-NEXT: fcvtzs x10, h4 +; CHECK-NEXT: mov h4, v1.h[2] +; CHECK-NEXT: stp x12, x11, [sp, #144] +; CHECK-NEXT: fcvtzs x11, h5 +; CHECK-NEXT: mov h5, v1.h[1] +; CHECK-NEXT: fcvtzs x12, h6 +; CHECK-NEXT: stp x9, x13, [sp, #128] +; CHECK-NEXT: fcvtzs x9, h0 +; CHECK-NEXT: fcvtzs x13, h7 +; CHECK-NEXT: mov h0, v2.h[3] +; CHECK-NEXT: stp x11, x10, [sp, #208] +; CHECK-NEXT: fcvtzs x10, h4 +; CHECK-NEXT: mov h4, v2.h[2] +; CHECK-NEXT: fcvtzs x11, h5 +; CHECK-NEXT: mov h5, v2.h[1] +; CHECK-NEXT: stp x9, x12, [sp, #192] +; CHECK-NEXT: fcvtzs x9, h1 +; CHECK-NEXT: fcvtzs x12, h0 +; CHECK-NEXT: mov h0, v3.h[3] +; CHECK-NEXT: mov h1, v3.h[2] +; CHECK-NEXT: stp x10, x13, [sp, #112] +; CHECK-NEXT: fcvtzs x10, h4 +; CHECK-NEXT: mov h4, v3.h[1] +; CHECK-NEXT: fcvtzs x13, h5 +; CHECK-NEXT: stp x9, x11, [sp, #96] +; CHECK-NEXT: fcvtzs x9, h2 +; CHECK-NEXT: fcvtzs x11, h0 +; CHECK-NEXT: stp x10, x12, [sp, #176] +; CHECK-NEXT: fcvtzs x10, h1 +; CHECK-NEXT: fcvtzs x12, h4 +; CHECK-NEXT: stp x9, x13, [sp, #160] +; CHECK-NEXT: fcvtzs x9, h3 +; CHECK-NEXT: stp x10, x11, [sp, #240] +; CHECK-NEXT: add x10, sp, #64 +; CHECK-NEXT: 
stp x9, x12, [sp, #224] +; CHECK-NEXT: add x9, sp, #32 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9] +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x10] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x9] +; CHECK-NEXT: add x9, sp, #224 +; CHECK-NEXT: add x10, sp, #128 +; CHECK-NEXT: ld1d { z3.d }, p0/z, [x9] +; CHECK-NEXT: add x9, sp, #160 +; CHECK-NEXT: ld1d { z4.d }, p0/z, [x10] +; CHECK-NEXT: add x10, sp, #96 +; CHECK-NEXT: ld1d { z5.d }, p0/z, [x9] +; CHECK-NEXT: add x9, sp, #192 +; CHECK-NEXT: ld1d { z6.d }, p0/z, [x10] +; CHECK-NEXT: mov x10, #24 // =0x18 +; CHECK-NEXT: ld1d { z7.d }, p0/z, [x9] ; CHECK-NEXT: mov x9, #16 // =0x10 -; CHECK-NEXT: movprfx z3, z5 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z5.h -; CHECK-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z3.d }, p0, [x8, x10, lsl #3] +; CHECK-NEXT: st1d { z5.d }, p0, [x8, x9, lsl #3] ; CHECK-NEXT: mov x9, #8 // =0x8 -; CHECK-NEXT: movprfx z2, z6 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.h -; CHECK-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z6.d }, p0, [x8, x9, lsl #3] ; CHECK-NEXT: mov x9, #28 // =0x1c -; CHECK-NEXT: movprfx z1, z7 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z7.h -; CHECK-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z7.d }, p0, [x8, x9, lsl #3] ; CHECK-NEXT: mov x9, #20 // =0x14 -; CHECK-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] ; CHECK-NEXT: mov x9, #12 // =0xc ; CHECK-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] ; CHECK-NEXT: mov x9, #4 // =0x4 ; CHECK-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] ; CHECK-NEXT: st1d { z0.d }, p0, [x8] +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half> %x) ret <32 x i64> %a @@ -187,10 +281,10 @@ declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>) define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { ; CHECK-LABEL: llrint_v1i64_v1f32: ; CHECK: // 
%bb.0: -; CHECK-NEXT: frintx v0.2s, v0.2s -; CHECK-NEXT: fcvtl v0.2d, v0.2s -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x) ret <1 x i64> %a @@ -201,8 +295,11 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { ; CHECK-LABEL: llrint_v2i64_v2f32: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx v0.2s, v0.2s -; CHECK-NEXT: fcvtl v0.2d, v0.2s -; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x) ret <2 x i64> %a @@ -213,13 +310,17 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { ; CHECK-LABEL: llrint_v4i64_v4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx v0.4s, v0.4s -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: mov s1, v0.s[2] +; CHECK-NEXT: mov s2, v0.s[3] +; CHECK-NEXT: mov s3, v0.s[1] +; CHECK-NEXT: fcvtzs x9, s0 +; CHECK-NEXT: fcvtzs x8, s1 +; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v1.d[1], x10 ; CHECK-NEXT: ret %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x) ret <4 x i64> %a @@ -231,20 +332,28 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: frintx v0.4s, v0.4s ; CHECK-NEXT: frintx v1.4s, v1.4s -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: uunpklo 
z1.d, z1.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s -; CHECK-NEXT: movprfx z2, z1 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.s -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 +; CHECK-NEXT: mov s3, v1.s[2] +; CHECK-NEXT: mov s4, v0.s[2] +; CHECK-NEXT: mov s2, v0.s[1] +; CHECK-NEXT: mov s5, v1.s[3] +; CHECK-NEXT: mov s6, v1.s[1] +; CHECK-NEXT: mov s7, v0.s[3] +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fcvtzs x10, s1 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fcvtzs x12, s4 +; CHECK-NEXT: fcvtzs x9, s2 +; CHECK-NEXT: fcvtzs x13, s5 +; CHECK-NEXT: fcvtzs x14, s6 +; CHECK-NEXT: fcvtzs x15, s7 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d2, x10 +; CHECK-NEXT: fmov d1, x12 +; CHECK-NEXT: fmov d3, x11 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: mov v2.d[1], x14 +; CHECK-NEXT: mov v1.d[1], x15 +; CHECK-NEXT: mov v3.d[1], x13 ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x) ret <8 x i64> %a @@ -254,37 +363,54 @@ declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { ; CHECK-LABEL: llrint_v16i64_v16f32: ; CHECK: // %bb.0: +; CHECK-NEXT: frintx v3.4s, v3.4s +; CHECK-NEXT: frintx v2.4s, v2.4s ; CHECK-NEXT: frintx v1.4s, v1.4s ; CHECK-NEXT: frintx v0.4s, v0.4s -; CHECK-NEXT: frintx v2.4s, v2.4s -; CHECK-NEXT: frintx v3.4s, v3.4s -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: uunpklo z4.d, z2.s -; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s -; CHECK-NEXT: movprfx z2, z1 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.s -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s -; CHECK-NEXT: movprfx z6, z3 -; CHECK-NEXT: 
fcvtzs z6.d, p0/m, z3.s -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: mov z5.d, z4.d -; CHECK-NEXT: mov z7.d, z6.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 -; CHECK-NEXT: ext z5.b, z5.b, z4.b, #16 -; CHECK-NEXT: ext z7.b, z7.b, z6.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 -; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4 -; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 -; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5 -; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7 +; CHECK-NEXT: mov s4, v3.s[2] +; CHECK-NEXT: mov s5, v2.s[2] +; CHECK-NEXT: mov s6, v1.s[2] +; CHECK-NEXT: mov s7, v0.s[2] +; CHECK-NEXT: fcvtzs x10, s1 +; CHECK-NEXT: fcvtzs x11, s0 +; CHECK-NEXT: mov s16, v0.s[1] +; CHECK-NEXT: mov s17, v1.s[1] +; CHECK-NEXT: mov s18, v3.s[1] +; CHECK-NEXT: fcvtzs x14, s3 +; CHECK-NEXT: fcvtzs x16, s2 +; CHECK-NEXT: fcvtzs x8, s4 +; CHECK-NEXT: mov s4, v2.s[1] +; CHECK-NEXT: fcvtzs x9, s5 +; CHECK-NEXT: mov s5, v1.s[3] +; CHECK-NEXT: fcvtzs x12, s6 +; CHECK-NEXT: mov s6, v0.s[3] +; CHECK-NEXT: fcvtzs x13, s7 +; CHECK-NEXT: mov s7, v3.s[3] +; CHECK-NEXT: fmov d0, x11 +; CHECK-NEXT: fcvtzs x17, s16 +; CHECK-NEXT: fcvtzs x18, s18 +; CHECK-NEXT: fcvtzs x15, s4 +; CHECK-NEXT: mov s4, v2.s[3] +; CHECK-NEXT: fmov d2, x10 +; CHECK-NEXT: fcvtzs x11, s5 +; CHECK-NEXT: fcvtzs x10, s6 +; CHECK-NEXT: fmov d3, x12 +; CHECK-NEXT: fmov d1, x13 +; CHECK-NEXT: fcvtzs x12, s17 +; CHECK-NEXT: fcvtzs x13, s7 +; CHECK-NEXT: fmov d5, x9 +; CHECK-NEXT: fmov d6, x14 +; CHECK-NEXT: fmov d7, x8 +; CHECK-NEXT: fcvtzs x0, s4 +; CHECK-NEXT: fmov d4, x16 +; CHECK-NEXT: mov v0.d[1], x17 +; CHECK-NEXT: mov v1.d[1], x10 +; CHECK-NEXT: mov v3.d[1], x11 +; CHECK-NEXT: mov v2.d[1], x12 +; CHECK-NEXT: mov v6.d[1], x18 +; CHECK-NEXT: mov 
v7.d[1], x13 +; CHECK-NEXT: mov v4.d[1], x15 +; CHECK-NEXT: mov v5.d[1], x0 ; CHECK-NEXT: ret %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x) ret <16 x i64> %a @@ -294,46 +420,127 @@ declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) { ; CHECK-LABEL: llrint_v32i64_v32f32: ; CHECK: // %bb.0: -; CHECK-NEXT: frintx v7.4s, v7.4s -; CHECK-NEXT: frintx v6.4s, v6.4s -; CHECK-NEXT: mov x9, #28 // =0x1c -; CHECK-NEXT: frintx v5.4s, v5.4s -; CHECK-NEXT: frintx v4.4s, v4.4s -; CHECK-NEXT: frintx v3.4s, v3.4s +; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: sub x9, sp, #272 +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: frintx v0.4s, v0.4s +; CHECK-NEXT: frintx v1.4s, v1.4s ; CHECK-NEXT: frintx v2.4s, v2.4s ; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: frintx v1.4s, v1.4s -; CHECK-NEXT: frintx v0.4s, v0.4s -; CHECK-NEXT: uunpklo z7.d, z7.s -; CHECK-NEXT: uunpklo z6.d, z6.s -; CHECK-NEXT: uunpklo z5.d, z5.s -; CHECK-NEXT: uunpklo z4.d, z4.s -; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.s -; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s -; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.s -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s -; CHECK-NEXT: st1d { z7.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov s16, v0.s[3] +; CHECK-NEXT: mov s17, v0.s[2] +; CHECK-NEXT: mov s18, v0.s[1] +; CHECK-NEXT: fcvtzs x12, s0 +; CHECK-NEXT: frintx v0.4s, v3.4s +; CHECK-NEXT: mov s3, v2.s[3] +; CHECK-NEXT: fcvtzs x9, s16 +; CHECK-NEXT: mov s16, v1.s[3] +; CHECK-NEXT: fcvtzs x10, s17 +; 
CHECK-NEXT: mov s17, v1.s[2] +; CHECK-NEXT: fcvtzs x11, s18 +; CHECK-NEXT: mov s18, v1.s[1] +; CHECK-NEXT: fcvtzs x13, s16 +; CHECK-NEXT: stp x10, x9, [sp, #16] +; CHECK-NEXT: mov s16, v2.s[2] +; CHECK-NEXT: fcvtzs x9, s17 +; CHECK-NEXT: fcvtzs x10, s18 +; CHECK-NEXT: mov s17, v2.s[1] +; CHECK-NEXT: stp x12, x11, [sp] +; CHECK-NEXT: fcvtzs x11, s1 +; CHECK-NEXT: frintx v1.4s, v4.4s +; CHECK-NEXT: fcvtzs x12, s3 +; CHECK-NEXT: mov s3, v0.s[3] +; CHECK-NEXT: mov s4, v0.s[2] +; CHECK-NEXT: stp x9, x13, [sp, #48] +; CHECK-NEXT: fcvtzs x13, s16 +; CHECK-NEXT: fcvtzs x9, s17 +; CHECK-NEXT: mov s16, v0.s[1] +; CHECK-NEXT: stp x11, x10, [sp, #32] +; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: frintx v2.4s, v5.4s +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: mov s3, v1.s[3] +; CHECK-NEXT: mov s5, v1.s[1] +; CHECK-NEXT: stp x13, x12, [sp, #80] +; CHECK-NEXT: fcvtzs x12, s4 +; CHECK-NEXT: mov s4, v1.s[2] +; CHECK-NEXT: fcvtzs x13, s16 +; CHECK-NEXT: stp x10, x9, [sp, #64] +; CHECK-NEXT: fcvtzs x9, s0 +; CHECK-NEXT: mov s0, v2.s[3] +; CHECK-NEXT: fcvtzs x10, s3 +; CHECK-NEXT: frintx v3.4s, v6.4s +; CHECK-NEXT: stp x12, x11, [sp, #112] +; CHECK-NEXT: fcvtzs x11, s4 +; CHECK-NEXT: mov s4, v2.s[2] +; CHECK-NEXT: fcvtzs x12, s5 +; CHECK-NEXT: mov s5, v2.s[1] +; CHECK-NEXT: stp x9, x13, [sp, #96] +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: fcvtzs x13, s0 +; CHECK-NEXT: mov s0, v3.s[3] +; CHECK-NEXT: frintx v1.4s, v7.4s +; CHECK-NEXT: stp x11, x10, [sp, #144] +; CHECK-NEXT: fcvtzs x10, s4 +; CHECK-NEXT: mov s4, v3.s[2] +; CHECK-NEXT: fcvtzs x11, s5 +; CHECK-NEXT: mov s5, v3.s[1] +; CHECK-NEXT: stp x9, x12, [sp, #128] +; CHECK-NEXT: fcvtzs x9, s2 +; CHECK-NEXT: fcvtzs x12, s0 +; CHECK-NEXT: mov s0, v1.s[3] +; CHECK-NEXT: mov s2, v1.s[2] +; CHECK-NEXT: stp x10, x13, [sp, #176] +; CHECK-NEXT: fcvtzs x10, s4 +; CHECK-NEXT: mov s4, v1.s[1] +; CHECK-NEXT: fcvtzs x13, s5 +; CHECK-NEXT: stp x9, x11, [sp, #160] +; CHECK-NEXT: fcvtzs x9, s3 +; CHECK-NEXT: fcvtzs x11, s0 +; CHECK-NEXT: stp x10, 
x12, [sp, #208] +; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: fcvtzs x12, s4 +; CHECK-NEXT: stp x9, x13, [sp, #192] +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: stp x10, x11, [sp, #240] +; CHECK-NEXT: add x10, sp, #64 +; CHECK-NEXT: stp x9, x12, [sp, #224] +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9] +; CHECK-NEXT: add x9, sp, #32 +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x10] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x9] +; CHECK-NEXT: add x9, sp, #224 +; CHECK-NEXT: add x10, sp, #96 +; CHECK-NEXT: ld1d { z3.d }, p0/z, [x9] +; CHECK-NEXT: add x9, sp, #192 +; CHECK-NEXT: ld1d { z4.d }, p0/z, [x10] +; CHECK-NEXT: add x10, sp, #160 +; CHECK-NEXT: ld1d { z5.d }, p0/z, [x9] +; CHECK-NEXT: add x9, sp, #128 +; CHECK-NEXT: ld1d { z6.d }, p0/z, [x10] +; CHECK-NEXT: mov x10, #28 // =0x1c +; CHECK-NEXT: ld1d { z7.d }, p0/z, [x9] ; CHECK-NEXT: mov x9, #24 // =0x18 -; CHECK-NEXT: st1d { z6.d }, p0, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #20 // =0x14 +; CHECK-NEXT: st1d { z3.d }, p0, [x8, x10, lsl #3] ; CHECK-NEXT: st1d { z5.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: mov x9, #20 // =0x14 +; CHECK-NEXT: st1d { z6.d }, p0, [x8, x9, lsl #3] ; CHECK-NEXT: mov x9, #16 // =0x10 -; CHECK-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z7.d }, p0, [x8, x9, lsl #3] ; CHECK-NEXT: mov x9, #12 // =0xc -; CHECK-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] ; CHECK-NEXT: mov x9, #8 // =0x8 ; CHECK-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] ; CHECK-NEXT: mov x9, #4 // =0x4 ; CHECK-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] ; CHECK-NEXT: st1d { z0.d }, p0, [x8] +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float> %x) ret <32 x i64> %a @@ -372,11 +579,17 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d ; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: frintx z0.d, 
p0/m, z0.d -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: mov z1.d, z0.d[2] +; CHECK-NEXT: mov z2.d, z0.d[3] +; CHECK-NEXT: mov z3.d, z0.d[1] +; CHECK-NEXT: fcvtzs x9, d0 +; CHECK-NEXT: fcvtzs x8, d1 +; CHECK-NEXT: fcvtzs x10, d2 +; CHECK-NEXT: fcvtzs x11, d3 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v1.d[1], x10 ; CHECK-NEXT: ret %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x) ret <4 x i64> %a @@ -391,23 +604,34 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 -; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d ; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: frintx z1.d, p0/m, z2.d -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d -; CHECK-NEXT: movprfx z2, z1 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 +; CHECK-NEXT: mov z4.d, z1.d[2] +; CHECK-NEXT: mov z5.d, z0.d[2] +; CHECK-NEXT: mov z2.d, z0.d[1] +; CHECK-NEXT: mov z3.d, z1.d[3] +; CHECK-NEXT: mov z6.d, z0.d[3] +; CHECK-NEXT: fcvtzs x8, d0 +; CHECK-NEXT: mov z0.d, z1.d[1] +; CHECK-NEXT: fcvtzs x10, d1 +; CHECK-NEXT: fcvtzs x11, d4 +; CHECK-NEXT: fcvtzs x12, d5 +; CHECK-NEXT: fcvtzs x9, d2 +; CHECK-NEXT: fcvtzs x13, d3 +; 
CHECK-NEXT: fcvtzs x14, d6 +; CHECK-NEXT: fcvtzs x15, d0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d2, x10 +; CHECK-NEXT: fmov d1, x12 +; CHECK-NEXT: fmov d3, x11 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: mov v2.d[1], x15 +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: mov v3.d[1], x13 ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x) ret <8 x i64> %a @@ -417,50 +641,70 @@ declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) define <16 x i64> @llrint_v16f64(<16 x double> %x) { ; CHECK-LABEL: llrint_v16f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ptrue p1.d, vl2 ; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6 ; CHECK-NEXT: // kill: def $q4 killed $q4 def $z4 -; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7 ; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5 +; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 -; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d -; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-NEXT: splice z6.d, p0, z6.d, z7.d -; CHECK-NEXT: splice z4.d, p0, z4.d, z5.d ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: splice z6.d, p1, z6.d, z7.d +; CHECK-NEXT: splice z4.d, p1, z4.d, z5.d +; CHECK-NEXT: splice z2.d, p1, z2.d, z3.d +; CHECK-NEXT: splice z0.d, p1, z0.d, z1.d +; CHECK-NEXT: movprfx z3, z6 +; CHECK-NEXT: frintx z3.d, p0/m, z6.d +; CHECK-NEXT: movprfx z1, z4 +; CHECK-NEXT: frintx z1.d, p0/m, z4.d +; CHECK-NEXT: frintx z2.d, p0/m, z2.d ; CHECK-NEXT: frintx z0.d, p0/m, z0.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: frintx z1.d, p0/m, z2.d -; CHECK-NEXT: movprfx z5, z6 -; CHECK-NEXT: frintx z5.d, p0/m, z6.d -; CHECK-NEXT: movprfx z3, z4 -; CHECK-NEXT: frintx z3.d, p0/m, z4.d -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d -; CHECK-NEXT: movprfx 
z2, z1 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d -; CHECK-NEXT: movprfx z6, z5 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z5.d -; CHECK-NEXT: movprfx z4, z3 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z3.d -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: mov z7.d, z6.d -; CHECK-NEXT: mov z5.d, z4.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 -; CHECK-NEXT: ext z7.b, z7.b, z6.b, #16 -; CHECK-NEXT: ext z5.b, z5.b, z4.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 -; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4 -; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 -; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7 -; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5 +; CHECK-NEXT: mov z4.d, z3.d[2] +; CHECK-NEXT: mov z5.d, z1.d[2] +; CHECK-NEXT: mov z6.d, z2.d[3] +; CHECK-NEXT: fcvtzs x11, d0 +; CHECK-NEXT: fcvtzs x12, d1 +; CHECK-NEXT: fcvtzs x13, d2 +; CHECK-NEXT: fcvtzs x14, d3 +; CHECK-NEXT: mov z7.d, z3.d[3] +; CHECK-NEXT: mov z16.d, z1.d[3] +; CHECK-NEXT: fcvtzs x9, d4 +; CHECK-NEXT: fcvtzs x10, d5 +; CHECK-NEXT: mov z4.d, z2.d[2] +; CHECK-NEXT: mov z5.d, z0.d[2] +; CHECK-NEXT: fcvtzs x8, d6 +; CHECK-NEXT: mov z2.d, z2.d[1] +; CHECK-NEXT: mov z6.d, z0.d[3] +; CHECK-NEXT: mov z1.d, z1.d[1] +; CHECK-NEXT: mov z3.d, z3.d[1] +; CHECK-NEXT: fcvtzs x15, d4 +; CHECK-NEXT: mov z4.d, z0.d[1] +; CHECK-NEXT: fmov d0, x11 +; CHECK-NEXT: fcvtzs x16, d5 +; CHECK-NEXT: fcvtzs x11, d2 +; CHECK-NEXT: fmov d2, x13 +; CHECK-NEXT: fcvtzs x17, d7 +; CHECK-NEXT: fcvtzs x18, d16 +; CHECK-NEXT: fcvtzs x0, d3 +; CHECK-NEXT: fcvtzs x13, d4 +; CHECK-NEXT: fmov d4, x12 +; CHECK-NEXT: fcvtzs x12, d6 +; CHECK-NEXT: fmov d6, x14 +; CHECK-NEXT: fcvtzs x14, d1 +; CHECK-NEXT: fmov d3, x15 +; CHECK-NEXT: fmov d1, x16 +; CHECK-NEXT: fmov d5, x10 +; CHECK-NEXT: fmov d7, x9 
+; CHECK-NEXT: mov v2.d[1], x11 +; CHECK-NEXT: mov v0.d[1], x13 +; CHECK-NEXT: mov v3.d[1], x8 +; CHECK-NEXT: mov v6.d[1], x0 +; CHECK-NEXT: mov v4.d[1], x14 +; CHECK-NEXT: mov v1.d[1], x12 +; CHECK-NEXT: mov v5.d[1], x18 +; CHECK-NEXT: mov v7.d[1], x17 ; CHECK-NEXT: ret %a = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> %x) ret <16 x i64> %a @@ -470,63 +714,151 @@ declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>) define <32 x i64> @llrint_v32f64(<32 x double> %x) { ; CHECK-LABEL: llrint_v32f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q17, q16, [sp, #96] -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q19, q18, [sp, #64] -; CHECK-NEXT: ptrue p1.d, vl4 +; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: sub x9, sp, #272 +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ptrue p1.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 +; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 ; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7 ; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6 -; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5 ; CHECK-NEXT: // kill: def $q4 killed $q4 def $z4 -; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 -; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 -; CHECK-NEXT: mov x9, #28 // =0x1c -; CHECK-NEXT: splice z17.d, p0, z17.d, z16.d -; CHECK-NEXT: ldp q20, q16, [sp, #32] -; CHECK-NEXT: splice z19.d, p0, z19.d, z18.d -; CHECK-NEXT: ldp q21, q18, [sp] -; CHECK-NEXT: splice z6.d, p0, z6.d, z7.d -; CHECK-NEXT: splice z4.d, p0, z4.d, z5.d -; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d -; CHECK-NEXT: splice z20.d, p0, z20.d, z16.d -; CHECK-NEXT: splice 
z0.d, p0, z0.d, z1.d -; CHECK-NEXT: splice z21.d, p0, z21.d, z18.d -; CHECK-NEXT: movprfx z7, z17 -; CHECK-NEXT: frintx z7.d, p1/m, z17.d -; CHECK-NEXT: movprfx z5, z19 -; CHECK-NEXT: frintx z5.d, p1/m, z19.d -; CHECK-NEXT: frintx z6.d, p1/m, z6.d -; CHECK-NEXT: frintx z4.d, p1/m, z4.d -; CHECK-NEXT: frintx z2.d, p1/m, z2.d -; CHECK-NEXT: movprfx z3, z20 -; CHECK-NEXT: frintx z3.d, p1/m, z20.d -; CHECK-NEXT: frintx z0.d, p1/m, z0.d -; CHECK-NEXT: movprfx z1, z21 -; CHECK-NEXT: frintx z1.d, p1/m, z21.d -; CHECK-NEXT: fcvtzs z7.d, p1/m, z7.d -; CHECK-NEXT: fcvtzs z5.d, p1/m, z5.d -; CHECK-NEXT: fcvtzs z6.d, p1/m, z6.d -; CHECK-NEXT: fcvtzs z4.d, p1/m, z4.d -; CHECK-NEXT: fcvtzs z2.d, p1/m, z2.d -; CHECK-NEXT: fcvtzs z3.d, p1/m, z3.d -; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d -; CHECK-NEXT: fcvtzs z1.d, p1/m, z1.d -; CHECK-NEXT: st1d { z7.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5 +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: splice z0.d, p1, z0.d, z1.d +; CHECK-NEXT: splice z2.d, p1, z2.d, z3.d +; CHECK-NEXT: splice z4.d, p1, z4.d, z5.d +; CHECK-NEXT: splice z6.d, p1, z6.d, z7.d +; CHECK-NEXT: ldp q5, q19, [x29, #16] +; CHECK-NEXT: movprfx z3, z0 +; CHECK-NEXT: frintx z3.d, p0/m, z0.d +; CHECK-NEXT: movprfx z16, z2 +; CHECK-NEXT: frintx z16.d, p0/m, z2.d +; CHECK-NEXT: frintx z4.d, p0/m, z4.d +; CHECK-NEXT: splice z5.d, p1, z5.d, z19.d +; CHECK-NEXT: frintx z6.d, p0/m, z6.d +; CHECK-NEXT: ldp q2, q17, [x29, #48] +; CHECK-NEXT: ldp q0, q1, [x29, #112] +; CHECK-NEXT: mov z18.d, z3.d[3] +; CHECK-NEXT: mov z7.d, z3.d[2] +; CHECK-NEXT: fcvtzs x9, d3 +; CHECK-NEXT: mov z3.d, z3.d[1] +; CHECK-NEXT: mov z20.d, z16.d[3] +; CHECK-NEXT: fcvtzs x12, d16 +; CHECK-NEXT: splice z2.d, p1, z2.d, z17.d +; CHECK-NEXT: frintx z5.d, p0/m, z5.d +; CHECK-NEXT: splice z0.d, p1, z0.d, z1.d +; CHECK-NEXT: fcvtzs x10, d18 +; CHECK-NEXT: fcvtzs x11, d7 +; CHECK-NEXT: mov z18.d, z16.d[2] +; CHECK-NEXT: mov z7.d, z16.d[1] +; CHECK-NEXT: fcvtzs x13, d3 +; CHECK-NEXT: 
fcvtzs x14, d20 +; CHECK-NEXT: str x9, [sp, #128] +; CHECK-NEXT: mov z16.d, z4.d[3] +; CHECK-NEXT: fcvtzs x9, d18 +; CHECK-NEXT: mov z18.d, z4.d[2] +; CHECK-NEXT: frintx z2.d, p0/m, z2.d +; CHECK-NEXT: stp x11, x10, [sp, #144] +; CHECK-NEXT: fcvtzs x10, d7 +; CHECK-NEXT: mov z7.d, z4.d[1] +; CHECK-NEXT: str x13, [sp, #136] +; CHECK-NEXT: fcvtzs x11, d16 +; CHECK-NEXT: mov z16.d, z6.d[3] +; CHECK-NEXT: fcvtzs x13, d18 +; CHECK-NEXT: ldp q3, q19, [x29, #80] +; CHECK-NEXT: stp x9, x14, [sp, #176] +; CHECK-NEXT: fcvtzs x9, d4 +; CHECK-NEXT: mov z4.d, z6.d[2] +; CHECK-NEXT: stp x12, x10, [sp, #160] +; CHECK-NEXT: fcvtzs x10, d7 +; CHECK-NEXT: mov z7.d, z6.d[1] +; CHECK-NEXT: fcvtzs x12, d6 +; CHECK-NEXT: splice z3.d, p1, z3.d, z19.d +; CHECK-NEXT: mov z6.d, z5.d[2] +; CHECK-NEXT: stp x13, x11, [sp, #208] +; CHECK-NEXT: fcvtzs x11, d16 +; CHECK-NEXT: fcvtzs x13, d4 +; CHECK-NEXT: mov z4.d, z5.d[3] +; CHECK-NEXT: mov z1.d, z5.d[1] +; CHECK-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-NEXT: stp x9, x10, [sp, #192] +; CHECK-NEXT: fcvtzs x9, d7 +; CHECK-NEXT: frintx z3.d, p0/m, z3.d +; CHECK-NEXT: fcvtzs x10, d4 +; CHECK-NEXT: stp x13, x11, [sp, #240] +; CHECK-NEXT: fcvtzs x11, d6 +; CHECK-NEXT: mov z4.d, z2.d[3] +; CHECK-NEXT: fcvtzs x13, d2 +; CHECK-NEXT: stp x12, x9, [sp, #224] +; CHECK-NEXT: fcvtzs x9, d5 +; CHECK-NEXT: fcvtzs x12, d1 +; CHECK-NEXT: mov z5.d, z2.d[2] +; CHECK-NEXT: mov z1.d, z2.d[1] +; CHECK-NEXT: mov z2.d, z3.d[2] +; CHECK-NEXT: stp x11, x10, [sp, #16] +; CHECK-NEXT: fcvtzs x10, d4 +; CHECK-NEXT: mov z4.d, z3.d[3] +; CHECK-NEXT: fcvtzs x11, d5 +; CHECK-NEXT: stp x9, x12, [sp] +; CHECK-NEXT: fcvtzs x9, d1 +; CHECK-NEXT: mov z1.d, z3.d[1] +; CHECK-NEXT: fcvtzs x12, d4 +; CHECK-NEXT: stp x11, x10, [sp, #48] +; CHECK-NEXT: fcvtzs x10, d2 +; CHECK-NEXT: fcvtzs x11, d3 +; CHECK-NEXT: stp x13, x9, [sp, #32] +; CHECK-NEXT: fcvtzs x9, d1 +; CHECK-NEXT: mov z2.d, z0.d[3] +; CHECK-NEXT: mov z3.d, z0.d[2] +; CHECK-NEXT: mov z1.d, z0.d[1] +; CHECK-NEXT: fcvtzs x13, d2 +; 
CHECK-NEXT: stp x10, x12, [sp, #80] +; CHECK-NEXT: fcvtzs x12, d0 +; CHECK-NEXT: fcvtzs x10, d3 +; CHECK-NEXT: stp x11, x9, [sp, #64] +; CHECK-NEXT: fcvtzs x9, d1 +; CHECK-NEXT: stp x10, x13, [sp, #112] +; CHECK-NEXT: add x10, sp, #192 +; CHECK-NEXT: stp x12, x9, [sp, #96] +; CHECK-NEXT: add x9, sp, #128 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9] +; CHECK-NEXT: add x9, sp, #160 +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x10] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x9] +; CHECK-NEXT: add x9, sp, #96 +; CHECK-NEXT: add x10, sp, #224 +; CHECK-NEXT: ld1d { z3.d }, p0/z, [x9] +; CHECK-NEXT: add x9, sp, #64 +; CHECK-NEXT: ld1d { z4.d }, p0/z, [x10] +; CHECK-NEXT: add x10, sp, #32 +; CHECK-NEXT: ld1d { z5.d }, p0/z, [x9] +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ld1d { z6.d }, p0/z, [x10] +; CHECK-NEXT: mov x10, #28 // =0x1c +; CHECK-NEXT: ld1d { z7.d }, p0/z, [x9] ; CHECK-NEXT: mov x9, #24 // =0x18 -; CHECK-NEXT: st1d { z5.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z3.d }, p0, [x8, x10, lsl #3] +; CHECK-NEXT: st1d { z5.d }, p0, [x8, x9, lsl #3] ; CHECK-NEXT: mov x9, #20 // =0x14 -; CHECK-NEXT: st1d { z3.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z6.d }, p0, [x8, x9, lsl #3] ; CHECK-NEXT: mov x9, #16 // =0x10 -; CHECK-NEXT: st1d { z1.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z7.d }, p0, [x8, x9, lsl #3] ; CHECK-NEXT: mov x9, #12 // =0xc -; CHECK-NEXT: st1d { z6.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] ; CHECK-NEXT: mov x9, #8 // =0x8 -; CHECK-NEXT: st1d { z4.d }, p1, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] ; CHECK-NEXT: mov x9, #4 // =0x4 -; CHECK-NEXT: st1d { z2.d }, p1, [x8, x9, lsl #3] -; CHECK-NEXT: st1d { z0.d }, p1, [x8] +; CHECK-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] +; CHECK-NEXT: st1d { z0.d }, p0, [x8] +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.llrint.v32i64.v16f64(<32 x double> %x) ret <32 x i64> %a 
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll index 558fa88eb64bd..af1a84c56c448 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll @@ -1,534 +1,894 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -aarch64-sve-vector-bits-min=256 | FileCheck %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=aarch64 -mattr=+sve \ +; RUN: -aarch64-sve-vector-bits-min=256 | FileCheck --check-prefixes=CHECK-i32 %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=aarch64 -mattr=+sve \ +; RUN: -aarch64-sve-vector-bits-min=256 | FileCheck --check-prefixes=CHECK-i64 %s -define <1 x i64> @lrint_v1f16(<1 x half> %x) { -; CHECK-LABEL: lrint_v1f16: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx h0, h0 -; CHECK-NEXT: fcvtzs x8, h0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret - %a = call <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half> %x) - ret <1 x i64> %a +define <1 x iXLen> @lrint_v1f16(<1 x half> %x) { +; CHECK-i32-LABEL: lrint_v1f16: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx h0, h0 +; CHECK-i32-NEXT: fcvtzs w8, h0 +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v1f16: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: frintx h0, h0 +; CHECK-i64-NEXT: fcvtzs x8, h0 +; CHECK-i64-NEXT: fmov d0, x8 +; CHECK-i64-NEXT: ret + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x) + ret <1 x iXLen> %a } -declare <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half>) +declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>) -define <2 x i64> @lrint_v2f16(<2 x half> %x) { -; CHECK-LABEL: lrint_v2f16: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx v0.4h, v0.4h -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: 
ret - %a = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> %x) - ret <2 x i64> %a +define <2 x iXLen> @lrint_v2f16(<2 x half> %x) { +; CHECK-i32-LABEL: lrint_v2f16: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-i32-NEXT: mov h1, v0.h[1] +; CHECK-i32-NEXT: frintx h0, h0 +; CHECK-i32-NEXT: frintx h1, h1 +; CHECK-i32-NEXT: fcvtzs w8, h0 +; CHECK-i32-NEXT: fcvtzs w9, h1 +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: mov v0.s[1], w9 +; CHECK-i32-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v2f16: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-i64-NEXT: mov h1, v0.h[1] +; CHECK-i64-NEXT: frintx h0, h0 +; CHECK-i64-NEXT: frintx h1, h1 +; CHECK-i64-NEXT: fcvtzs x8, h0 +; CHECK-i64-NEXT: fcvtzs x9, h1 +; CHECK-i64-NEXT: fmov d0, x8 +; CHECK-i64-NEXT: mov v0.d[1], x9 +; CHECK-i64-NEXT: ret + %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x) + ret <2 x iXLen> %a } -declare <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half>) +declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>) -define <4 x i64> @lrint_v4f16(<4 x half> %x) { -; CHECK-LABEL: lrint_v4f16: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx v0.4h, v0.4h -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 -; CHECK-NEXT: ret - %a = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> %x) - ret <4 x i64> %a +define <4 x iXLen> @lrint_v4f16(<4 x half> %x) { +; CHECK-i32-LABEL: lrint_v4f16: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v0.4h, v0.4h +; CHECK-i32-NEXT: fcvtl v0.4s, v0.4h +; CHECK-i32-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v4f16: +; CHECK-i64: // %bb.0: +; 
CHECK-i64-NEXT: frintx v0.4h, v0.4h +; CHECK-i64-NEXT: mov h1, v0.h[2] +; CHECK-i64-NEXT: mov h2, v0.h[3] +; CHECK-i64-NEXT: mov h3, v0.h[1] +; CHECK-i64-NEXT: fcvtzs x9, h0 +; CHECK-i64-NEXT: fcvtzs x8, h1 +; CHECK-i64-NEXT: fcvtzs x10, h2 +; CHECK-i64-NEXT: fcvtzs x11, h3 +; CHECK-i64-NEXT: fmov d0, x9 +; CHECK-i64-NEXT: fmov d1, x8 +; CHECK-i64-NEXT: mov v0.d[1], x11 +; CHECK-i64-NEXT: mov v1.d[1], x10 +; CHECK-i64-NEXT: ret + %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x) + ret <4 x iXLen> %a } -declare <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half>) +declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>) -define <8 x i64> @lrint_v8f16(<8 x half> %x) { -; CHECK-LABEL: lrint_v8f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: frintx v0.4h, v0.4h -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: frintx v1.4h, v1.4h -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h -; CHECK-NEXT: movprfx z2, z1 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.h -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 -; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 -; CHECK-NEXT: ret - %a = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> %x) - ret <8 x i64> %a +define <8 x iXLen> @lrint_v8f16(<8 x half> %x) { +; CHECK-i32-LABEL: lrint_v8f16: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v2.8h, v0.8h +; CHECK-i32-NEXT: mov h0, v2.h[4] +; CHECK-i32-NEXT: mov h1, v2.h[5] +; CHECK-i32-NEXT: mov h3, v2.h[1] +; CHECK-i32-NEXT: fcvtzs w9, h2 +; CHECK-i32-NEXT: mov h4, v2.h[6] +; CHECK-i32-NEXT: fcvtzs w8, h0 +; CHECK-i32-NEXT: mov h0, v2.h[2] +; CHECK-i32-NEXT: fcvtzs w10, h1 
+; CHECK-i32-NEXT: fcvtzs w11, h3 +; CHECK-i32-NEXT: mov h3, v2.h[7] +; CHECK-i32-NEXT: fcvtzs w12, h4 +; CHECK-i32-NEXT: mov h2, v2.h[3] +; CHECK-i32-NEXT: fcvtzs w13, h0 +; CHECK-i32-NEXT: fmov s0, w9 +; CHECK-i32-NEXT: fmov s1, w8 +; CHECK-i32-NEXT: fcvtzs w8, h3 +; CHECK-i32-NEXT: fcvtzs w9, h2 +; CHECK-i32-NEXT: mov v0.s[1], w11 +; CHECK-i32-NEXT: mov v1.s[1], w10 +; CHECK-i32-NEXT: mov v0.s[2], w13 +; CHECK-i32-NEXT: mov v1.s[2], w12 +; CHECK-i32-NEXT: mov v0.s[3], w9 +; CHECK-i32-NEXT: mov v1.s[3], w8 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v8f16: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-i64-NEXT: frintx v0.4h, v0.4h +; CHECK-i64-NEXT: frintx v1.4h, v1.4h +; CHECK-i64-NEXT: mov h4, v0.h[2] +; CHECK-i64-NEXT: mov h2, v0.h[1] +; CHECK-i64-NEXT: mov h7, v0.h[3] +; CHECK-i64-NEXT: fcvtzs x8, h0 +; CHECK-i64-NEXT: mov h3, v1.h[2] +; CHECK-i64-NEXT: mov h5, v1.h[3] +; CHECK-i64-NEXT: mov h6, v1.h[1] +; CHECK-i64-NEXT: fcvtzs x11, h1 +; CHECK-i64-NEXT: fcvtzs x12, h4 +; CHECK-i64-NEXT: fcvtzs x9, h2 +; CHECK-i64-NEXT: fcvtzs x15, h7 +; CHECK-i64-NEXT: fmov d0, x8 +; CHECK-i64-NEXT: fcvtzs x10, h3 +; CHECK-i64-NEXT: fcvtzs x13, h5 +; CHECK-i64-NEXT: fcvtzs x14, h6 +; CHECK-i64-NEXT: fmov d1, x12 +; CHECK-i64-NEXT: fmov d2, x11 +; CHECK-i64-NEXT: mov v0.d[1], x9 +; CHECK-i64-NEXT: fmov d3, x10 +; CHECK-i64-NEXT: mov v1.d[1], x15 +; CHECK-i64-NEXT: mov v2.d[1], x14 +; CHECK-i64-NEXT: mov v3.d[1], x13 +; CHECK-i64-NEXT: ret + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x) + ret <8 x iXLen> %a } -declare <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half>) +declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>) -define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { -; CHECK-LABEL: lrint_v16i64_v16f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: frintx v0.4h, v0.4h -; CHECK-NEXT: frintx v1.4h, v1.4h -; CHECK-NEXT: ptrue 
p0.d, vl4 -; CHECK-NEXT: frintx v2.4h, v2.4h -; CHECK-NEXT: frintx v3.4h, v3.4h -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z2.s, z2.h -; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.h -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h -; CHECK-NEXT: movprfx z6, z3 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z3.h -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z5.d, z4.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: mov z7.d, z6.d -; CHECK-NEXT: ext z5.b, z5.b, z4.b, #16 -; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 -; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5 -; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 -; CHECK-NEXT: ext z7.b, z7.b, z6.b, #16 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 -; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 -; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7 -; CHECK-NEXT: ret - %a = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> %x) - ret <16 x i64> %a +define <16 x iXLen> @lrint_v16iXLen_v16f16(<16 x half> %x) { + %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x) + ret <16 x iXLen> %a } -declare <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half>) +declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>) -define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { -; CHECK-LABEL: lrint_v32i64_v32f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ext v4.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: ext v5.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: mov x9, #24 // =0x18 -; CHECK-NEXT: frintx v3.4h, v3.4h -; CHECK-NEXT: ext v6.16b, v1.16b, v1.16b, #8 -; 
CHECK-NEXT: frintx v2.4h, v2.4h -; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: frintx v1.4h, v1.4h -; CHECK-NEXT: frintx v0.4h, v0.4h -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: frintx v4.4h, v4.4h -; CHECK-NEXT: frintx v5.4h, v5.4h -; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: frintx v6.4h, v6.4h -; CHECK-NEXT: uunpklo z2.s, z2.h -; CHECK-NEXT: frintx v7.4h, v7.4h -; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z4.s, z4.h -; CHECK-NEXT: uunpklo z5.s, z5.h -; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: uunpklo z6.s, z6.h -; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: uunpklo z7.s, z7.h -; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: uunpklo z4.d, z4.s -; CHECK-NEXT: uunpklo z5.d, z5.s -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h -; CHECK-NEXT: uunpklo z6.d, z6.s -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h -; CHECK-NEXT: uunpklo z7.d, z7.s -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h -; CHECK-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #16 // =0x10 -; CHECK-NEXT: movprfx z3, z5 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z5.h -; CHECK-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #8 // =0x8 -; CHECK-NEXT: movprfx z2, z6 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.h -; CHECK-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #28 // =0x1c -; CHECK-NEXT: movprfx z1, z7 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z7.h -; CHECK-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #20 // =0x14 -; CHECK-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #12 // =0xc -; CHECK-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #4 // =0x4 -; CHECK-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] -; CHECK-NEXT: st1d { z0.d }, p0, [x8] -; CHECK-NEXT: ret - %a = call <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half> %x) - ret <32 x i64> %a +define <32 x iXLen> 
@lrint_v32iXLen_v32f16(<32 x half> %x) { + %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half> %x) + ret <32 x iXLen> %a } -declare <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half>) +declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half>) -define <1 x i64> @lrint_v1f32(<1 x float> %x) { -; CHECK-LABEL: lrint_v1f32: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx v0.2s, v0.2s -; CHECK-NEXT: fcvtl v0.2d, v0.2s -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret - %a = call <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float> %x) - ret <1 x i64> %a +define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { +; CHECK-i32-LABEL: lrint_v1f32: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v0.2s, v0.2s +; CHECK-i32-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v1f32: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-i64-NEXT: frintx s0, s0 +; CHECK-i64-NEXT: fcvtzs x8, s0 +; CHECK-i64-NEXT: fmov d0, x8 +; CHECK-i64-NEXT: ret + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x) + ret <1 x iXLen> %a } -declare <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float>) +declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>) -define <2 x i64> @lrint_v2f32(<2 x float> %x) { -; CHECK-LABEL: lrint_v2f32: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx v0.2s, v0.2s -; CHECK-NEXT: fcvtl v0.2d, v0.2s -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: ret - %a = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %x) - ret <2 x i64> %a +define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { +; CHECK-i32-LABEL: lrint_v2f32: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v0.2s, v0.2s +; CHECK-i32-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v2f32: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: frintx v0.2s, v0.2s +; CHECK-i64-NEXT: mov s1, v0.s[1] +; CHECK-i64-NEXT: fcvtzs x8, s0 +; CHECK-i64-NEXT: fcvtzs x9, s1 +; CHECK-i64-NEXT: fmov d0, 
x8 +; CHECK-i64-NEXT: mov v0.d[1], x9 +; CHECK-i64-NEXT: ret + %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x) + ret <2 x iXLen> %a } -declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float>) +declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>) -define <4 x i64> @lrint_v4f32(<4 x float> %x) { -; CHECK-LABEL: lrint_v4f32: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx v0.4s, v0.4s -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 -; CHECK-NEXT: ret - %a = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> %x) - ret <4 x i64> %a +define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { +; CHECK-i32-LABEL: lrint_v4f32: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v0.4s, v0.4s +; CHECK-i32-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v4f32: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: frintx v0.4s, v0.4s +; CHECK-i64-NEXT: mov s1, v0.s[2] +; CHECK-i64-NEXT: mov s2, v0.s[3] +; CHECK-i64-NEXT: mov s3, v0.s[1] +; CHECK-i64-NEXT: fcvtzs x9, s0 +; CHECK-i64-NEXT: fcvtzs x8, s1 +; CHECK-i64-NEXT: fcvtzs x10, s2 +; CHECK-i64-NEXT: fcvtzs x11, s3 +; CHECK-i64-NEXT: fmov d0, x9 +; CHECK-i64-NEXT: fmov d1, x8 +; CHECK-i64-NEXT: mov v0.d[1], x11 +; CHECK-i64-NEXT: mov v1.d[1], x10 +; CHECK-i64-NEXT: ret + %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x) + ret <4 x iXLen> %a } -declare <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float>) +declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>) -define <8 x i64> @lrint_v8f32(<8 x float> %x) { -; CHECK-LABEL: lrint_v8f32: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx v0.4s, v0.4s -; CHECK-NEXT: frintx v1.4s, v1.4s -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s -; 
CHECK-NEXT: movprfx z2, z1 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.s -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 -; CHECK-NEXT: ret - %a = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> %x) - ret <8 x i64> %a +define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { +; CHECK-i32-LABEL: lrint_v8f32: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: ptrue p0.d, vl2 +; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-i32-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i32-NEXT: ptrue p0.s, vl8 +; CHECK-i32-NEXT: movprfx z2, z0 +; CHECK-i32-NEXT: frintx z2.s, p0/m, z0.s +; CHECK-i32-NEXT: mov z0.s, z2.s[4] +; CHECK-i32-NEXT: mov z1.s, z2.s[5] +; CHECK-i32-NEXT: mov z3.s, z2.s[1] +; CHECK-i32-NEXT: fcvtzs w9, s2 +; CHECK-i32-NEXT: fcvtzs w8, s0 +; CHECK-i32-NEXT: mov z0.s, z2.s[6] +; CHECK-i32-NEXT: fcvtzs w10, s1 +; CHECK-i32-NEXT: mov z1.s, z2.s[2] +; CHECK-i32-NEXT: fcvtzs w11, s3 +; CHECK-i32-NEXT: mov z3.s, z2.s[7] +; CHECK-i32-NEXT: mov z2.s, z2.s[3] +; CHECK-i32-NEXT: fcvtzs w12, s0 +; CHECK-i32-NEXT: fmov s0, w9 +; CHECK-i32-NEXT: fcvtzs w13, s1 +; CHECK-i32-NEXT: fmov s1, w8 +; CHECK-i32-NEXT: fcvtzs w8, s3 +; CHECK-i32-NEXT: fcvtzs w9, s2 +; CHECK-i32-NEXT: mov v0.s[1], w11 +; CHECK-i32-NEXT: mov v1.s[1], w10 +; CHECK-i32-NEXT: mov v0.s[2], w13 +; CHECK-i32-NEXT: mov v1.s[2], w12 +; CHECK-i32-NEXT: mov v0.s[3], w9 +; CHECK-i32-NEXT: mov v1.s[3], w8 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v8f32: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: frintx v0.4s, v0.4s +; CHECK-i64-NEXT: frintx v1.4s, v1.4s +; CHECK-i64-NEXT: mov s3, v1.s[2] +; CHECK-i64-NEXT: mov s4, v0.s[2] +; CHECK-i64-NEXT: mov s2, 
v0.s[1] +; CHECK-i64-NEXT: mov s5, v1.s[3] +; CHECK-i64-NEXT: mov s6, v1.s[1] +; CHECK-i64-NEXT: mov s7, v0.s[3] +; CHECK-i64-NEXT: fcvtzs x8, s0 +; CHECK-i64-NEXT: fcvtzs x10, s1 +; CHECK-i64-NEXT: fcvtzs x11, s3 +; CHECK-i64-NEXT: fcvtzs x12, s4 +; CHECK-i64-NEXT: fcvtzs x9, s2 +; CHECK-i64-NEXT: fcvtzs x13, s5 +; CHECK-i64-NEXT: fcvtzs x14, s6 +; CHECK-i64-NEXT: fcvtzs x15, s7 +; CHECK-i64-NEXT: fmov d0, x8 +; CHECK-i64-NEXT: fmov d2, x10 +; CHECK-i64-NEXT: fmov d1, x12 +; CHECK-i64-NEXT: fmov d3, x11 +; CHECK-i64-NEXT: mov v0.d[1], x9 +; CHECK-i64-NEXT: mov v2.d[1], x14 +; CHECK-i64-NEXT: mov v1.d[1], x15 +; CHECK-i64-NEXT: mov v3.d[1], x13 +; CHECK-i64-NEXT: ret + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x) + ret <8 x iXLen> %a } -declare <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float>) +declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>) -define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) { -; CHECK-LABEL: lrint_v16i64_v16f32: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx v1.4s, v1.4s -; CHECK-NEXT: frintx v0.4s, v0.4s -; CHECK-NEXT: frintx v2.4s, v2.4s -; CHECK-NEXT: frintx v3.4s, v3.4s -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: uunpklo z4.d, z2.s -; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s -; CHECK-NEXT: movprfx z2, z1 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.s -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s -; CHECK-NEXT: movprfx z6, z3 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z3.s -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: mov z5.d, z4.d -; CHECK-NEXT: mov z7.d, z6.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 -; CHECK-NEXT: ext z5.b, z5.b, z4.b, #16 -; CHECK-NEXT: ext z7.b, z7.b, z6.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 -; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4 -; CHECK-NEXT: // kill: def $q6 
killed $q6 killed $z6 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 -; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5 -; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7 -; CHECK-NEXT: ret - %a = call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> %x) - ret <16 x i64> %a +define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) { + %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x) + ret <16 x iXLen> %a } -declare <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float>) +declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>) -define <32 x i64> @lrint_v32i64_v32f32(<32 x float> %x) { -; CHECK-LABEL: lrint_v32i64_v32f32: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx v7.4s, v7.4s -; CHECK-NEXT: frintx v6.4s, v6.4s -; CHECK-NEXT: mov x9, #28 // =0x1c -; CHECK-NEXT: frintx v5.4s, v5.4s -; CHECK-NEXT: frintx v4.4s, v4.4s -; CHECK-NEXT: frintx v3.4s, v3.4s -; CHECK-NEXT: frintx v2.4s, v2.4s -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: frintx v1.4s, v1.4s -; CHECK-NEXT: frintx v0.4s, v0.4s -; CHECK-NEXT: uunpklo z7.d, z7.s -; CHECK-NEXT: uunpklo z6.d, z6.s -; CHECK-NEXT: uunpklo z5.d, z5.s -; CHECK-NEXT: uunpklo z4.d, z4.s -; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.s -; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s -; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.s -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s -; CHECK-NEXT: st1d { z7.d }, p0, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #24 // =0x18 -; CHECK-NEXT: st1d { z6.d }, p0, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #20 // =0x14 -; CHECK-NEXT: st1d { z5.d }, p0, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #16 // =0x10 -; CHECK-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] -; CHECK-NEXT: 
mov x9, #12 // =0xc -; CHECK-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #8 // =0x8 -; CHECK-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #4 // =0x4 -; CHECK-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] -; CHECK-NEXT: st1d { z0.d }, p0, [x8] -; CHECK-NEXT: ret - %a = call <32 x i64> @llvm.lrint.v32i64.v32f32(<32 x float> %x) - ret <32 x i64> %a +define <32 x iXLen> @lrint_v32iXLen_v32f32(<32 x float> %x) { + %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float> %x) + ret <32 x iXLen> %a } -declare <32 x i64> @llvm.lrint.v32i64.v32f32(<32 x float>) +declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float>) -define <1 x i64> @lrint_v1f64(<1 x double> %x) { -; CHECK-LABEL: lrint_v1f64: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret - %a = call <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double> %x) - ret <1 x i64> %a +define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { +; CHECK-i32-LABEL: lrint_v1f64: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx d0, d0 +; CHECK-i32-NEXT: fcvtzs w8, d0 +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v1f64: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: frintx d0, d0 +; CHECK-i64-NEXT: fcvtzs x8, d0 +; CHECK-i64-NEXT: fmov d0, x8 +; CHECK-i64-NEXT: ret + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x) + ret <1 x iXLen> %a } -declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>) +declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>) -define <2 x i64> @lrint_v2f64(<2 x double> %x) { -; CHECK-LABEL: lrint_v2f64: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: ret - %a = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> %x) - ret <2 x i64> %a +define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { +; CHECK-i32-LABEL: lrint_v2f64: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v0.2d, v0.2d +; 
CHECK-i32-NEXT: mov d1, v0.d[1] +; CHECK-i32-NEXT: fcvtzs w8, d0 +; CHECK-i32-NEXT: fcvtzs w9, d1 +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: mov v0.s[1], w9 +; CHECK-i32-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v2f64: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: frintx v0.2d, v0.2d +; CHECK-i64-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-i64-NEXT: ret + %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x) + ret <2 x iXLen> %a } -declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>) +declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>) -define <4 x i64> @lrint_v4f64(<4 x double> %x) { -; CHECK-LABEL: lrint_v4f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 -; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: frintx z0.d, p0/m, z0.d -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 -; CHECK-NEXT: ret - %a = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> %x) - ret <4 x i64> %a +define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { +; CHECK-i32-LABEL: lrint_v4f64: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: ptrue p0.d, vl2 +; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-i32-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i32-NEXT: ptrue p0.d, vl4 +; CHECK-i32-NEXT: movprfx z1, z0 +; CHECK-i32-NEXT: frintx z1.d, p0/m, z0.d +; CHECK-i32-NEXT: mov z0.d, z1.d[1] +; CHECK-i32-NEXT: fcvtzs w8, d1 +; CHECK-i32-NEXT: mov z2.d, z1.d[2] +; CHECK-i32-NEXT: mov z1.d, z1.d[3] +; CHECK-i32-NEXT: fcvtzs w9, d0 +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: fcvtzs w8, d2 +; CHECK-i32-NEXT: mov v0.s[1], w9 +; CHECK-i32-NEXT: mov v0.s[2], 
w8 +; CHECK-i32-NEXT: fcvtzs w8, d1 +; CHECK-i32-NEXT: mov v0.s[3], w8 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v4f64: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-i64-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: ptrue p0.d, vl4 +; CHECK-i64-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-i64-NEXT: mov z1.d, z0.d[2] +; CHECK-i64-NEXT: mov z2.d, z0.d[3] +; CHECK-i64-NEXT: mov z3.d, z0.d[1] +; CHECK-i64-NEXT: fcvtzs x9, d0 +; CHECK-i64-NEXT: fcvtzs x8, d1 +; CHECK-i64-NEXT: fcvtzs x10, d2 +; CHECK-i64-NEXT: fcvtzs x11, d3 +; CHECK-i64-NEXT: fmov d0, x9 +; CHECK-i64-NEXT: fmov d1, x8 +; CHECK-i64-NEXT: mov v0.d[1], x11 +; CHECK-i64-NEXT: mov v1.d[1], x10 +; CHECK-i64-NEXT: ret + %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x) + ret <4 x iXLen> %a } -declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>) +declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>) -define <8 x i64> @lrint_v8f64(<8 x double> %x) { -; CHECK-LABEL: lrint_v8f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 -; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 -; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d -; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: frintx z0.d, p0/m, z0.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: frintx z1.d, p0/m, z2.d -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d -; CHECK-NEXT: movprfx z2, z1 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 -; CHECK-NEXT: // kill: def $q1 killed $q1 
killed $z1 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 -; CHECK-NEXT: ret - %a = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> %x) - ret <8 x i64> %a +define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { +; CHECK-i32-LABEL: lrint_v8f64: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: ptrue p0.d, vl2 +; CHECK-i32-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-i32-NEXT: // kill: def $q3 killed $q3 def $z3 +; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-i32-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i32-NEXT: splice z2.d, p0, z2.d, z3.d +; CHECK-i32-NEXT: ptrue p0.d, vl4 +; CHECK-i32-NEXT: movprfx z3, z0 +; CHECK-i32-NEXT: frintx z3.d, p0/m, z0.d +; CHECK-i32-NEXT: frintx z2.d, p0/m, z2.d +; CHECK-i32-NEXT: mov z0.d, z3.d[1] +; CHECK-i32-NEXT: mov z1.d, z2.d[1] +; CHECK-i32-NEXT: fcvtzs w8, d3 +; CHECK-i32-NEXT: fcvtzs w9, d2 +; CHECK-i32-NEXT: mov z4.d, z3.d[2] +; CHECK-i32-NEXT: mov z5.d, z2.d[2] +; CHECK-i32-NEXT: mov z3.d, z3.d[3] +; CHECK-i32-NEXT: mov z2.d, z2.d[3] +; CHECK-i32-NEXT: fcvtzs w10, d0 +; CHECK-i32-NEXT: fcvtzs w11, d1 +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: fcvtzs w8, d4 +; CHECK-i32-NEXT: fmov s1, w9 +; CHECK-i32-NEXT: fcvtzs w9, d5 +; CHECK-i32-NEXT: mov v0.s[1], w10 +; CHECK-i32-NEXT: mov v1.s[1], w11 +; CHECK-i32-NEXT: mov v0.s[2], w8 +; CHECK-i32-NEXT: fcvtzs w8, d3 +; CHECK-i32-NEXT: mov v1.s[2], w9 +; CHECK-i32-NEXT: fcvtzs w9, d2 +; CHECK-i32-NEXT: mov v0.s[3], w8 +; CHECK-i32-NEXT: mov v1.s[3], w9 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v8f64: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: ptrue p0.d, vl2 +; CHECK-i64-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-i64-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-i64-NEXT: // kill: def $q3 killed $q3 def $z3 +; CHECK-i64-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-i64-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-i64-NEXT: splice z2.d, p0, z2.d, z3.d +; CHECK-i64-NEXT: ptrue 
p0.d, vl4 +; CHECK-i64-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-i64-NEXT: movprfx z1, z2 +; CHECK-i64-NEXT: frintx z1.d, p0/m, z2.d +; CHECK-i64-NEXT: mov z4.d, z1.d[2] +; CHECK-i64-NEXT: mov z5.d, z0.d[2] +; CHECK-i64-NEXT: mov z2.d, z0.d[1] +; CHECK-i64-NEXT: mov z3.d, z1.d[3] +; CHECK-i64-NEXT: mov z6.d, z0.d[3] +; CHECK-i64-NEXT: fcvtzs x8, d0 +; CHECK-i64-NEXT: mov z0.d, z1.d[1] +; CHECK-i64-NEXT: fcvtzs x10, d1 +; CHECK-i64-NEXT: fcvtzs x11, d4 +; CHECK-i64-NEXT: fcvtzs x12, d5 +; CHECK-i64-NEXT: fcvtzs x9, d2 +; CHECK-i64-NEXT: fcvtzs x13, d3 +; CHECK-i64-NEXT: fcvtzs x14, d6 +; CHECK-i64-NEXT: fcvtzs x15, d0 +; CHECK-i64-NEXT: fmov d0, x8 +; CHECK-i64-NEXT: fmov d2, x10 +; CHECK-i64-NEXT: fmov d1, x12 +; CHECK-i64-NEXT: fmov d3, x11 +; CHECK-i64-NEXT: mov v0.d[1], x9 +; CHECK-i64-NEXT: mov v2.d[1], x15 +; CHECK-i64-NEXT: mov v1.d[1], x14 +; CHECK-i64-NEXT: mov v3.d[1], x13 +; CHECK-i64-NEXT: ret + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x) + ret <8 x iXLen> %a } -declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>) +declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>) -define <16 x i64> @lrint_v16f64(<16 x double> %x) { -; CHECK-LABEL: lrint_v16f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6 -; CHECK-NEXT: // kill: def $q4 killed $q4 def $z4 -; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7 -; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5 -; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 -; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 -; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d -; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-NEXT: splice z6.d, p0, z6.d, z7.d -; CHECK-NEXT: splice z4.d, p0, z4.d, z5.d -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: frintx z0.d, p0/m, z0.d -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: frintx z1.d, p0/m, z2.d -; CHECK-NEXT: movprfx 
z5, z6 -; CHECK-NEXT: frintx z5.d, p0/m, z6.d -; CHECK-NEXT: movprfx z3, z4 -; CHECK-NEXT: frintx z3.d, p0/m, z4.d -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d -; CHECK-NEXT: movprfx z2, z1 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d -; CHECK-NEXT: movprfx z6, z5 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z5.d -; CHECK-NEXT: movprfx z4, z3 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z3.d -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: mov z7.d, z6.d -; CHECK-NEXT: mov z5.d, z4.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 -; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16 -; CHECK-NEXT: ext z7.b, z7.b, z6.b, #16 -; CHECK-NEXT: ext z5.b, z5.b, z4.b, #16 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2 -; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4 -; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3 -; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7 -; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5 -; CHECK-NEXT: ret - %a = call <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double> %x) - ret <16 x i64> %a +define <16 x iXLen> @lrint_v16f64(<16 x double> %x) { +; CHECK-i32-LABEL: lrint_v16f64: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: ptrue p1.d, vl2 +; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-i32-NEXT: // kill: def $q6 killed $q6 def $z6 +; CHECK-i32-NEXT: // kill: def $q4 killed $q4 def $z4 +; CHECK-i32-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-i32-NEXT: // kill: def $q7 killed $q7 def $z7 +; CHECK-i32-NEXT: // kill: def $q5 killed $q5 def $z5 +; CHECK-i32-NEXT: // kill: def $q3 killed $q3 def $z3 +; CHECK-i32-NEXT: ptrue p0.d, vl4 +; CHECK-i32-NEXT: splice z0.d, p1, z0.d, z1.d +; CHECK-i32-NEXT: splice z2.d, p1, z2.d, z3.d +; CHECK-i32-NEXT: splice z4.d, p1, z4.d, z5.d +; CHECK-i32-NEXT: splice z6.d, p1, z6.d, z7.d +; 
CHECK-i32-NEXT: movprfx z5, z0 +; CHECK-i32-NEXT: frintx z5.d, p0/m, z0.d +; CHECK-i32-NEXT: movprfx z7, z2 +; CHECK-i32-NEXT: frintx z7.d, p0/m, z2.d +; CHECK-i32-NEXT: frintx z4.d, p0/m, z4.d +; CHECK-i32-NEXT: frintx z6.d, p0/m, z6.d +; CHECK-i32-NEXT: fcvtzs w8, d5 +; CHECK-i32-NEXT: mov z0.d, z5.d[1] +; CHECK-i32-NEXT: mov z1.d, z7.d[1] +; CHECK-i32-NEXT: fcvtzs w9, d7 +; CHECK-i32-NEXT: mov z3.d, z4.d[1] +; CHECK-i32-NEXT: fcvtzs w10, d4 +; CHECK-i32-NEXT: mov z16.d, z6.d[1] +; CHECK-i32-NEXT: fcvtzs w12, d6 +; CHECK-i32-NEXT: mov z2.d, z5.d[2] +; CHECK-i32-NEXT: fcvtzs w11, d0 +; CHECK-i32-NEXT: fcvtzs w13, d1 +; CHECK-i32-NEXT: mov z17.d, z7.d[2] +; CHECK-i32-NEXT: fcvtzs w14, d3 +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: mov z18.d, z4.d[2] +; CHECK-i32-NEXT: fcvtzs w8, d16 +; CHECK-i32-NEXT: mov z19.d, z6.d[2] +; CHECK-i32-NEXT: fcvtzs w15, d2 +; CHECK-i32-NEXT: fmov s1, w9 +; CHECK-i32-NEXT: fmov s2, w10 +; CHECK-i32-NEXT: fmov s3, w12 +; CHECK-i32-NEXT: fcvtzs w9, d17 +; CHECK-i32-NEXT: fcvtzs w10, d18 +; CHECK-i32-NEXT: mov v0.s[1], w11 +; CHECK-i32-NEXT: fcvtzs w11, d19 +; CHECK-i32-NEXT: mov z5.d, z5.d[3] +; CHECK-i32-NEXT: mov z7.d, z7.d[3] +; CHECK-i32-NEXT: mov v1.s[1], w13 +; CHECK-i32-NEXT: mov v2.s[1], w14 +; CHECK-i32-NEXT: mov v3.s[1], w8 +; CHECK-i32-NEXT: mov z4.d, z4.d[3] +; CHECK-i32-NEXT: mov z6.d, z6.d[3] +; CHECK-i32-NEXT: mov v0.s[2], w15 +; CHECK-i32-NEXT: fcvtzs w8, d5 +; CHECK-i32-NEXT: mov v1.s[2], w9 +; CHECK-i32-NEXT: fcvtzs w9, d7 +; CHECK-i32-NEXT: mov v2.s[2], w10 +; CHECK-i32-NEXT: fcvtzs w10, d4 +; CHECK-i32-NEXT: mov v3.s[2], w11 +; CHECK-i32-NEXT: fcvtzs w11, d6 +; CHECK-i32-NEXT: mov v0.s[3], w8 +; CHECK-i32-NEXT: mov v1.s[3], w9 +; CHECK-i32-NEXT: mov v2.s[3], w10 +; CHECK-i32-NEXT: mov v3.s[3], w11 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v16f64: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: ptrue p1.d, vl2 +; CHECK-i64-NEXT: // kill: def $q6 killed $q6 def $z6 +; CHECK-i64-NEXT: // kill: def $q4 killed 
$q4 def $z4 +; CHECK-i64-NEXT: // kill: def $q7 killed $q7 def $z7 +; CHECK-i64-NEXT: // kill: def $q5 killed $q5 def $z5 +; CHECK-i64-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-i64-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-i64-NEXT: // kill: def $q3 killed $q3 def $z3 +; CHECK-i64-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-i64-NEXT: ptrue p0.d, vl4 +; CHECK-i64-NEXT: splice z6.d, p1, z6.d, z7.d +; CHECK-i64-NEXT: splice z4.d, p1, z4.d, z5.d +; CHECK-i64-NEXT: splice z2.d, p1, z2.d, z3.d +; CHECK-i64-NEXT: splice z0.d, p1, z0.d, z1.d +; CHECK-i64-NEXT: movprfx z3, z6 +; CHECK-i64-NEXT: frintx z3.d, p0/m, z6.d +; CHECK-i64-NEXT: movprfx z1, z4 +; CHECK-i64-NEXT: frintx z1.d, p0/m, z4.d +; CHECK-i64-NEXT: frintx z2.d, p0/m, z2.d +; CHECK-i64-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-i64-NEXT: mov z4.d, z3.d[2] +; CHECK-i64-NEXT: mov z5.d, z1.d[2] +; CHECK-i64-NEXT: mov z6.d, z2.d[3] +; CHECK-i64-NEXT: fcvtzs x11, d0 +; CHECK-i64-NEXT: fcvtzs x12, d1 +; CHECK-i64-NEXT: fcvtzs x13, d2 +; CHECK-i64-NEXT: fcvtzs x14, d3 +; CHECK-i64-NEXT: mov z7.d, z3.d[3] +; CHECK-i64-NEXT: mov z16.d, z1.d[3] +; CHECK-i64-NEXT: fcvtzs x9, d4 +; CHECK-i64-NEXT: fcvtzs x10, d5 +; CHECK-i64-NEXT: mov z4.d, z2.d[2] +; CHECK-i64-NEXT: mov z5.d, z0.d[2] +; CHECK-i64-NEXT: fcvtzs x8, d6 +; CHECK-i64-NEXT: mov z2.d, z2.d[1] +; CHECK-i64-NEXT: mov z6.d, z0.d[3] +; CHECK-i64-NEXT: mov z1.d, z1.d[1] +; CHECK-i64-NEXT: mov z3.d, z3.d[1] +; CHECK-i64-NEXT: fcvtzs x15, d4 +; CHECK-i64-NEXT: mov z4.d, z0.d[1] +; CHECK-i64-NEXT: fmov d0, x11 +; CHECK-i64-NEXT: fcvtzs x16, d5 +; CHECK-i64-NEXT: fcvtzs x11, d2 +; CHECK-i64-NEXT: fmov d2, x13 +; CHECK-i64-NEXT: fcvtzs x17, d7 +; CHECK-i64-NEXT: fcvtzs x18, d16 +; CHECK-i64-NEXT: fcvtzs x0, d3 +; CHECK-i64-NEXT: fcvtzs x13, d4 +; CHECK-i64-NEXT: fmov d4, x12 +; CHECK-i64-NEXT: fcvtzs x12, d6 +; CHECK-i64-NEXT: fmov d6, x14 +; CHECK-i64-NEXT: fcvtzs x14, d1 +; CHECK-i64-NEXT: fmov d3, x15 +; CHECK-i64-NEXT: fmov d1, x16 +; 
CHECK-i64-NEXT: fmov d5, x10 +; CHECK-i64-NEXT: fmov d7, x9 +; CHECK-i64-NEXT: mov v2.d[1], x11 +; CHECK-i64-NEXT: mov v0.d[1], x13 +; CHECK-i64-NEXT: mov v3.d[1], x8 +; CHECK-i64-NEXT: mov v6.d[1], x0 +; CHECK-i64-NEXT: mov v4.d[1], x14 +; CHECK-i64-NEXT: mov v1.d[1], x12 +; CHECK-i64-NEXT: mov v5.d[1], x18 +; CHECK-i64-NEXT: mov v7.d[1], x17 +; CHECK-i64-NEXT: ret + %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double> %x) + ret <16 x iXLen> %a } -declare <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double>) +declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>) -define <32 x i64> @lrint_v32f64(<32 x double> %x) { -; CHECK-LABEL: lrint_v32f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp q17, q16, [sp, #96] -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q19, q18, [sp, #64] -; CHECK-NEXT: ptrue p1.d, vl4 -; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7 -; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6 -; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5 -; CHECK-NEXT: // kill: def $q4 killed $q4 def $z4 -; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 -; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 -; CHECK-NEXT: mov x9, #28 // =0x1c -; CHECK-NEXT: splice z17.d, p0, z17.d, z16.d -; CHECK-NEXT: ldp q20, q16, [sp, #32] -; CHECK-NEXT: splice z19.d, p0, z19.d, z18.d -; CHECK-NEXT: ldp q21, q18, [sp] -; CHECK-NEXT: splice z6.d, p0, z6.d, z7.d -; CHECK-NEXT: splice z4.d, p0, z4.d, z5.d -; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d -; CHECK-NEXT: splice z20.d, p0, z20.d, z16.d -; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d -; CHECK-NEXT: splice z21.d, p0, z21.d, z18.d -; CHECK-NEXT: movprfx z7, z17 -; CHECK-NEXT: frintx z7.d, p1/m, z17.d -; CHECK-NEXT: movprfx z5, z19 -; CHECK-NEXT: frintx z5.d, p1/m, z19.d -; CHECK-NEXT: frintx z6.d, p1/m, z6.d -; CHECK-NEXT: frintx z4.d, p1/m, z4.d -; CHECK-NEXT: frintx z2.d, p1/m, z2.d -; CHECK-NEXT: movprfx z3, z20 -; 
CHECK-NEXT: frintx z3.d, p1/m, z20.d -; CHECK-NEXT: frintx z0.d, p1/m, z0.d -; CHECK-NEXT: movprfx z1, z21 -; CHECK-NEXT: frintx z1.d, p1/m, z21.d -; CHECK-NEXT: fcvtzs z7.d, p1/m, z7.d -; CHECK-NEXT: fcvtzs z5.d, p1/m, z5.d -; CHECK-NEXT: fcvtzs z6.d, p1/m, z6.d -; CHECK-NEXT: fcvtzs z4.d, p1/m, z4.d -; CHECK-NEXT: fcvtzs z2.d, p1/m, z2.d -; CHECK-NEXT: fcvtzs z3.d, p1/m, z3.d -; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d -; CHECK-NEXT: fcvtzs z1.d, p1/m, z1.d -; CHECK-NEXT: st1d { z7.d }, p1, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #24 // =0x18 -; CHECK-NEXT: st1d { z5.d }, p1, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #20 // =0x14 -; CHECK-NEXT: st1d { z3.d }, p1, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #16 // =0x10 -; CHECK-NEXT: st1d { z1.d }, p1, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #12 // =0xc -; CHECK-NEXT: st1d { z6.d }, p1, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #8 // =0x8 -; CHECK-NEXT: st1d { z4.d }, p1, [x8, x9, lsl #3] -; CHECK-NEXT: mov x9, #4 // =0x4 -; CHECK-NEXT: st1d { z2.d }, p1, [x8, x9, lsl #3] -; CHECK-NEXT: st1d { z0.d }, p1, [x8] -; CHECK-NEXT: ret - %a = call <32 x i64> @llvm.lrint.v32i64.v16f64(<32 x double> %x) - ret <32 x i64> %a +define <32 x iXLen> @lrint_v32f64(<32 x double> %x) { +; CHECK-i32-LABEL: lrint_v32f64: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: ptrue p1.d, vl2 +; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-i32-NEXT: // kill: def $q3 killed $q3 def $z3 +; CHECK-i32-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-i32-NEXT: // kill: def $q4 killed $q4 def $z4 +; CHECK-i32-NEXT: // kill: def $q5 killed $q5 def $z5 +; CHECK-i32-NEXT: // kill: def $q7 killed $q7 def $z7 +; CHECK-i32-NEXT: // kill: def $q6 killed $q6 def $z6 +; CHECK-i32-NEXT: ptrue p0.d, vl4 +; CHECK-i32-NEXT: splice z0.d, p1, z0.d, z1.d +; CHECK-i32-NEXT: splice z2.d, p1, z2.d, z3.d +; CHECK-i32-NEXT: splice z4.d, p1, z4.d, z5.d +; CHECK-i32-NEXT: ldp q1, q3, [sp] +; CHECK-i32-NEXT: splice 
z6.d, p1, z6.d, z7.d +; CHECK-i32-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-i32-NEXT: splice z1.d, p1, z1.d, z3.d +; CHECK-i32-NEXT: movprfx z18, z2 +; CHECK-i32-NEXT: frintx z18.d, p0/m, z2.d +; CHECK-i32-NEXT: ldp q5, q3, [sp, #96] +; CHECK-i32-NEXT: ldp q2, q7, [sp, #64] +; CHECK-i32-NEXT: splice z5.d, p1, z5.d, z3.d +; CHECK-i32-NEXT: movprfx z3, z4 +; CHECK-i32-NEXT: frintx z3.d, p0/m, z4.d +; CHECK-i32-NEXT: mov z4.d, z0.d[1] +; CHECK-i32-NEXT: fcvtzs w8, d0 +; CHECK-i32-NEXT: splice z2.d, p1, z2.d, z7.d +; CHECK-i32-NEXT: mov z19.d, z18.d[1] +; CHECK-i32-NEXT: ldp q7, q16, [sp, #32] +; CHECK-i32-NEXT: movprfx z17, z1 +; CHECK-i32-NEXT: frintx z17.d, p0/m, z1.d +; CHECK-i32-NEXT: fcvtzs w10, d4 +; CHECK-i32-NEXT: mov z1.d, z0.d[2] +; CHECK-i32-NEXT: fcvtzs w9, d18 +; CHECK-i32-NEXT: mov z4.d, z0.d[3] +; CHECK-i32-NEXT: fcvtzs w11, d19 +; CHECK-i32-NEXT: mov z20.d, z18.d[3] +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: splice z7.d, p1, z7.d, z16.d +; CHECK-i32-NEXT: movprfx z16, z6 +; CHECK-i32-NEXT: frintx z16.d, p0/m, z6.d +; CHECK-i32-NEXT: mov z6.d, z18.d[2] +; CHECK-i32-NEXT: mov z18.d, z3.d[1] +; CHECK-i32-NEXT: fcvtzs w12, d3 +; CHECK-i32-NEXT: fcvtzs w13, d1 +; CHECK-i32-NEXT: fmov s1, w9 +; CHECK-i32-NEXT: movprfx z19, z2 +; CHECK-i32-NEXT: frintx z19.d, p0/m, z2.d +; CHECK-i32-NEXT: mov v0.s[1], w10 +; CHECK-i32-NEXT: mov z21.d, z3.d[2] +; CHECK-i32-NEXT: fcvtzs w8, d4 +; CHECK-i32-NEXT: fcvtzs w14, d6 +; CHECK-i32-NEXT: mov z6.d, z16.d[1] +; CHECK-i32-NEXT: fcvtzs w15, d18 +; CHECK-i32-NEXT: movprfx z18, z7 +; CHECK-i32-NEXT: frintx z18.d, p0/m, z7.d +; CHECK-i32-NEXT: mov v1.s[1], w11 +; CHECK-i32-NEXT: fmov s2, w12 +; CHECK-i32-NEXT: mov z7.d, z17.d[1] +; CHECK-i32-NEXT: mov z4.d, z16.d[2] +; CHECK-i32-NEXT: fcvtzs w16, d16 +; CHECK-i32-NEXT: mov v0.s[2], w13 +; CHECK-i32-NEXT: fcvtzs w13, d17 +; CHECK-i32-NEXT: fcvtzs w12, d6 +; CHECK-i32-NEXT: mov z6.d, z19.d[1] +; CHECK-i32-NEXT: fcvtzs w11, d21 +; CHECK-i32-NEXT: movprfx z21, z5 +; 
CHECK-i32-NEXT: frintx z21.d, p0/m, z5.d +; CHECK-i32-NEXT: mov z3.d, z3.d[3] +; CHECK-i32-NEXT: mov v2.s[1], w15 +; CHECK-i32-NEXT: mov z5.d, z18.d[1] +; CHECK-i32-NEXT: fcvtzs w15, d7 +; CHECK-i32-NEXT: fcvtzs w0, d19 +; CHECK-i32-NEXT: mov v1.s[2], w14 +; CHECK-i32-NEXT: fcvtzs w14, d4 +; CHECK-i32-NEXT: mov z7.d, z18.d[2] +; CHECK-i32-NEXT: fmov s4, w13 +; CHECK-i32-NEXT: fcvtzs w13, d6 +; CHECK-i32-NEXT: mov z6.d, z19.d[2] +; CHECK-i32-NEXT: fcvtzs w10, d3 +; CHECK-i32-NEXT: fmov s3, w16 +; CHECK-i32-NEXT: fcvtzs w17, d18 +; CHECK-i32-NEXT: fcvtzs w18, d5 +; CHECK-i32-NEXT: mov z5.d, z21.d[1] +; CHECK-i32-NEXT: fcvtzs w2, d21 +; CHECK-i32-NEXT: fcvtzs w1, d7 +; CHECK-i32-NEXT: mov z7.d, z21.d[2] +; CHECK-i32-NEXT: mov v4.s[1], w15 +; CHECK-i32-NEXT: fcvtzs w15, d6 +; CHECK-i32-NEXT: fmov s6, w0 +; CHECK-i32-NEXT: mov v3.s[1], w12 +; CHECK-i32-NEXT: fcvtzs w9, d20 +; CHECK-i32-NEXT: fcvtzs w12, d5 +; CHECK-i32-NEXT: mov z20.d, z17.d[2] +; CHECK-i32-NEXT: fmov s5, w17 +; CHECK-i32-NEXT: mov z16.d, z16.d[3] +; CHECK-i32-NEXT: mov z17.d, z17.d[3] +; CHECK-i32-NEXT: mov z18.d, z18.d[3] +; CHECK-i32-NEXT: mov v6.s[1], w13 +; CHECK-i32-NEXT: fcvtzs w13, d7 +; CHECK-i32-NEXT: fmov s7, w2 +; CHECK-i32-NEXT: fcvtzs w16, d20 +; CHECK-i32-NEXT: mov v5.s[1], w18 +; CHECK-i32-NEXT: mov z19.d, z19.d[3] +; CHECK-i32-NEXT: mov z20.d, z21.d[3] +; CHECK-i32-NEXT: mov v2.s[2], w11 +; CHECK-i32-NEXT: mov v3.s[2], w14 +; CHECK-i32-NEXT: mov v7.s[1], w12 +; CHECK-i32-NEXT: fcvtzs w11, d16 +; CHECK-i32-NEXT: fcvtzs w12, d17 +; CHECK-i32-NEXT: fcvtzs w14, d18 +; CHECK-i32-NEXT: mov v6.s[2], w15 +; CHECK-i32-NEXT: fcvtzs w15, d19 +; CHECK-i32-NEXT: mov v4.s[2], w16 +; CHECK-i32-NEXT: mov v5.s[2], w1 +; CHECK-i32-NEXT: mov v0.s[3], w8 +; CHECK-i32-NEXT: mov v1.s[3], w9 +; CHECK-i32-NEXT: mov v2.s[3], w10 +; CHECK-i32-NEXT: mov v7.s[2], w13 +; CHECK-i32-NEXT: fcvtzs w13, d20 +; CHECK-i32-NEXT: mov v3.s[3], w11 +; CHECK-i32-NEXT: mov v6.s[3], w15 +; CHECK-i32-NEXT: mov v4.s[3], w12 +; 
CHECK-i32-NEXT: mov v5.s[3], w14 +; CHECK-i32-NEXT: mov v7.s[3], w13 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v32f64: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-i64-NEXT: sub x9, sp, #272 +; CHECK-i64-NEXT: mov x29, sp +; CHECK-i64-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-i64-NEXT: .cfi_def_cfa w29, 16 +; CHECK-i64-NEXT: .cfi_offset w30, -8 +; CHECK-i64-NEXT: .cfi_offset w29, -16 +; CHECK-i64-NEXT: ptrue p1.d, vl2 +; CHECK-i64-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-i64-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-i64-NEXT: // kill: def $q3 killed $q3 def $z3 +; CHECK-i64-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-i64-NEXT: // kill: def $q7 killed $q7 def $z7 +; CHECK-i64-NEXT: // kill: def $q6 killed $q6 def $z6 +; CHECK-i64-NEXT: // kill: def $q4 killed $q4 def $z4 +; CHECK-i64-NEXT: // kill: def $q5 killed $q5 def $z5 +; CHECK-i64-NEXT: ptrue p0.d, vl4 +; CHECK-i64-NEXT: splice z0.d, p1, z0.d, z1.d +; CHECK-i64-NEXT: splice z2.d, p1, z2.d, z3.d +; CHECK-i64-NEXT: splice z4.d, p1, z4.d, z5.d +; CHECK-i64-NEXT: splice z6.d, p1, z6.d, z7.d +; CHECK-i64-NEXT: ldp q5, q19, [x29, #16] +; CHECK-i64-NEXT: movprfx z3, z0 +; CHECK-i64-NEXT: frintx z3.d, p0/m, z0.d +; CHECK-i64-NEXT: movprfx z16, z2 +; CHECK-i64-NEXT: frintx z16.d, p0/m, z2.d +; CHECK-i64-NEXT: frintx z4.d, p0/m, z4.d +; CHECK-i64-NEXT: splice z5.d, p1, z5.d, z19.d +; CHECK-i64-NEXT: frintx z6.d, p0/m, z6.d +; CHECK-i64-NEXT: ldp q2, q17, [x29, #48] +; CHECK-i64-NEXT: ldp q0, q1, [x29, #112] +; CHECK-i64-NEXT: mov z18.d, z3.d[3] +; CHECK-i64-NEXT: mov z7.d, z3.d[2] +; CHECK-i64-NEXT: fcvtzs x9, d3 +; CHECK-i64-NEXT: mov z3.d, z3.d[1] +; CHECK-i64-NEXT: mov z20.d, z16.d[3] +; CHECK-i64-NEXT: fcvtzs x12, d16 +; CHECK-i64-NEXT: splice z2.d, p1, z2.d, z17.d +; CHECK-i64-NEXT: frintx z5.d, p0/m, z5.d +; CHECK-i64-NEXT: splice z0.d, p1, z0.d, z1.d +; CHECK-i64-NEXT: fcvtzs x10, d18 +; CHECK-i64-NEXT: fcvtzs x11, 
d7 +; CHECK-i64-NEXT: mov z18.d, z16.d[2] +; CHECK-i64-NEXT: mov z7.d, z16.d[1] +; CHECK-i64-NEXT: fcvtzs x13, d3 +; CHECK-i64-NEXT: fcvtzs x14, d20 +; CHECK-i64-NEXT: str x9, [sp, #128] +; CHECK-i64-NEXT: mov z16.d, z4.d[3] +; CHECK-i64-NEXT: fcvtzs x9, d18 +; CHECK-i64-NEXT: mov z18.d, z4.d[2] +; CHECK-i64-NEXT: frintx z2.d, p0/m, z2.d +; CHECK-i64-NEXT: stp x11, x10, [sp, #144] +; CHECK-i64-NEXT: fcvtzs x10, d7 +; CHECK-i64-NEXT: mov z7.d, z4.d[1] +; CHECK-i64-NEXT: str x13, [sp, #136] +; CHECK-i64-NEXT: fcvtzs x11, d16 +; CHECK-i64-NEXT: mov z16.d, z6.d[3] +; CHECK-i64-NEXT: fcvtzs x13, d18 +; CHECK-i64-NEXT: ldp q3, q19, [x29, #80] +; CHECK-i64-NEXT: stp x9, x14, [sp, #176] +; CHECK-i64-NEXT: fcvtzs x9, d4 +; CHECK-i64-NEXT: mov z4.d, z6.d[2] +; CHECK-i64-NEXT: stp x12, x10, [sp, #160] +; CHECK-i64-NEXT: fcvtzs x10, d7 +; CHECK-i64-NEXT: mov z7.d, z6.d[1] +; CHECK-i64-NEXT: fcvtzs x12, d6 +; CHECK-i64-NEXT: splice z3.d, p1, z3.d, z19.d +; CHECK-i64-NEXT: mov z6.d, z5.d[2] +; CHECK-i64-NEXT: stp x13, x11, [sp, #208] +; CHECK-i64-NEXT: fcvtzs x11, d16 +; CHECK-i64-NEXT: fcvtzs x13, d4 +; CHECK-i64-NEXT: mov z4.d, z5.d[3] +; CHECK-i64-NEXT: mov z1.d, z5.d[1] +; CHECK-i64-NEXT: frintx z0.d, p0/m, z0.d +; CHECK-i64-NEXT: stp x9, x10, [sp, #192] +; CHECK-i64-NEXT: fcvtzs x9, d7 +; CHECK-i64-NEXT: frintx z3.d, p0/m, z3.d +; CHECK-i64-NEXT: fcvtzs x10, d4 +; CHECK-i64-NEXT: stp x13, x11, [sp, #240] +; CHECK-i64-NEXT: fcvtzs x11, d6 +; CHECK-i64-NEXT: mov z4.d, z2.d[3] +; CHECK-i64-NEXT: fcvtzs x13, d2 +; CHECK-i64-NEXT: stp x12, x9, [sp, #224] +; CHECK-i64-NEXT: fcvtzs x9, d5 +; CHECK-i64-NEXT: fcvtzs x12, d1 +; CHECK-i64-NEXT: mov z5.d, z2.d[2] +; CHECK-i64-NEXT: mov z1.d, z2.d[1] +; CHECK-i64-NEXT: mov z2.d, z3.d[2] +; CHECK-i64-NEXT: stp x11, x10, [sp, #16] +; CHECK-i64-NEXT: fcvtzs x10, d4 +; CHECK-i64-NEXT: mov z4.d, z3.d[3] +; CHECK-i64-NEXT: fcvtzs x11, d5 +; CHECK-i64-NEXT: stp x9, x12, [sp] +; CHECK-i64-NEXT: fcvtzs x9, d1 +; CHECK-i64-NEXT: mov z1.d, z3.d[1] 
+; CHECK-i64-NEXT: fcvtzs x12, d4 +; CHECK-i64-NEXT: stp x11, x10, [sp, #48] +; CHECK-i64-NEXT: fcvtzs x10, d2 +; CHECK-i64-NEXT: fcvtzs x11, d3 +; CHECK-i64-NEXT: stp x13, x9, [sp, #32] +; CHECK-i64-NEXT: fcvtzs x9, d1 +; CHECK-i64-NEXT: mov z2.d, z0.d[3] +; CHECK-i64-NEXT: mov z3.d, z0.d[2] +; CHECK-i64-NEXT: mov z1.d, z0.d[1] +; CHECK-i64-NEXT: fcvtzs x13, d2 +; CHECK-i64-NEXT: stp x10, x12, [sp, #80] +; CHECK-i64-NEXT: fcvtzs x12, d0 +; CHECK-i64-NEXT: fcvtzs x10, d3 +; CHECK-i64-NEXT: stp x11, x9, [sp, #64] +; CHECK-i64-NEXT: fcvtzs x9, d1 +; CHECK-i64-NEXT: stp x10, x13, [sp, #112] +; CHECK-i64-NEXT: add x10, sp, #192 +; CHECK-i64-NEXT: stp x12, x9, [sp, #96] +; CHECK-i64-NEXT: add x9, sp, #128 +; CHECK-i64-NEXT: ld1d { z0.d }, p0/z, [x9] +; CHECK-i64-NEXT: add x9, sp, #160 +; CHECK-i64-NEXT: ld1d { z2.d }, p0/z, [x10] +; CHECK-i64-NEXT: ld1d { z1.d }, p0/z, [x9] +; CHECK-i64-NEXT: add x9, sp, #96 +; CHECK-i64-NEXT: add x10, sp, #224 +; CHECK-i64-NEXT: ld1d { z3.d }, p0/z, [x9] +; CHECK-i64-NEXT: add x9, sp, #64 +; CHECK-i64-NEXT: ld1d { z4.d }, p0/z, [x10] +; CHECK-i64-NEXT: add x10, sp, #32 +; CHECK-i64-NEXT: ld1d { z5.d }, p0/z, [x9] +; CHECK-i64-NEXT: mov x9, sp +; CHECK-i64-NEXT: ld1d { z6.d }, p0/z, [x10] +; CHECK-i64-NEXT: mov x10, #28 // =0x1c +; CHECK-i64-NEXT: ld1d { z7.d }, p0/z, [x9] +; CHECK-i64-NEXT: mov x9, #24 // =0x18 +; CHECK-i64-NEXT: st1d { z3.d }, p0, [x8, x10, lsl #3] +; CHECK-i64-NEXT: st1d { z5.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: mov x9, #20 // =0x14 +; CHECK-i64-NEXT: st1d { z6.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: mov x9, #16 // =0x10 +; CHECK-i64-NEXT: st1d { z7.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: mov x9, #12 // =0xc +; CHECK-i64-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: mov x9, #8 // =0x8 +; CHECK-i64-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: mov x9, #4 // =0x4 +; CHECK-i64-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: st1d { z0.d }, p0, [x8] +; 
CHECK-i64-NEXT: mov sp, x29 +; CHECK-i64-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-i64-NEXT: ret + %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v16f64(<32 x double> %x) + ret <32 x iXLen> %a } -declare <32 x i64> @llvm.lrint.v32i64.v32f64(<32 x double>) +declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>) diff --git a/llvm/test/CodeGen/AArch64/sve-llrint.ll b/llvm/test/CodeGen/AArch64/sve-llrint.ll index 825ff55117d5c..a881af1612016 100644 --- a/llvm/test/CodeGen/AArch64/sve-llrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-llrint.ll @@ -5,8 +5,22 @@ define @llrint_v1i64_v1f16( %x) { ; CHECK-LABEL: llrint_v1i64_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z3.h +; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv1i64.nxv1f16( %x) ret %a @@ -17,8 +31,22 @@ define @llrint_v1i64_v2f16( %x) { ; CHECK-LABEL: llrint_v1i64_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z3.h +; CHECK-NEXT: mov 
z3.d, #0x7fffffffffffffff +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv2i64.nxv2f16( %x) ret %a @@ -28,16 +56,43 @@ declare @llvm.llrint.nxv2i64.nxv2f16() define @llrint_v4i64_v4f16( %x) { ; CHECK-LABEL: llrint_v4i64_v4f16: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: mov w8, #64511 // =0xfbff ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff ; CHECK-NEXT: frintx z1.h, p0/m, z1.h -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: frintx z2.h, p0/m, z0.h -; CHECK-NEXT: movprfx z0, z1 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h +; CHECK-NEXT: frintx z0.h, p0/m, z0.h +; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h +; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: movprfx z4, z1 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.h +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z1.h, z3.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z0.h, z3.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: sel z3.d, p1, z2.d, z4.d +; CHECK-NEXT: fcmuo p1.h, p0/z, z1.h, z1.h +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: sel z2.d, p2, z2.d, z5.d +; CHECK-NEXT: sel z0.d, p3, z6.d, z3.d +; CHECK-NEXT: sel z1.d, p4, z6.d, z2.d 
+; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv4i64.nxv4f16( %x) ret %a @@ -47,25 +102,74 @@ declare @llvm.llrint.nxv4i64.nxv4f16() define @llrint_v8i64_v8f16( %x) { ; CHECK-LABEL: llrint_v8i64_v8f16: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.s, z0.h ; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: mov w8, #64511 // =0xfbff ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z4.h, w8 +; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z6.h, w8 +; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff ; CHECK-NEXT: uunpklo z2.d, z1.s ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: uunpklo z3.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: frintx z1.h, p0/m, z1.h ; CHECK-NEXT: frintx z2.h, p0/m, z2.h +; CHECK-NEXT: frintx z1.h, p0/m, z1.h ; CHECK-NEXT: frintx z3.h, p0/m, z3.h -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: frintx z4.h, p0/m, z0.h -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h -; CHECK-NEXT: movprfx z0, z2 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.h -; CHECK-NEXT: movprfx z2, z3 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z3.h -; CHECK-NEXT: movprfx z3, z4 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z4.h +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: frintx z5.h, p0/m, z0.h +; CHECK-NEXT: mov z0.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, z4.h 
+; CHECK-NEXT: fcmge p2.h, p0/z, z1.h, z4.h +; CHECK-NEXT: fcmge p3.h, p0/z, z3.h, z4.h +; CHECK-NEXT: fcmge p4.h, p0/z, z5.h, z4.h +; CHECK-NEXT: movprfx z4, z2 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z2.h +; CHECK-NEXT: movprfx z7, z1 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z1.h +; CHECK-NEXT: movprfx z24, z3 +; CHECK-NEXT: fcvtzs z24.d, p0/m, z3.h +; CHECK-NEXT: movprfx z25, z5 +; CHECK-NEXT: fcvtzs z25.d, p0/m, z5.h +; CHECK-NEXT: fcmgt p7.h, p0/z, z3.h, z6.h +; CHECK-NEXT: fcmgt p5.h, p0/z, z2.h, z6.h +; CHECK-NEXT: fcmgt p6.h, p0/z, z1.h, z6.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: mov z4.d, p1/m, z0.d +; CHECK-NEXT: fcmgt p1.h, p0/z, z5.h, z6.h +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: sel z6.d, p2, z0.d, z7.d +; CHECK-NEXT: fcmuo p2.h, p0/z, z2.h, z2.h +; CHECK-NEXT: sel z7.d, p3, z0.d, z24.d +; CHECK-NEXT: fcmuo p3.h, p0/z, z1.h, z1.h +; CHECK-NEXT: sel z24.d, p4, z0.d, z25.d +; CHECK-NEXT: fcmuo p4.h, p0/z, z3.h, z3.h +; CHECK-NEXT: fcmuo p0.h, p0/z, z5.h, z5.h +; CHECK-NEXT: sel z0.d, p5, z26.d, z4.d +; CHECK-NEXT: sel z1.d, p6, z26.d, z6.d +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z2.d, p7, z26.d, z7.d +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z3.d, p1, z26.d, z24.d +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv8i64.nxv8f16( %x) ret %a @@ -75,46 +179,134 @@ declare @llvm.llrint.nxv8i64.nxv8f16() define @llrint_v16i64_v16f16( %x) { ; CHECK-LABEL: llrint_v16i64_v16f16: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, 
#-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-3 +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG ; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: uunpkhi z0.s, z0.h -; CHECK-NEXT: uunpklo z3.s, z1.h -; CHECK-NEXT: uunpkhi z1.s, z1.h +; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: uunpklo z4.s, z1.h ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z4.d, z2.s +; CHECK-NEXT: uunpkhi z1.s, z1.h +; CHECK-NEXT: mov z5.h, w8 +; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z25.d, #0x8000000000000000 +; CHECK-NEXT: mov z27.h, w8 +; CHECK-NEXT: mov z7.d, #0x7fffffffffffffff +; CHECK-NEXT: uunpklo z3.d, z2.s ; CHECK-NEXT: uunpkhi z2.d, z2.s -; CHECK-NEXT: uunpklo z5.d, z0.s +; CHECK-NEXT: uunpklo z6.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: uunpklo z6.d, z3.s -; CHECK-NEXT: uunpkhi z3.d, z3.s -; CHECK-NEXT: uunpklo z7.d, z1.s +; CHECK-NEXT: uunpklo z24.d, z4.s +; CHECK-NEXT: uunpkhi z4.d, z4.s +; CHECK-NEXT: uunpklo z26.d, z1.s ; CHECK-NEXT: uunpkhi z1.d, z1.s -; CHECK-NEXT: frintx z4.h, p0/m, z4.h 
; CHECK-NEXT: frintx z2.h, p0/m, z2.h -; CHECK-NEXT: frintx z5.h, p0/m, z5.h -; CHECK-NEXT: movprfx z24, z0 -; CHECK-NEXT: frintx z24.h, p0/m, z0.h +; CHECK-NEXT: frintx z3.h, p0/m, z3.h ; CHECK-NEXT: frintx z6.h, p0/m, z6.h -; CHECK-NEXT: movprfx z25, z3 -; CHECK-NEXT: frintx z25.h, p0/m, z3.h -; CHECK-NEXT: frintx z7.h, p0/m, z7.h -; CHECK-NEXT: movprfx z26, z1 -; CHECK-NEXT: frintx z26.h, p0/m, z1.h -; CHECK-NEXT: movprfx z0, z4 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.h +; CHECK-NEXT: movprfx z28, z0 +; CHECK-NEXT: frintx z28.h, p0/m, z0.h +; CHECK-NEXT: movprfx z29, z4 +; CHECK-NEXT: frintx z29.h, p0/m, z4.h +; CHECK-NEXT: frintx z24.h, p0/m, z24.h +; CHECK-NEXT: movprfx z30, z1 +; CHECK-NEXT: frintx z30.h, p0/m, z1.h +; CHECK-NEXT: frintx z26.h, p0/m, z26.h +; CHECK-NEXT: fcmge p5.h, p0/z, z2.h, z5.h +; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, z5.h ; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h -; CHECK-NEXT: movprfx z2, z5 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z5.h -; CHECK-NEXT: movprfx z3, z24 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z24.h -; CHECK-NEXT: movprfx z4, z6 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z6.h -; CHECK-NEXT: movprfx z5, z25 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z25.h -; CHECK-NEXT: movprfx z6, z7 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z7.h -; CHECK-NEXT: movprfx z7, z26 -; CHECK-NEXT: fcvtzs z7.d, p0/m, z26.h +; CHECK-NEXT: movprfx z0, z3 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z3.h +; CHECK-NEXT: fcmge p6.h, p0/z, z6.h, z5.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z3.h, z27.h +; CHECK-NEXT: fcmuo p1.h, p0/z, z3.h, z3.h +; CHECK-NEXT: fcmge p7.h, p0/z, z28.h, z5.h +; CHECK-NEXT: movprfx z3, z6 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z6.h +; CHECK-NEXT: fcmge p8.h, p0/z, z24.h, z5.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z2.h, z27.h +; CHECK-NEXT: fcmge p9.h, p0/z, z26.h, z5.h +; CHECK-NEXT: not p5.b, p0/z, p5.b +; CHECK-NEXT: movprfx z4, z24 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z24.h +; CHECK-NEXT: fcmge p10.h, p0/z, z30.h, z5.h +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: 
movprfx z31, z26 +; CHECK-NEXT: fcvtzs z31.d, p0/m, z26.h +; CHECK-NEXT: movprfx z8, z30 +; CHECK-NEXT: fcvtzs z8.d, p0/m, z30.h +; CHECK-NEXT: mov z1.d, p5/m, z25.d +; CHECK-NEXT: fcmge p5.h, p0/z, z29.h, z5.h +; CHECK-NEXT: not p6.b, p0/z, p6.b +; CHECK-NEXT: mov z0.d, p2/m, z25.d +; CHECK-NEXT: fcmuo p2.h, p0/z, z2.h, z2.h +; CHECK-NEXT: movprfx z2, z28 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z28.h +; CHECK-NEXT: movprfx z5, z29 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z29.h +; CHECK-NEXT: not p7.b, p0/z, p7.b +; CHECK-NEXT: mov z3.d, p6/m, z25.d +; CHECK-NEXT: not p6.b, p0/z, p8.b +; CHECK-NEXT: fcmgt p8.h, p0/z, z6.h, z27.h +; CHECK-NEXT: mov z1.d, p4/m, z7.d +; CHECK-NEXT: not p5.b, p0/z, p5.b +; CHECK-NEXT: mov z0.d, p3/m, z7.d +; CHECK-NEXT: fcmgt p3.h, p0/z, z29.h, z27.h +; CHECK-NEXT: sel z9.d, p7, z25.d, z2.d +; CHECK-NEXT: not p7.b, p0/z, p9.b +; CHECK-NEXT: mov z4.d, p6/m, z25.d +; CHECK-NEXT: not p6.b, p0/z, p10.b +; CHECK-NEXT: fcmgt p10.h, p0/z, z28.h, z27.h +; CHECK-NEXT: mov z5.d, p5/m, z25.d +; CHECK-NEXT: fcmgt p5.h, p0/z, z24.h, z27.h +; CHECK-NEXT: fcmuo p9.h, p0/z, z6.h, z6.h +; CHECK-NEXT: sel z6.d, p7, z25.d, z31.d +; CHECK-NEXT: sel z25.d, p6, z25.d, z8.d +; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: fcmgt p6.h, p0/z, z26.h, z27.h +; CHECK-NEXT: fcmgt p7.h, p0/z, z30.h, z27.h +; CHECK-NEXT: fcmuo p4.h, p0/z, z28.h, z28.h +; CHECK-NEXT: sel z2.d, p8, z7.d, z3.d +; CHECK-NEXT: sel z3.d, p10, z7.d, z9.d +; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: fcmuo p8.h, p0/z, z29.h, z29.h +; CHECK-NEXT: mov z4.d, p5/m, z7.d +; CHECK-NEXT: fcmuo p5.h, p0/z, z24.h, z24.h +; CHECK-NEXT: fcmuo p10.h, p0/z, z26.h, z26.h +; CHECK-NEXT: mov z5.d, p3/m, z7.d +; CHECK-NEXT: mov z6.d, p6/m, z7.d +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: fcmuo p0.h, p0/z, z30.h, z30.h +; CHECK-NEXT: sel z7.d, p7, z7.d, z25.d +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload 
+; CHECK-NEXT: mov z2.d, p9/m, #0 // =0x0 +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z3.d, p4/m, #0 // =0x0 +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z4.d, p5/m, #0 // =0x0 +; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z6.d, p10/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #3 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv16i64.nxv16f16( %x) ret %a @@ -124,93 +316,292 @@ declare @llvm.llrint.nxv16i64.nxv16f16() define @llrint_v32i64_v32f16( %x) { ; CHECK-LABEL: llrint_v32i64_v32f16: ; CHECK: // %bb.0: -; CHECK-NEXT: uunpkhi z4.s, z3.h -; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: rdvl x9, #15 +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-17 +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 136 * VG +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 
0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG +; CHECK-NEXT: uunpkhi z5.s, z0.h +; CHECK-NEXT: uunpklo z4.s, z0.h +; CHECK-NEXT: mov w9, #64511 // =0xfbff ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpkhi z7.s, z2.h -; CHECK-NEXT: uunpklo z2.s, z2.h +; CHECK-NEXT: uunpklo z6.s, z1.h +; CHECK-NEXT: mov z30.h, w9 +; CHECK-NEXT: uunpkhi z10.s, z1.h +; CHECK-NEXT: mov w9, #31743 // =0x7bff +; CHECK-NEXT: mov z29.d, #0x8000000000000000 +; CHECK-NEXT: uunpklo z8.s, z2.h +; CHECK-NEXT: uunpkhi z13.s, z3.h +; CHECK-NEXT: uunpklo z18.s, z3.h +; CHECK-NEXT: uunpklo z7.d, z5.s +; CHECK-NEXT: uunpklo z0.d, z4.s +; CHECK-NEXT: uunpkhi z4.d, z4.s +; CHECK-NEXT: uunpkhi z24.d, z5.s +; CHECK-NEXT: uunpklo z25.d, z6.s +; CHECK-NEXT: uunpkhi z26.d, z6.s +; CHECK-NEXT: uunpklo z27.d, z10.s +; CHECK-NEXT: uunpkhi z10.d, z10.s +; CHECK-NEXT: uunpklo z12.d, z8.s +; CHECK-NEXT: uunpkhi z16.d, z8.s +; CHECK-NEXT: movprfx z5, z7 +; CHECK-NEXT: frintx z5.h, p0/m, z7.h +; CHECK-NEXT: movprfx z1, z4 +; CHECK-NEXT: frintx z1.h, p0/m, z4.h +; CHECK-NEXT: frintx z0.h, p0/m, z0.h +; CHECK-NEXT: movprfx z6, z24 +; CHECK-NEXT: frintx z6.h, p0/m, z24.h +; CHECK-NEXT: movprfx z24, z25 +; CHECK-NEXT: frintx z24.h, p0/m, z25.h +; CHECK-NEXT: movprfx z25, z26 +; CHECK-NEXT: frintx z25.h, p0/m, z26.h +; CHECK-NEXT: movprfx z28, z27 +; CHECK-NEXT: 
frintx z28.h, p0/m, z27.h +; CHECK-NEXT: movprfx z8, z10 +; CHECK-NEXT: frintx z8.h, p0/m, z10.h +; CHECK-NEXT: mov z7.h, w9 +; CHECK-NEXT: mov z4.d, #0x7fffffffffffffff +; CHECK-NEXT: rdvl x9, #15 +; CHECK-NEXT: fcmge p3.h, p0/z, z5.h, z30.h +; CHECK-NEXT: movprfx z11, z5 +; CHECK-NEXT: fcvtzs z11.d, p0/m, z5.h +; CHECK-NEXT: fcmge p2.h, p0/z, z1.h, z30.h +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z30.h +; CHECK-NEXT: fcmge p4.h, p0/z, z6.h, z30.h +; CHECK-NEXT: movprfx z9, z6 +; CHECK-NEXT: fcvtzs z9.d, p0/m, z6.h +; CHECK-NEXT: movprfx z15, z25 +; CHECK-NEXT: fcvtzs z15.d, p0/m, z25.h +; CHECK-NEXT: movprfx z14, z24 +; CHECK-NEXT: fcvtzs z14.d, p0/m, z24.h +; CHECK-NEXT: movprfx z26, z0 +; CHECK-NEXT: fcvtzs z26.d, p0/m, z0.h +; CHECK-NEXT: movprfx z19, z28 +; CHECK-NEXT: fcvtzs z19.d, p0/m, z28.h +; CHECK-NEXT: movprfx z31, z1 +; CHECK-NEXT: fcvtzs z31.d, p0/m, z1.h +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: not p6.b, p0/z, p2.b +; CHECK-NEXT: fcmge p2.h, p0/z, z25.h, z30.h +; CHECK-NEXT: sel z27.d, p3, z29.d, z11.d +; CHECK-NEXT: uunpkhi z11.s, z2.h +; CHECK-NEXT: not p5.b, p0/z, p1.b +; CHECK-NEXT: fcmge p1.h, p0/z, z24.h, z30.h +; CHECK-NEXT: not p3.b, p0/z, p4.b +; CHECK-NEXT: fcmge p4.h, p0/z, z28.h, z30.h +; CHECK-NEXT: mov z26.d, p5/m, z29.d +; CHECK-NEXT: mov z31.d, p6/m, z29.d +; CHECK-NEXT: sel z2.d, p3, z29.d, z9.d +; CHECK-NEXT: movprfx z9, z12 +; CHECK-NEXT: frintx z9.h, p0/m, z12.h +; CHECK-NEXT: uunpkhi z12.d, z13.s +; CHECK-NEXT: uunpklo z17.d, z11.s +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: sel z3.d, p2, z29.d, z15.d +; CHECK-NEXT: uunpklo z15.d, z13.s +; CHECK-NEXT: fcmge p2.h, p0/z, z8.h, z30.h +; CHECK-NEXT: sel z10.d, p1, z29.d, z14.d +; CHECK-NEXT: movprfx z14, z16 +; CHECK-NEXT: frintx z14.h, p0/m, z16.h +; CHECK-NEXT: uunpkhi z16.d, z18.s +; CHECK-NEXT: movprfx z13, z17 +; CHECK-NEXT: frintx z13.h, p0/m, z17.h +; CHECK-NEXT: movprfx z20, z12 +; CHECK-NEXT: frintx z20.h, p0/m, z12.h +; 
CHECK-NEXT: fcmge p3.h, p0/z, z9.h, z30.h +; CHECK-NEXT: uunpkhi z17.d, z11.s +; CHECK-NEXT: uunpklo z18.d, z18.s +; CHECK-NEXT: movprfx z12, z8 +; CHECK-NEXT: fcvtzs z12.d, p0/m, z8.h +; CHECK-NEXT: movprfx z21, z15 +; CHECK-NEXT: frintx z21.h, p0/m, z15.h +; CHECK-NEXT: not p1.b, p0/z, p4.b +; CHECK-NEXT: movprfx z15, z9 +; CHECK-NEXT: fcvtzs z15.d, p0/m, z9.h +; CHECK-NEXT: frintx z16.h, p0/m, z16.h +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: movprfx z22, z14 +; CHECK-NEXT: fcvtzs z22.d, p0/m, z14.h +; CHECK-NEXT: fcmge p4.h, p0/z, z13.h, z30.h +; CHECK-NEXT: fcmge p5.h, p0/z, z20.h, z30.h +; CHECK-NEXT: sel z11.d, p1, z29.d, z19.d +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: frintx z17.h, p0/m, z17.h +; CHECK-NEXT: frintx z18.h, p0/m, z18.h +; CHECK-NEXT: movprfx z19, z20 +; CHECK-NEXT: fcvtzs z19.d, p0/m, z20.h +; CHECK-NEXT: mov z12.d, p2/m, z29.d +; CHECK-NEXT: fcmge p2.h, p0/z, z21.h, z30.h +; CHECK-NEXT: fcmge p1.h, p0/z, z14.h, z30.h +; CHECK-NEXT: mov z15.d, p3/m, z29.d +; CHECK-NEXT: movprfx z23, z21 +; CHECK-NEXT: fcvtzs z23.d, p0/m, z21.h +; CHECK-NEXT: not p3.b, p0/z, p4.b +; CHECK-NEXT: fcmge p4.h, p0/z, z16.h, z30.h +; CHECK-NEXT: fcmgt p8.h, p0/z, z21.h, z7.h +; CHECK-NEXT: not p5.b, p0/z, p5.b +; CHECK-NEXT: fcmge p6.h, p0/z, z17.h, z30.h +; CHECK-NEXT: fcmge p7.h, p0/z, z18.h, z30.h +; CHECK-NEXT: movprfx z30, z16 +; CHECK-NEXT: fcvtzs z30.d, p0/m, z16.h +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: fcmuo p9.h, p0/z, z21.h, z21.h +; CHECK-NEXT: mov z19.d, p5/m, z29.d +; CHECK-NEXT: fcmgt p5.h, p0/z, z20.h, z7.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: mov z23.d, p2/m, z29.d +; CHECK-NEXT: fcmuo p2.h, p0/z, z20.h, z20.h +; CHECK-NEXT: movprfx z20, z18 +; CHECK-NEXT: fcvtzs z20.d, p0/m, z18.h +; CHECK-NEXT: movprfx z21, z13 +; CHECK-NEXT: fcvtzs z21.d, p0/m, z13.h +; CHECK-NEXT: mov z22.d, p1/m, z29.d +; CHECK-NEXT: not p1.b, p0/z, p7.b +; CHECK-NEXT: mov z30.d, p4/m, z29.d +; 
CHECK-NEXT: fcmgt p4.h, p0/z, z18.h, z7.h +; CHECK-NEXT: mov z19.d, p5/m, z4.d +; CHECK-NEXT: fcmuo p7.h, p0/z, z18.h, z18.h +; CHECK-NEXT: movprfx z18, z17 +; CHECK-NEXT: fcvtzs z18.d, p0/m, z17.h +; CHECK-NEXT: fcmgt p5.h, p0/z, z16.h, z7.h +; CHECK-NEXT: not p6.b, p0/z, p6.b +; CHECK-NEXT: mov z23.d, p8/m, z4.d +; CHECK-NEXT: mov z20.d, p1/m, z29.d +; CHECK-NEXT: mov z21.d, p3/m, z29.d +; CHECK-NEXT: fcmuo p3.h, p0/z, z16.h, z16.h +; CHECK-NEXT: mov z19.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p2.h, p0/z, z17.h, z7.h ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: uunpkhi z5.d, z4.s -; CHECK-NEXT: uunpklo z4.d, z4.s -; CHECK-NEXT: uunpkhi z6.d, z3.s -; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: uunpkhi z24.d, z7.s -; CHECK-NEXT: uunpklo z7.d, z7.s -; CHECK-NEXT: frintx z5.h, p0/m, z5.h -; CHECK-NEXT: frintx z4.h, p0/m, z4.h -; CHECK-NEXT: frintx z6.h, p0/m, z6.h -; CHECK-NEXT: frintx z3.h, p0/m, z3.h -; CHECK-NEXT: frintx z24.h, p0/m, z24.h -; CHECK-NEXT: frintx z7.h, p0/m, z7.h -; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h -; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.h -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h -; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.h -; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] +; CHECK-NEXT: sel z29.d, p6, z29.d, z18.d +; CHECK-NEXT: mov z23.d, p9/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p6.h, p0/z, z14.h, z7.h +; CHECK-NEXT: mov z30.d, p5/m, z4.d +; CHECK-NEXT: sel z16.d, p4, z4.d, z20.d +; CHECK-NEXT: fcmuo p4.h, p0/z, z17.h, z17.h +; CHECK-NEXT: st1b { z19.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #14 -; CHECK-NEXT: uunpkhi z5.s, z1.h -; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9] -; CHECK-NEXT: uunpkhi z4.d, z2.s +; CHECK-NEXT: fcmgt p5.h, p0/z, z1.h, z7.h +; CHECK-NEXT: st1b { z23.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #13 -; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9] +; CHECK-NEXT: mov z29.d, p2/m, z4.d +; CHECK-NEXT: mov z30.d, p3/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p3.h, p0/z, z13.h, z7.h +; CHECK-NEXT: mov z16.d, p7/m, #0 // =0x0 +; 
CHECK-NEXT: fcmgt p2.h, p0/z, z9.h, z7.h +; CHECK-NEXT: fcmuo p7.h, p0/z, z14.h, z14.h +; CHECK-NEXT: mov z29.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.h, p0/z, z13.h, z13.h +; CHECK-NEXT: st1b { z30.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #12 -; CHECK-NEXT: movprfx z6, z24 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z24.h -; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: uunpkhi z24.s, z0.h -; CHECK-NEXT: st1b { z3.b }, p1, [x8, x9] -; CHECK-NEXT: uunpklo z3.d, z5.s -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: frintx z4.h, p0/m, z4.h +; CHECK-NEXT: sel z30.d, p5, z4.d, z31.d +; CHECK-NEXT: st1b { z16.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #11 -; CHECK-NEXT: uunpkhi z25.d, z5.s -; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9] +; CHECK-NEXT: sel z31.d, p3, z4.d, z21.d +; CHECK-NEXT: st1b { z29.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #10 -; CHECK-NEXT: uunpkhi z5.d, z1.s -; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: frintx z2.h, p0/m, z2.h -; CHECK-NEXT: uunpkhi z6.d, z24.s -; CHECK-NEXT: uunpklo z24.d, z24.s -; CHECK-NEXT: frintx z3.h, p0/m, z3.h -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h -; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] -; CHECK-NEXT: uunpkhi z7.d, z0.s -; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: fcmgt p5.h, p0/z, z24.h, z7.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z28.h, z7.h +; CHECK-NEXT: sel z13.d, p2, z4.d, z15.d +; CHECK-NEXT: fcmuo p2.h, p0/z, z9.h, z9.h +; CHECK-NEXT: sel z29.d, p6, z4.d, z22.d +; CHECK-NEXT: mov z31.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p4.h, p0/z, z8.h, z7.h +; CHECK-NEXT: fcmgt p6.h, p0/z, z5.h, z7.h +; CHECK-NEXT: sel z9.d, p5, z4.d, z10.d +; CHECK-NEXT: fcmgt p5.h, p0/z, z6.h, z7.h +; CHECK-NEXT: st1b { z31.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #9 -; CHECK-NEXT: frintx z25.h, p0/m, z25.h -; CHECK-NEXT: frintx z5.h, p0/m, z5.h -; CHECK-NEXT: frintx z1.h, p0/m, z1.h -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h -; CHECK-NEXT: frintx z6.h, p0/m, z6.h -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h -; CHECK-NEXT: 
st1b { z4.b }, p1, [x8, x9] -; CHECK-NEXT: movprfx z4, z24 -; CHECK-NEXT: frintx z4.h, p0/m, z24.h -; CHECK-NEXT: frintx z7.h, p0/m, z7.h -; CHECK-NEXT: frintx z0.h, p0/m, z0.h +; CHECK-NEXT: mov z29.d, p7/m, #0 // =0x0 +; CHECK-NEXT: sel z10.d, p3, z4.d, z11.d +; CHECK-NEXT: fcmgt p3.h, p0/z, z25.h, z7.h +; CHECK-NEXT: mov z13.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p7.h, p0/z, z8.h, z8.h +; CHECK-NEXT: fcmuo p2.h, p0/z, z28.h, z28.h +; CHECK-NEXT: sel z28.d, p4, z4.d, z12.d +; CHECK-NEXT: st1b { z29.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #8 -; CHECK-NEXT: fcvtzs z25.d, p0/m, z25.h -; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h -; CHECK-NEXT: st1b { z2.b }, p1, [x8, x9] -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h -; CHECK-NEXT: movprfx z2, z6 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.h -; CHECK-NEXT: st1d { z3.d }, p0, [x8, #6, mul vl] -; CHECK-NEXT: movprfx z3, z4 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z4.h -; CHECK-NEXT: movprfx z4, z7 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.h -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h -; CHECK-NEXT: st1d { z25.d }, p0, [x8, #7, mul vl] -; CHECK-NEXT: st1d { z5.d }, p0, [x8, #5, mul vl] -; CHECK-NEXT: st1d { z1.d }, p0, [x8, #4, mul vl] +; CHECK-NEXT: fcmuo p4.h, p0/z, z25.h, z25.h +; CHECK-NEXT: st1b { z13.b }, p1, [x8, x9] +; CHECK-NEXT: fcmuo p1.h, p0/z, z24.h, z24.h +; CHECK-NEXT: mov z2.d, p5/m, z4.d +; CHECK-NEXT: mov z3.d, p3/m, z4.d +; CHECK-NEXT: fcmgt p3.h, p0/z, z0.h, z7.h +; CHECK-NEXT: mov z28.d, p7/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p7.h, p0/z, z6.h, z6.h +; CHECK-NEXT: mov z10.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p2.h, p0/z, z5.h, z5.h +; CHECK-NEXT: sel z5.d, p6, z4.d, z27.d +; CHECK-NEXT: mov z3.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.h, p0/z, z1.h, z1.h +; CHECK-NEXT: mov z9.d, p1/m, #0 // =0x0 +; CHECK-NEXT: st1d { z28.d }, p0, [x8, #7, mul vl] +; CHECK-NEXT: fcmuo p1.h, p0/z, z0.h, z0.h +; CHECK-NEXT: sel z0.d, p3, z4.d, z26.d +; CHECK-NEXT: st1d { z10.d }, p0, [x8, #6, mul vl] +; CHECK-NEXT: mov z2.d, p7/m, #0 // =0x0 +; 
CHECK-NEXT: st1d { z3.d }, p0, [x8, #5, mul vl] +; CHECK-NEXT: mov z5.d, p2/m, #0 // =0x0 +; CHECK-NEXT: st1d { z9.d }, p0, [x8, #4, mul vl] +; CHECK-NEXT: mov z30.d, p4/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: st1d { z2.d }, p0, [x8, #3, mul vl] -; CHECK-NEXT: st1d { z3.d }, p0, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z4.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z5.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z30.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1d { z0.d }, p0, [x8] +; CHECK-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #17 +; 
CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv32i64.nxv32f16( %x) ret %a @@ -221,8 +612,22 @@ define @llrint_v1i64_v1f32( %x) { ; CHECK-LABEL: llrint_v1i64_v1f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff ; CHECK-NEXT: frintx z0.s, p0/m, z0.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: mov z3.s, w8 +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z3.s +; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv1i64.nxv1f32( %x) ret %a @@ -233,8 +638,22 @@ define @llrint_v2i64_v2f32( %x) { ; CHECK-LABEL: llrint_v2i64_v2f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff ; CHECK-NEXT: frintx z0.s, p0/m, z0.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: mov z3.s, w8 +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z3.s +; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv2i64.nxv2f32( %x) ret %a @@ -244,16 +663,43 @@ declare @llvm.llrint.nxv2i64.nxv2f32() define @llrint_v4i64_v4f32( %x) { ; CHECK-LABEL: 
llrint_v4i64_v4f32: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff +; CHECK-NEXT: mov z3.s, w8 +; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff ; CHECK-NEXT: frintx z1.s, p0/m, z1.s -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: frintx z2.s, p0/m, z0.s -; CHECK-NEXT: movprfx z0, z1 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.s +; CHECK-NEXT: frintx z0.s, p0/m, z0.s +; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s +; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: movprfx z4, z1 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.s +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.s +; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z3.s +; CHECK-NEXT: fcmgt p4.s, p0/z, z0.s, z3.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: sel z3.d, p1, z2.d, z4.d +; CHECK-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; CHECK-NEXT: sel z2.d, p2, z2.d, z5.d +; CHECK-NEXT: sel z0.d, p3, z6.d, z3.d +; CHECK-NEXT: sel z1.d, p4, z6.d, z2.d +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv4i64.nxv4f32( %x) ret %a @@ -263,25 +709,71 @@ declare @llvm.llrint.nxv4i64.nxv4f32() define 
@llrint_v8i64_v8f32( %x) { ; CHECK-LABEL: llrint_v8i64_v8f32: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z2.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 ; CHECK-NEXT: uunpklo z3.d, z1.s -; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: mov z4.s, w8 +; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff +; CHECK-NEXT: mov z5.d, #0x8000000000000000 +; CHECK-NEXT: mov z6.s, w8 +; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff ; CHECK-NEXT: frintx z2.s, p0/m, z2.s -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: frintx z4.s, p0/m, z0.s +; CHECK-NEXT: frintx z0.s, p0/m, z0.s ; CHECK-NEXT: frintx z3.s, p0/m, z3.s -; CHECK-NEXT: movprfx z5, z1 -; CHECK-NEXT: frintx z5.s, p0/m, z1.s -; CHECK-NEXT: movprfx z0, z2 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.s -; CHECK-NEXT: movprfx z1, z4 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z4.s -; CHECK-NEXT: movprfx z2, z3 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z3.s -; CHECK-NEXT: movprfx z3, z5 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z5.s +; CHECK-NEXT: frintx z1.s, p0/m, z1.s +; CHECK-NEXT: fcmge p1.s, p0/z, z2.s, z4.s +; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z4.s +; CHECK-NEXT: movprfx z7, z0 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z0.s +; CHECK-NEXT: fcmge p3.s, p0/z, z3.s, z4.s +; CHECK-NEXT: fcmge p4.s, p0/z, z1.s, z4.s +; CHECK-NEXT: movprfx z4, z2 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z2.s +; CHECK-NEXT: movprfx z24, z3 +; CHECK-NEXT: fcvtzs z24.d, p0/m, z3.s +; CHECK-NEXT: 
movprfx z25, z1 +; CHECK-NEXT: fcvtzs z25.d, p0/m, z1.s +; CHECK-NEXT: fcmgt p7.s, p0/z, z3.s, z6.s +; CHECK-NEXT: fcmgt p5.s, p0/z, z2.s, z6.s +; CHECK-NEXT: fcmgt p6.s, p0/z, z0.s, z6.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: mov z4.d, p1/m, z5.d +; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z6.s +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: sel z6.d, p2, z5.d, z7.d +; CHECK-NEXT: fcmuo p2.s, p0/z, z2.s, z2.s +; CHECK-NEXT: sel z7.d, p3, z5.d, z24.d +; CHECK-NEXT: fcmuo p3.s, p0/z, z0.s, z0.s +; CHECK-NEXT: sel z5.d, p4, z5.d, z25.d +; CHECK-NEXT: fcmuo p4.s, p0/z, z3.s, z3.s +; CHECK-NEXT: fcmuo p0.s, p0/z, z1.s, z1.s +; CHECK-NEXT: sel z0.d, p5, z26.d, z4.d +; CHECK-NEXT: sel z1.d, p6, z26.d, z6.d +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z2.d, p7, z26.d, z7.d +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z3.d, p1, z26.d, z5.d +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv8i64.nxv8f32( %x) ret %a @@ -291,43 +783,129 @@ declare @llvm.llrint.nxv8i64.nxv8f32() define @llrint_v16i64_v16f32( %x) { ; CHECK-LABEL: llrint_v16i64_v16f32: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG ; CHECK-NEXT: uunpklo z4.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: uunpklo z5.d, z1.s +; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z7.d, z1.s ; CHECK-NEXT: uunpkhi z1.d, z1.s -; CHECK-NEXT: uunpklo z6.d, z2.s +; CHECK-NEXT: uunpklo z24.d, z2.s ; CHECK-NEXT: uunpkhi z2.d, z2.s -; CHECK-NEXT: uunpklo z7.d, z3.s +; CHECK-NEXT: uunpklo z25.d, z3.s ; CHECK-NEXT: uunpkhi z3.d, z3.s -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: frintx z4.s, p0/m, z4.s -; CHECK-NEXT: movprfx z24, z0 -; CHECK-NEXT: frintx z24.s, p0/m, z0.s -; CHECK-NEXT: frintx z5.s, p0/m, z5.s -; CHECK-NEXT: movprfx z25, z1 -; CHECK-NEXT: frintx z25.s, p0/m, z1.s -; CHECK-NEXT: frintx z6.s, p0/m, z6.s -; CHECK-NEXT: movprfx z26, z2 -; CHECK-NEXT: frintx z26.s, p0/m, z2.s +; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff +; CHECK-NEXT: movprfx z5, z4 +; CHECK-NEXT: frintx z5.s, p0/m, z4.s +; CHECK-NEXT: movprfx z6, z0 +; CHECK-NEXT: frintx z6.s, p0/m, z0.s +; CHECK-NEXT: mov z4.s, w8 ; CHECK-NEXT: frintx z7.s, p0/m, z7.s -; CHECK-NEXT: movprfx z27, z3 -; CHECK-NEXT: frintx z27.s, p0/m, z3.s -; CHECK-NEXT: 
movprfx z0, z4 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.s -; CHECK-NEXT: movprfx z1, z24 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z24.s -; CHECK-NEXT: movprfx z2, z5 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z5.s -; CHECK-NEXT: movprfx z3, z25 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z25.s -; CHECK-NEXT: movprfx z4, z6 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z6.s -; CHECK-NEXT: movprfx z5, z26 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z26.s -; CHECK-NEXT: movprfx z6, z7 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z7.s -; CHECK-NEXT: movprfx z7, z27 -; CHECK-NEXT: fcvtzs z7.d, p0/m, z27.s +; CHECK-NEXT: movprfx z28, z1 +; CHECK-NEXT: frintx z28.s, p0/m, z1.s +; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff +; CHECK-NEXT: mov z0.d, #0x8000000000000000 +; CHECK-NEXT: frintx z24.s, p0/m, z24.s +; CHECK-NEXT: movprfx z29, z2 +; CHECK-NEXT: frintx z29.s, p0/m, z2.s +; CHECK-NEXT: frintx z25.s, p0/m, z25.s +; CHECK-NEXT: movprfx z30, z3 +; CHECK-NEXT: frintx z30.s, p0/m, z3.s +; CHECK-NEXT: mov z27.s, w8 +; CHECK-NEXT: fcmge p1.s, p0/z, z5.s, z4.s +; CHECK-NEXT: fcmge p2.s, p0/z, z6.s, z4.s +; CHECK-NEXT: movprfx z1, z5 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.s +; CHECK-NEXT: movprfx z2, z6 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.s +; CHECK-NEXT: fcmge p5.s, p0/z, z7.s, z4.s +; CHECK-NEXT: fcmge p6.s, p0/z, z28.s, z4.s +; CHECK-NEXT: movprfx z3, z7 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.s +; CHECK-NEXT: fcmge p8.s, p0/z, z29.s, z4.s +; CHECK-NEXT: fcmgt p3.s, p0/z, z5.s, z27.s +; CHECK-NEXT: fcmgt p7.s, p0/z, z6.s, z27.s +; CHECK-NEXT: fcmge p9.s, p0/z, z25.s, z4.s +; CHECK-NEXT: movprfx z31, z25 +; CHECK-NEXT: fcvtzs z31.d, p0/m, z25.s +; CHECK-NEXT: not p4.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p1.s, p0/z, z5.s, z5.s +; CHECK-NEXT: movprfx z5, z28 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z28.s +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: fcmge p10.s, p0/z, z30.s, z4.s +; CHECK-NEXT: movprfx z8, z30 +; CHECK-NEXT: fcvtzs z8.d, p0/m, z30.s +; CHECK-NEXT: mov z1.d, p4/m, z0.d +; CHECK-NEXT: fcmge p4.s, p0/z, z24.s, z4.s +; 
CHECK-NEXT: movprfx z4, z29 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z29.s +; CHECK-NEXT: mov z2.d, p2/m, z0.d +; CHECK-NEXT: fcmuo p2.s, p0/z, z6.s, z6.s +; CHECK-NEXT: movprfx z6, z24 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z24.s +; CHECK-NEXT: not p5.b, p0/z, p5.b +; CHECK-NEXT: not p6.b, p0/z, p6.b +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: mov z3.d, p5/m, z0.d +; CHECK-NEXT: not p5.b, p0/z, p8.b +; CHECK-NEXT: mov z5.d, p6/m, z0.d +; CHECK-NEXT: fcmgt p8.s, p0/z, z7.s, z27.s +; CHECK-NEXT: not p6.b, p0/z, p9.b +; CHECK-NEXT: mov z6.d, p4/m, z0.d +; CHECK-NEXT: fcmuo p9.s, p0/z, z7.s, z7.s +; CHECK-NEXT: not p4.b, p0/z, p10.b +; CHECK-NEXT: fcmgt p10.s, p0/z, z28.s, z27.s +; CHECK-NEXT: sel z7.d, p5, z0.d, z4.d +; CHECK-NEXT: fcmgt p5.s, p0/z, z24.s, z27.s +; CHECK-NEXT: mov z31.d, p6/m, z0.d +; CHECK-NEXT: fcmgt p6.s, p0/z, z30.s, z27.s +; CHECK-NEXT: mov z8.d, p4/m, z0.d +; CHECK-NEXT: sel z0.d, p3, z26.d, z1.d +; CHECK-NEXT: fcmgt p3.s, p0/z, z29.s, z27.s +; CHECK-NEXT: fcmgt p4.s, p0/z, z25.s, z27.s +; CHECK-NEXT: sel z1.d, p7, z26.d, z2.d +; CHECK-NEXT: fcmuo p7.s, p0/z, z28.s, z28.s +; CHECK-NEXT: sel z2.d, p8, z26.d, z3.d +; CHECK-NEXT: sel z3.d, p10, z26.d, z5.d +; CHECK-NEXT: fcmuo p8.s, p0/z, z29.s, z29.s +; CHECK-NEXT: sel z4.d, p5, z26.d, z6.d +; CHECK-NEXT: fcmuo p5.s, p0/z, z24.s, z24.s +; CHECK-NEXT: fcmuo p10.s, p0/z, z25.s, z25.s +; CHECK-NEXT: sel z5.d, p3, z26.d, z7.d +; CHECK-NEXT: fcmuo p0.s, p0/z, z30.s, z30.s +; CHECK-NEXT: sel z7.d, p6, z26.d, z8.d +; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z6.d, p4, z26.d, z31.d +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z2.d, p9/m, #0 // =0x0 +; CHECK-NEXT: mov z3.d, p7/m, #0 // =0x0 +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z4.d, p5/m, #0 // =0x0 +; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 +; CHECK-NEXT: ldr p8, [sp, #3, mul 
vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z6.d, p10/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv16i64.nxv16f32( %x) ret %a @@ -337,87 +915,283 @@ declare @llvm.llrint.nxv16i64.nxv16f32( define @llrint_v32i64_v32f32( %x) { ; CHECK-LABEL: llrint_v32i64_v32f32: ; CHECK: // %bb.0: -; CHECK-NEXT: uunpkhi z24.d, z7.s +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-17 +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill +; 
CHECK-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 136 * VG +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG +; CHECK-NEXT: uunpklo z24.d, z0.s +; CHECK-NEXT: uunpkhi z25.d, z0.s +; CHECK-NEXT: mov w9, #-553648128 // =0xdf000000 +; CHECK-NEXT: uunpklo z26.d, z1.s ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z27.d, z1.s +; CHECK-NEXT: mov z31.s, w9 +; CHECK-NEXT: mov w9, #1593835519 // =0x5effffff +; CHECK-NEXT: uunpklo z28.d, z2.s +; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: uunpklo z30.d, z3.s +; CHECK-NEXT: uunpklo z13.d, z4.s +; CHECK-NEXT: movprfx z0, z24 +; CHECK-NEXT: frintx z0.s, 
p0/m, z24.s +; CHECK-NEXT: movprfx z1, z25 +; CHECK-NEXT: frintx z1.s, p0/m, z25.s +; CHECK-NEXT: uunpkhi z15.d, z4.s +; CHECK-NEXT: movprfx z24, z26 +; CHECK-NEXT: frintx z24.s, p0/m, z26.s +; CHECK-NEXT: uunpkhi z26.d, z2.s +; CHECK-NEXT: movprfx z25, z27 +; CHECK-NEXT: frintx z25.s, p0/m, z27.s +; CHECK-NEXT: movprfx z27, z28 +; CHECK-NEXT: frintx z27.s, p0/m, z28.s +; CHECK-NEXT: uunpklo z16.d, z5.s +; CHECK-NEXT: uunpkhi z17.d, z7.s +; CHECK-NEXT: frintx z30.s, p0/m, z30.s +; CHECK-NEXT: uunpklo z18.d, z7.s +; CHECK-NEXT: uunpklo z21.d, z6.s +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z31.s +; CHECK-NEXT: movprfx z9, z0 +; CHECK-NEXT: fcvtzs z9.d, p0/m, z0.s +; CHECK-NEXT: movprfx z10, z1 +; CHECK-NEXT: fcvtzs z10.d, p0/m, z1.s +; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, z31.s +; CHECK-NEXT: fcmge p3.s, p0/z, z24.s, z31.s +; CHECK-NEXT: movprfx z11, z24 +; CHECK-NEXT: fcvtzs z11.d, p0/m, z24.s +; CHECK-NEXT: movprfx z29, z26 +; CHECK-NEXT: frintx z29.s, p0/m, z26.s +; CHECK-NEXT: fcmge p4.s, p0/z, z25.s, z31.s +; CHECK-NEXT: fcmge p5.s, p0/z, z27.s, z31.s +; CHECK-NEXT: movprfx z12, z27 +; CHECK-NEXT: fcvtzs z12.d, p0/m, z27.s +; CHECK-NEXT: movprfx z19, z30 +; CHECK-NEXT: fcvtzs z19.d, p0/m, z30.s +; CHECK-NEXT: movprfx z7, z16 +; CHECK-NEXT: frintx z7.s, p0/m, z16.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: frintx z17.s, p0/m, z17.s +; CHECK-NEXT: uunpkhi z16.d, z5.s +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: frintx z18.s, p0/m, z18.s +; CHECK-NEXT: mov z28.s, w9 +; CHECK-NEXT: not p6.b, p0/z, p3.b +; CHECK-NEXT: sel z26.d, p1, z8.d, z9.d +; CHECK-NEXT: movprfx z14, z29 +; CHECK-NEXT: fcvtzs z14.d, p0/m, z29.s +; CHECK-NEXT: sel z9.d, p2, z8.d, z10.d +; CHECK-NEXT: uunpkhi z10.d, z3.s ; CHECK-NEXT: rdvl x9, #15 -; CHECK-NEXT: uunpklo z7.d, z7.s -; CHECK-NEXT: uunpkhi z25.d, z6.s +; CHECK-NEXT: sel z3.d, p6, z8.d, z11.d +; CHECK-NEXT: movprfx z11, z25 +; CHECK-NEXT: fcvtzs z11.d, p0/m, z25.s +; CHECK-NEXT: fcmge p3.s, p0/z, z29.s, z31.s +; CHECK-NEXT: 
not p4.b, p0/z, p4.b +; CHECK-NEXT: fcmge p1.s, p0/z, z30.s, z31.s +; CHECK-NEXT: movprfx z23, z18 +; CHECK-NEXT: fcvtzs z23.d, p0/m, z18.s +; CHECK-NEXT: not p2.b, p0/z, p5.b +; CHECK-NEXT: fcmge p5.s, p0/z, z17.s, z31.s +; CHECK-NEXT: frintx z16.s, p0/m, z16.s +; CHECK-NEXT: frintx z10.s, p0/m, z10.s +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p8.s, p0/z, z18.s, z28.s +; CHECK-NEXT: sel z4.d, p4, z8.d, z11.d +; CHECK-NEXT: movprfx z11, z13 +; CHECK-NEXT: frintx z11.s, p0/m, z13.s +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: sel z13.d, p2, z8.d, z12.d +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmge p4.s, p0/z, z7.s, z31.s +; CHECK-NEXT: sel z12.d, p3, z8.d, z14.d +; CHECK-NEXT: movprfx z14, z15 +; CHECK-NEXT: frintx z14.s, p0/m, z15.s +; CHECK-NEXT: uunpkhi z15.d, z6.s +; CHECK-NEXT: movprfx z20, z10 +; CHECK-NEXT: fcvtzs z20.d, p0/m, z10.s +; CHECK-NEXT: fcmge p2.s, p0/z, z10.s, z31.s +; CHECK-NEXT: sel z5.d, p1, z8.d, z19.d +; CHECK-NEXT: movprfx z19, z11 +; CHECK-NEXT: fcvtzs z19.d, p0/m, z11.s +; CHECK-NEXT: fcmge p3.s, p0/z, z11.s, z31.s +; CHECK-NEXT: not p5.b, p0/z, p5.b +; CHECK-NEXT: fcmge p6.s, p0/z, z16.s, z31.s +; CHECK-NEXT: fcmuo p9.s, p0/z, z18.s, z18.s +; CHECK-NEXT: movprfx z22, z15 +; CHECK-NEXT: frintx z22.s, p0/m, z15.s +; CHECK-NEXT: fcmge p1.s, p0/z, z14.s, z31.s +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: sel z6.d, p2, z8.d, z20.d +; CHECK-NEXT: movprfx z20, z21 +; CHECK-NEXT: frintx z20.s, p0/m, z21.s +; CHECK-NEXT: fcmge p2.s, p0/z, z18.s, z31.s +; CHECK-NEXT: sel z15.d, p3, z8.d, z19.d +; CHECK-NEXT: movprfx z19, z17 +; CHECK-NEXT: fcvtzs z19.d, p0/m, z17.s +; CHECK-NEXT: not p3.b, p0/z, p4.b +; CHECK-NEXT: fcmge p4.s, p0/z, z22.s, z31.s +; CHECK-NEXT: movprfx z21, z14 +; CHECK-NEXT: fcvtzs z21.d, p0/m, z14.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: movprfx z18, z7 +; CHECK-NEXT: fcvtzs z18.d, p0/m, z7.s +; CHECK-NEXT: not p6.b, p0/z, p6.b +; 
CHECK-NEXT: fcmge p7.s, p0/z, z20.s, z31.s +; CHECK-NEXT: movprfx z31, z22 +; CHECK-NEXT: fcvtzs z31.d, p0/m, z22.s +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z19.d, p5/m, z8.d +; CHECK-NEXT: fcmgt p5.s, p0/z, z17.s, z28.s +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: mov z23.d, p2/m, z8.d +; CHECK-NEXT: fcmuo p2.s, p0/z, z17.s, z17.s +; CHECK-NEXT: movprfx z17, z20 +; CHECK-NEXT: fcvtzs z17.d, p0/m, z20.s +; CHECK-NEXT: mov z21.d, p1/m, z8.d +; CHECK-NEXT: mov z18.d, p3/m, z8.d +; CHECK-NEXT: not p1.b, p0/z, p7.b +; CHECK-NEXT: mov z31.d, p4/m, z8.d +; CHECK-NEXT: fcmgt p4.s, p0/z, z20.s, z28.s +; CHECK-NEXT: mov z19.d, p5/m, z2.d +; CHECK-NEXT: fcmuo p7.s, p0/z, z20.s, z20.s +; CHECK-NEXT: movprfx z20, z16 +; CHECK-NEXT: fcvtzs z20.d, p0/m, z16.s +; CHECK-NEXT: fcmgt p5.s, p0/z, z22.s, z28.s +; CHECK-NEXT: mov z23.d, p8/m, z2.d +; CHECK-NEXT: fcmuo p3.s, p0/z, z22.s, z22.s +; CHECK-NEXT: mov z17.d, p1/m, z8.d ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: uunpklo z6.d, z6.s -; CHECK-NEXT: uunpkhi z27.d, z4.s -; CHECK-NEXT: uunpklo z4.d, z4.s -; CHECK-NEXT: uunpklo z29.d, z3.s -; CHECK-NEXT: uunpkhi z3.d, z3.s -; CHECK-NEXT: uunpklo z26.d, z1.s -; CHECK-NEXT: frintx z24.s, p0/m, z24.s -; CHECK-NEXT: uunpklo z28.d, z2.s -; CHECK-NEXT: uunpkhi z2.d, z2.s -; CHECK-NEXT: frintx z7.s, p0/m, z7.s -; CHECK-NEXT: frintx z25.s, p0/m, z25.s -; CHECK-NEXT: uunpkhi z1.d, z1.s -; CHECK-NEXT: frintx z6.s, p0/m, z6.s -; CHECK-NEXT: frintx z27.s, p0/m, z27.s -; CHECK-NEXT: frintx z4.s, p0/m, z4.s -; CHECK-NEXT: frintx z3.s, p0/m, z3.s -; CHECK-NEXT: frintx z26.s, p0/m, z26.s -; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.s -; CHECK-NEXT: frintx z2.s, p0/m, z2.s -; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.s -; CHECK-NEXT: fcvtzs z25.d, p0/m, z25.s -; CHECK-NEXT: frintx z1.s, p0/m, z1.s -; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s -; CHECK-NEXT: st1b { z24.b }, p1, [x8, x9] +; CHECK-NEXT: mov z19.d, p2/m, #0 // =0x0 
+; CHECK-NEXT: fcmgt p2.s, p0/z, z16.s, z28.s +; CHECK-NEXT: sel z8.d, p6, z8.d, z20.d +; CHECK-NEXT: mov z23.d, p9/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p6.s, p0/z, z14.s, z28.s +; CHECK-NEXT: mov z31.d, p5/m, z2.d +; CHECK-NEXT: mov z17.d, p4/m, z2.d +; CHECK-NEXT: fcmuo p4.s, p0/z, z16.s, z16.s +; CHECK-NEXT: st1b { z19.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #14 -; CHECK-NEXT: uunpkhi z24.d, z5.s -; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] +; CHECK-NEXT: fcmgt p5.s, p0/z, z1.s, z28.s +; CHECK-NEXT: st1b { z23.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #13 -; CHECK-NEXT: uunpklo z5.d, z5.s -; CHECK-NEXT: st1b { z25.b }, p1, [x8, x9] +; CHECK-NEXT: mov z8.d, p2/m, z2.d +; CHECK-NEXT: mov z31.d, p3/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p3.s, p0/z, z7.s, z28.s +; CHECK-NEXT: mov z17.d, p7/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p2.s, p0/z, z11.s, z28.s +; CHECK-NEXT: fcmuo p7.s, p0/z, z14.s, z14.s +; CHECK-NEXT: mov z8.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.s, p0/z, z7.s, z7.s +; CHECK-NEXT: sel z7.d, p5, z2.d, z9.d +; CHECK-NEXT: st1b { z31.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #12 -; CHECK-NEXT: movprfx z25, z27 -; CHECK-NEXT: fcvtzs z25.d, p0/m, z27.s -; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9] +; CHECK-NEXT: fcmgt p5.s, p0/z, z27.s, z28.s +; CHECK-NEXT: st1b { z17.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #11 -; CHECK-NEXT: movprfx z6, z29 -; CHECK-NEXT: frintx z6.s, p0/m, z29.s -; CHECK-NEXT: frintx z24.s, p0/m, z24.s -; CHECK-NEXT: uunpkhi z7.d, z0.s -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: frintx z5.s, p0/m, z5.s -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s -; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.s -; CHECK-NEXT: frintx z7.s, p0/m, z7.s -; CHECK-NEXT: frintx z0.s, p0/m, z0.s -; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.s -; CHECK-NEXT: st1b { z24.b }, p1, [x8, x9] -; CHECK-NEXT: movprfx z24, z28 -; CHECK-NEXT: frintx z24.s, p0/m, z28.s +; CHECK-NEXT: sel z31.d, p3, z2.d, z18.d +; CHECK-NEXT: st1b { z8.b }, p1, [x8, x9] ; CHECK-NEXT: 
rdvl x9, #10 -; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] +; CHECK-NEXT: fcmgt p3.s, p0/z, z30.s, z28.s +; CHECK-NEXT: sel z9.d, p2, z2.d, z15.d +; CHECK-NEXT: fcmuo p2.s, p0/z, z11.s, z11.s +; CHECK-NEXT: sel z8.d, p6, z2.d, z21.d +; CHECK-NEXT: mov z31.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p4.s, p0/z, z10.s, z28.s +; CHECK-NEXT: fcmgt p6.s, p0/z, z24.s, z28.s +; CHECK-NEXT: sel z11.d, p5, z2.d, z13.d +; CHECK-NEXT: fcmgt p5.s, p0/z, z25.s, z28.s +; CHECK-NEXT: mov z8.d, p7/m, #0 // =0x0 +; CHECK-NEXT: mov z5.d, p3/m, z2.d +; CHECK-NEXT: fcmgt p3.s, p0/z, z29.s, z28.s +; CHECK-NEXT: st1b { z31.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #9 -; CHECK-NEXT: movprfx z5, z6 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z6.s -; CHECK-NEXT: st1b { z25.b }, p1, [x8, x9] +; CHECK-NEXT: mov z9.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p7.s, p0/z, z10.s, z10.s +; CHECK-NEXT: fcmuo p2.s, p0/z, z30.s, z30.s +; CHECK-NEXT: mov z6.d, p4/m, z2.d +; CHECK-NEXT: st1b { z8.b }, p1, [x8, x9] ; CHECK-NEXT: rdvl x9, #8 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s -; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9] -; CHECK-NEXT: movprfx z4, z7 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.s -; CHECK-NEXT: movprfx z6, z24 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z24.s -; CHECK-NEXT: st1d { z3.d }, p0, [x8, #7, mul vl] -; CHECK-NEXT: movprfx z3, z26 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z26.s +; CHECK-NEXT: fcmuo p4.s, p0/z, z29.s, z29.s +; CHECK-NEXT: st1b { z9.b }, p1, [x8, x9] +; CHECK-NEXT: fcmuo p1.s, p0/z, z27.s, z27.s +; CHECK-NEXT: sel z27.d, p3, z2.d, z12.d +; CHECK-NEXT: fcmgt p3.s, p0/z, z0.s, z28.s +; CHECK-NEXT: mov z4.d, p5/m, z2.d +; CHECK-NEXT: mov z3.d, p6/m, z2.d +; CHECK-NEXT: mov z6.d, p7/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p7.s, p0/z, z25.s, z25.s +; CHECK-NEXT: mov z5.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p2.s, p0/z, z24.s, z24.s +; CHECK-NEXT: mov z27.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.s, p0/z, z1.s, z1.s +; CHECK-NEXT: mov z11.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z0.s +; 
CHECK-NEXT: st1d { z6.d }, p0, [x8, #7, mul vl] +; CHECK-NEXT: sel z0.d, p3, z2.d, z26.d ; CHECK-NEXT: st1d { z5.d }, p0, [x8, #6, mul vl] -; CHECK-NEXT: st1d { z2.d }, p0, [x8, #5, mul vl] -; CHECK-NEXT: st1d { z1.d }, p0, [x8, #3, mul vl] -; CHECK-NEXT: st1d { z6.d }, p0, [x8, #4, mul vl] +; CHECK-NEXT: mov z4.d, p7/m, #0 // =0x0 +; CHECK-NEXT: st1d { z27.d }, p0, [x8, #5, mul vl] +; CHECK-NEXT: mov z3.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z7.d, p4/m, #0 // =0x0 +; CHECK-NEXT: st1d { z11.d }, p0, [x8, #4, mul vl] +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: st1d { z4.d }, p0, [x8, #3, mul vl] ; CHECK-NEXT: st1d { z3.d }, p0, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z4.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z7.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1d { z0.d }, p0, [x8] +; CHECK-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; 
CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #17 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv32i64.nxv32f32( %x) ret %a @@ -428,8 +1202,22 @@ define @llrint_v1i64_v1f64( %x) { ; CHECK-LABEL: llrint_v1i64_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff ; CHECK-NEXT: frintx z0.d, p0/m, z0.d -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: mov z3.d, x8 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z3.d +; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv1i64.nxv1f64( %x) ret %a @@ -440,8 +1228,22 @@ define @llrint_v2i64_v2f64( %x) { ; CHECK-LABEL: llrint_v2i64_v2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff ; CHECK-NEXT: frintx z0.d, p0/m, z0.d -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: mov z3.d, x8 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z3.d +; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff +; CHECK-NEXT: 
not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv2i64.nxv2f64( %x) ret %a @@ -451,11 +1253,41 @@ declare @llvm.llrint.nxv2i64.nxv2f64() define @llrint_v4i64_v4f64( %x) { ; CHECK-LABEL: llrint_v4i64_v4f64: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 +; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff ; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: frintx z1.d, p0/m, z1.d -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d +; CHECK-NEXT: mov z3.d, x8 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z2.d +; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z2.d +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d +; CHECK-NEXT: movprfx z5, z1 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z1.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z3.d +; CHECK-NEXT: fcmgt p4.d, p0/z, z1.d, z3.d +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: sel z3.d, p1, z2.d, z4.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z0.d, z0.d +; CHECK-NEXT: fcmuo p0.d, p0/z, z1.d, z1.d +; CHECK-NEXT: sel z2.d, p2, z2.d, z5.d +; CHECK-NEXT: sel z0.d, p3, z6.d, z3.d +; CHECK-NEXT: sel z1.d, p4, z6.d, z2.d +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; 
CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv4i64.nxv4f64( %x) ret %a @@ -465,15 +1297,67 @@ declare @llvm.llrint.nxv4i64.nxv4f64() define @llrint_v8i64_v8f64( %x) { ; CHECK-LABEL: llrint_v8i64_v8f64: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 +; CHECK-NEXT: mov z5.d, #0x8000000000000000 +; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff +; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff ; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: frintx z1.d, p0/m, z1.d ; CHECK-NEXT: frintx z2.d, p0/m, z2.d ; CHECK-NEXT: frintx z3.d, p0/m, z3.d -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d +; CHECK-NEXT: mov z6.d, x8 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z4.d +; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z4.d +; CHECK-NEXT: fcmge p3.d, p0/z, z2.d, z4.d +; CHECK-NEXT: fcmge p4.d, p0/z, z3.d, z4.d +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d +; CHECK-NEXT: movprfx z7, z1 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z1.d +; CHECK-NEXT: movprfx z24, z2 +; CHECK-NEXT: fcvtzs z24.d, p0/m, z2.d +; CHECK-NEXT: movprfx z25, z3 +; CHECK-NEXT: fcvtzs z25.d, p0/m, z3.d +; CHECK-NEXT: fcmgt p7.d, p0/z, z2.d, z6.d +; CHECK-NEXT: fcmgt p5.d, p0/z, z0.d, z6.d +; CHECK-NEXT: fcmgt p6.d, p0/z, z1.d, z6.d +; 
CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: mov z4.d, p1/m, z5.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z6.d +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: sel z6.d, p2, z5.d, z7.d +; CHECK-NEXT: fcmuo p2.d, p0/z, z0.d, z0.d +; CHECK-NEXT: sel z7.d, p3, z5.d, z24.d +; CHECK-NEXT: fcmuo p3.d, p0/z, z1.d, z1.d +; CHECK-NEXT: sel z5.d, p4, z5.d, z25.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z2.d, z2.d +; CHECK-NEXT: fcmuo p0.d, p0/z, z3.d, z3.d +; CHECK-NEXT: sel z0.d, p5, z26.d, z4.d +; CHECK-NEXT: sel z1.d, p6, z26.d, z6.d +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z2.d, p7, z26.d, z7.d +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z3.d, p1, z26.d, z5.d +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv8i64.nxv8f64( %x) ret %a @@ -483,23 +1367,119 @@ declare @llvm.llrint.nxv8i64.nxv8f64() define @llrint_v16f64( %x) { ; CHECK-LABEL: llrint_v16f64: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: frintx z0.d, p0/m, z0.d -; CHECK-NEXT: frintx z1.d, p0/m, z1.d +; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 +; CHECK-NEXT: mov z24.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z25.d, x8 +; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff +; CHECK-NEXT: movprfx z26, z0 +; CHECK-NEXT: frintx z26.d, p0/m, z0.d +; CHECK-NEXT: movprfx z27, z1 +; CHECK-NEXT: frintx z27.d, p0/m, z1.d ; CHECK-NEXT: frintx z2.d, p0/m, z2.d +; CHECK-NEXT: mov z0.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: frintx z3.d, p0/m, z3.d -; CHECK-NEXT: frintx z4.d, p0/m, z4.d +; CHECK-NEXT: movprfx z28, z4 +; CHECK-NEXT: frintx z28.d, p0/m, z4.d ; CHECK-NEXT: frintx z5.d, p0/m, z5.d ; CHECK-NEXT: frintx z6.d, p0/m, z6.d ; CHECK-NEXT: frintx z7.d, p0/m, z7.d -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d -; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d -; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d -; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.d 
+; CHECK-NEXT: fcmge p1.d, p0/z, z26.d, z25.d +; CHECK-NEXT: fcmge p2.d, p0/z, z27.d, z25.d +; CHECK-NEXT: movprfx z4, z26 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z26.d +; CHECK-NEXT: fcmge p5.d, p0/z, z2.d, z25.d +; CHECK-NEXT: movprfx z29, z27 +; CHECK-NEXT: fcvtzs z29.d, p0/m, z27.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z26.d, z1.d +; CHECK-NEXT: fcmge p6.d, p0/z, z3.d, z25.d +; CHECK-NEXT: fcmge p8.d, p0/z, z5.d, z25.d +; CHECK-NEXT: fcmgt p7.d, p0/z, z27.d, z1.d +; CHECK-NEXT: fcmge p9.d, p0/z, z6.d, z25.d +; CHECK-NEXT: movprfx z30, z28 +; CHECK-NEXT: fcvtzs z30.d, p0/m, z28.d +; CHECK-NEXT: fcmge p10.d, p0/z, z7.d, z25.d +; CHECK-NEXT: not p4.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p1.d, p0/z, z26.d, z26.d +; CHECK-NEXT: movprfx z26, z2 +; CHECK-NEXT: fcvtzs z26.d, p0/m, z2.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: movprfx z31, z6 +; CHECK-NEXT: fcvtzs z31.d, p0/m, z6.d +; CHECK-NEXT: movprfx z8, z7 +; CHECK-NEXT: fcvtzs z8.d, p0/m, z7.d +; CHECK-NEXT: mov z4.d, p4/m, z0.d +; CHECK-NEXT: fcmge p4.d, p0/z, z28.d, z25.d +; CHECK-NEXT: not p5.b, p0/z, p5.b +; CHECK-NEXT: mov z29.d, p2/m, z0.d +; CHECK-NEXT: fcmuo p2.d, p0/z, z27.d, z27.d +; CHECK-NEXT: movprfx z27, z3 +; CHECK-NEXT: fcvtzs z27.d, p0/m, z3.d +; CHECK-NEXT: sel z25.d, p5, z0.d, z26.d +; CHECK-NEXT: movprfx z26, z5 +; CHECK-NEXT: fcvtzs z26.d, p0/m, z5.d +; CHECK-NEXT: not p6.b, p0/z, p6.b +; CHECK-NEXT: not p5.b, p0/z, p8.b +; CHECK-NEXT: fcmgt p8.d, p0/z, z2.d, z1.d +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: mov z27.d, p6/m, z0.d +; CHECK-NEXT: not p6.b, p0/z, p9.b +; CHECK-NEXT: fcmuo p9.d, p0/z, z2.d, z2.d +; CHECK-NEXT: mov z30.d, p4/m, z0.d +; CHECK-NEXT: not p4.b, p0/z, p10.b +; CHECK-NEXT: fcmgt p10.d, p0/z, z3.d, z1.d +; CHECK-NEXT: mov z26.d, p5/m, z0.d +; CHECK-NEXT: fcmgt p5.d, p0/z, z28.d, z1.d +; CHECK-NEXT: mov z31.d, p6/m, z0.d +; CHECK-NEXT: mov z8.d, p4/m, z0.d +; CHECK-NEXT: sel z0.d, p3, z24.d, z4.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z5.d, z1.d +; CHECK-NEXT: fcmgt p4.d, p0/z, 
z6.d, z1.d +; CHECK-NEXT: fcmgt p6.d, p0/z, z7.d, z1.d +; CHECK-NEXT: sel z1.d, p7, z24.d, z29.d +; CHECK-NEXT: fcmuo p7.d, p0/z, z3.d, z3.d +; CHECK-NEXT: sel z2.d, p8, z24.d, z25.d +; CHECK-NEXT: sel z3.d, p10, z24.d, z27.d +; CHECK-NEXT: sel z4.d, p5, z24.d, z30.d +; CHECK-NEXT: fcmuo p5.d, p0/z, z28.d, z28.d +; CHECK-NEXT: fcmuo p8.d, p0/z, z5.d, z5.d +; CHECK-NEXT: fcmuo p10.d, p0/z, z6.d, z6.d +; CHECK-NEXT: sel z5.d, p3, z24.d, z26.d +; CHECK-NEXT: fcmuo p0.d, p0/z, z7.d, z7.d +; CHECK-NEXT: sel z6.d, p4, z24.d, z31.d +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z7.d, p6, z24.d, z8.d +; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z2.d, p9/m, #0 // =0x0 +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z3.d, p7/m, #0 // =0x0 +; CHECK-NEXT: mov z4.d, p5/m, #0 // =0x0 +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 +; CHECK-NEXT: mov z6.d, p10/m, #0 // =0x0 +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv16i64.nxv16f64( %x) ret %a @@ -509,92 +1489,273 @@ declare @llvm.llrint.nxv16i64.nxv16f64( @llrint_v32f64( %x) { ; CHECK-LABEL: llrint_v32f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: rdvl x14, #15 -; CHECK-NEXT: rdvl x15, #14 -; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: rdvl x13, #13 -; CHECK-NEXT: rdvl x12, #12 -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x14] -; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0, x15] -; CHECK-NEXT: rdvl 
x10, #11 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-12 +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str z18, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z17, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z16, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z15, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z14, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z13, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z12, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z11, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z10, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xe0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 96 * VG +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG +; CHECK-NEXT: .cfi_escape 0x10, 
0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: rdvl x9, #8 +; CHECK-NEXT: rdvl x10, #9 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: rdvl x11, #10 -; CHECK-NEXT: rdvl x9, #9 -; CHECK-NEXT: ld1b { z2.b }, p0/z, [x0, x13] -; CHECK-NEXT: ld1b { z3.b }, p0/z, [x0, x12] -; CHECK-NEXT: ld1b { z4.b }, p0/z, [x0, x10] -; CHECK-NEXT: ld1b { z5.b }, p0/z, [x0, x11] -; CHECK-NEXT: frintx z0.d, p1/m, z0.d -; CHECK-NEXT: frintx z1.d, p1/m, z1.d -; CHECK-NEXT: ld1b { z6.b }, p0/z, [x0, x9] -; CHECK-NEXT: rdvl x16, #8 -; CHECK-NEXT: frintx z2.d, p1/m, z2.d -; CHECK-NEXT: ld1d { z24.d }, p1/z, [x0, #7, mul vl] -; CHECK-NEXT: frintx z3.d, p1/m, z3.d -; CHECK-NEXT: frintx z4.d, p1/m, z4.d -; CHECK-NEXT: frintx z5.d, p1/m, z5.d -; CHECK-NEXT: frintx z6.d, p1/m, z6.d -; CHECK-NEXT: ld1b { z7.b }, p0/z, [x0, x16] -; CHECK-NEXT: ld1d { z25.d }, p1/z, [x0, #6, mul vl] -; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d -; CHECK-NEXT: fcvtzs z1.d, p1/m, z1.d -; CHECK-NEXT: ld1d { z26.d }, p1/z, [x0, #5, mul vl] -; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0, #4, mul vl] -; CHECK-NEXT: ld1d { z28.d }, p1/z, [x0, #3, mul vl] -; CHECK-NEXT: ld1d { z29.d }, p1/z, [x0, #2, mul vl] -; CHECK-NEXT: ld1d { z30.d }, p1/z, [x0, #1, mul vl] -; CHECK-NEXT: fcvtzs z2.d, p1/m, z2.d -; CHECK-NEXT: ld1d { z31.d }, p1/z, [x0] -; CHECK-NEXT: frintx z7.d, p1/m, z7.d -; CHECK-NEXT: fcvtzs z3.d, p1/m, z3.d -; CHECK-NEXT: fcvtzs z4.d, p1/m, z4.d -; CHECK-NEXT: st1b { z0.b }, p0, [x8, x14] -; CHECK-NEXT: movprfx z0, z5 -; CHECK-NEXT: fcvtzs z0.d, p1/m, z5.d -; CHECK-NEXT: frintx z24.d, p1/m, z24.d -; CHECK-NEXT: st1b { z1.b }, p0, [x8, x15] -; CHECK-NEXT: movprfx z1, z6 -; 
CHECK-NEXT: fcvtzs z1.d, p1/m, z6.d -; CHECK-NEXT: movprfx z5, z25 -; CHECK-NEXT: frintx z5.d, p1/m, z25.d -; CHECK-NEXT: movprfx z6, z26 -; CHECK-NEXT: frintx z6.d, p1/m, z26.d -; CHECK-NEXT: st1b { z2.b }, p0, [x8, x13] -; CHECK-NEXT: movprfx z2, z7 -; CHECK-NEXT: fcvtzs z2.d, p1/m, z7.d -; CHECK-NEXT: movprfx z7, z27 -; CHECK-NEXT: frintx z7.d, p1/m, z27.d -; CHECK-NEXT: st1b { z3.b }, p0, [x8, x12] -; CHECK-NEXT: movprfx z3, z28 -; CHECK-NEXT: frintx z3.d, p1/m, z28.d -; CHECK-NEXT: st1b { z4.b }, p0, [x8, x10] -; CHECK-NEXT: movprfx z4, z29 -; CHECK-NEXT: frintx z4.d, p1/m, z29.d -; CHECK-NEXT: st1b { z0.b }, p0, [x8, x11] -; CHECK-NEXT: movprfx z0, z30 -; CHECK-NEXT: frintx z0.d, p1/m, z30.d -; CHECK-NEXT: fcvtzs z24.d, p1/m, z24.d -; CHECK-NEXT: st1b { z1.b }, p0, [x8, x9] -; CHECK-NEXT: movprfx z1, z31 -; CHECK-NEXT: frintx z1.d, p1/m, z31.d -; CHECK-NEXT: fcvtzs z5.d, p1/m, z5.d -; CHECK-NEXT: fcvtzs z6.d, p1/m, z6.d -; CHECK-NEXT: fcvtzs z7.d, p1/m, z7.d -; CHECK-NEXT: st1b { z2.b }, p0, [x8, x16] -; CHECK-NEXT: movprfx z2, z3 -; CHECK-NEXT: fcvtzs z2.d, p1/m, z3.d -; CHECK-NEXT: movprfx z3, z4 -; CHECK-NEXT: fcvtzs z3.d, p1/m, z4.d -; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d -; CHECK-NEXT: st1d { z24.d }, p1, [x8, #7, mul vl] -; CHECK-NEXT: fcvtzs z1.d, p1/m, z1.d -; CHECK-NEXT: st1d { z5.d }, p1, [x8, #6, mul vl] -; CHECK-NEXT: st1d { z6.d }, p1, [x8, #5, mul vl] -; CHECK-NEXT: st1d { z7.d }, p1, [x8, #4, mul vl] -; CHECK-NEXT: st1d { z2.d }, p1, [x8, #3, mul vl] -; CHECK-NEXT: st1d { z3.d }, p1, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z0.d }, p1, [x8, #1, mul vl] -; CHECK-NEXT: st1d { z1.d }, p1, [x8] +; CHECK-NEXT: mov x12, #-4332462841530417152 // =0xc3e0000000000000 +; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0, x9] +; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0, x10] +; CHECK-NEXT: mov z2.d, x12 +; CHECK-NEXT: rdvl x14, #13 +; CHECK-NEXT: rdvl x13, #12 +; CHECK-NEXT: rdvl x12, #11 +; CHECK-NEXT: ld1b { z6.b }, p1/z, [x0, x14] +; CHECK-NEXT: ld1b { z7.b }, p1/z, [x0, 
x13] +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: movprfx z24, z0 +; CHECK-NEXT: frintx z24.d, p0/m, z0.d +; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0, x11] +; CHECK-NEXT: movprfx z5, z1 +; CHECK-NEXT: frintx z5.d, p0/m, z1.d +; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0, x12] +; CHECK-NEXT: mov x15, #4890909195324358655 // =0x43dfffffffffffff +; CHECK-NEXT: rdvl x16, #15 +; CHECK-NEXT: movprfx z30, z6 +; CHECK-NEXT: frintx z30.d, p0/m, z6.d +; CHECK-NEXT: movprfx z28, z7 +; CHECK-NEXT: frintx z28.d, p0/m, z7.d +; CHECK-NEXT: ld1b { z8.b }, p1/z, [x0, x16] +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: frintx z4.d, p0/m, z0.d +; CHECK-NEXT: mov z0.d, #0x7fffffffffffffff +; CHECK-NEXT: ld1d { z18.d }, p0/z, [x0] +; CHECK-NEXT: fcmge p3.d, p0/z, z5.d, z2.d +; CHECK-NEXT: fcmge p2.d, p0/z, z24.d, z2.d +; CHECK-NEXT: movprfx z6, z5 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z5.d +; CHECK-NEXT: movprfx z27, z1 +; CHECK-NEXT: frintx z27.d, p0/m, z1.d +; CHECK-NEXT: movprfx z25, z24 +; CHECK-NEXT: fcvtzs z25.d, p0/m, z24.d +; CHECK-NEXT: mov z1.d, x15 +; CHECK-NEXT: rdvl x15, #14 +; CHECK-NEXT: movprfx z9, z28 +; CHECK-NEXT: fcvtzs z9.d, p0/m, z28.d +; CHECK-NEXT: movprfx z13, z8 +; CHECK-NEXT: frintx z13.d, p0/m, z8.d +; CHECK-NEXT: fcmge p4.d, p0/z, z4.d, z2.d +; CHECK-NEXT: movprfx z7, z4 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z4.d +; CHECK-NEXT: ld1d { z15.d }, p0/z, [x0, #2, mul vl] +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: fcmgt p5.d, p0/z, z24.d, z1.d +; CHECK-NEXT: fcmgt p6.d, p0/z, z5.d, z1.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: fcmge p7.d, p0/z, z27.d, z2.d +; CHECK-NEXT: movprfx z26, z27 +; CHECK-NEXT: fcvtzs z26.d, p0/m, z27.d +; CHECK-NEXT: sel z29.d, p3, z3.d, z6.d +; CHECK-NEXT: ld1b { z6.b }, p1/z, [x0, x15] +; CHECK-NEXT: fcmge p3.d, p0/z, z28.d, z2.d +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: mov z25.d, p2/m, z3.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z4.d, z1.d +; CHECK-NEXT: movprfx z16, z13 +; CHECK-NEXT: fcvtzs z16.d, p0/m, z13.d +; 
CHECK-NEXT: ld1d { z17.d }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ld1d { z14.d }, p0/z, [x0, #3, mul vl] +; CHECK-NEXT: sel z31.d, p4, z3.d, z7.d +; CHECK-NEXT: movprfx z11, z6 +; CHECK-NEXT: frintx z11.d, p0/m, z6.d +; CHECK-NEXT: not p7.b, p0/z, p7.b +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: sel z6.d, p5, z0.d, z25.d +; CHECK-NEXT: fcmgt p4.d, p0/z, z27.d, z1.d +; CHECK-NEXT: sel z7.d, p6, z0.d, z29.d +; CHECK-NEXT: mov z26.d, p7/m, z3.d +; CHECK-NEXT: fcmge p5.d, p0/z, z13.d, z2.d +; CHECK-NEXT: sel z25.d, p2, z0.d, z31.d +; CHECK-NEXT: fcmge p2.d, p0/z, z30.d, z2.d +; CHECK-NEXT: sel z29.d, p3, z3.d, z9.d +; CHECK-NEXT: fcmge p3.d, p0/z, z11.d, z2.d +; CHECK-NEXT: movprfx z31, z30 +; CHECK-NEXT: fcvtzs z31.d, p0/m, z30.d +; CHECK-NEXT: movprfx z9, z11 +; CHECK-NEXT: fcvtzs z9.d, p0/m, z11.d +; CHECK-NEXT: mov z26.d, p4/m, z0.d +; CHECK-NEXT: fcmgt p4.d, p0/z, z28.d, z1.d +; CHECK-NEXT: fcmgt p6.d, p0/z, z30.d, z1.d +; CHECK-NEXT: not p7.b, p0/z, p5.b +; CHECK-NEXT: fcmuo p5.d, p0/z, z27.d, z27.d +; CHECK-NEXT: fcmgt p8.d, p0/z, z13.d, z1.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: movprfx z27, z18 +; CHECK-NEXT: frintx z27.d, p0/m, z18.d +; CHECK-NEXT: ld1d { z8.d }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: mov z16.d, p7/m, z3.d +; CHECK-NEXT: fcmuo p7.d, p0/z, z13.d, z13.d +; CHECK-NEXT: mov z31.d, p2/m, z3.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z11.d, z1.d +; CHECK-NEXT: mov z29.d, p4/m, z0.d +; CHECK-NEXT: mov z9.d, p3/m, z3.d +; CHECK-NEXT: fcmuo p3.d, p0/z, z28.d, z28.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z30.d, z30.d +; CHECK-NEXT: movprfx z28, z17 +; CHECK-NEXT: frintx z28.d, p0/m, z17.d +; CHECK-NEXT: movprfx z30, z15 +; CHECK-NEXT: frintx z30.d, p0/m, z15.d +; CHECK-NEXT: ld1d { z13.d }, p0/z, [x0, #4, mul vl] +; CHECK-NEXT: mov z31.d, p6/m, z0.d +; CHECK-NEXT: fcmuo p6.d, p0/z, z11.d, z11.d +; CHECK-NEXT: sel z11.d, p8, z0.d, z16.d +; CHECK-NEXT: mov z9.d, p2/m, z0.d +; CHECK-NEXT: fcmuo p2.d, p0/z, z24.d, 
z24.d +; CHECK-NEXT: movprfx z24, z14 +; CHECK-NEXT: frintx z24.d, p0/m, z14.d +; CHECK-NEXT: fcmge p8.d, p0/z, z27.d, z2.d +; CHECK-NEXT: ld1d { z10.d }, p0/z, [x0, #6, mul vl] +; CHECK-NEXT: ld1d { z12.d }, p0/z, [x0, #5, mul vl] +; CHECK-NEXT: mov z26.d, p5/m, #0 // =0x0 +; CHECK-NEXT: mov z29.d, p3/m, #0 // =0x0 +; CHECK-NEXT: fcmge p5.d, p0/z, z28.d, z2.d +; CHECK-NEXT: movprfx z14, z27 +; CHECK-NEXT: fcvtzs z14.d, p0/m, z27.d +; CHECK-NEXT: fcmge p3.d, p0/z, z30.d, z2.d +; CHECK-NEXT: frintx z13.d, p0/m, z13.d +; CHECK-NEXT: mov z31.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmge p4.d, p0/z, z24.d, z2.d +; CHECK-NEXT: mov z9.d, p6/m, #0 // =0x0 +; CHECK-NEXT: movprfx z15, z28 +; CHECK-NEXT: fcvtzs z15.d, p0/m, z28.d +; CHECK-NEXT: not p6.b, p0/z, p8.b +; CHECK-NEXT: movprfx z16, z30 +; CHECK-NEXT: fcvtzs z16.d, p0/m, z30.d +; CHECK-NEXT: frintx z12.d, p0/m, z12.d +; CHECK-NEXT: frintx z10.d, p0/m, z10.d +; CHECK-NEXT: movprfx z17, z24 +; CHECK-NEXT: fcvtzs z17.d, p0/m, z24.d +; CHECK-NEXT: movprfx z18, z8 +; CHECK-NEXT: frintx z18.d, p0/m, z8.d +; CHECK-NEXT: not p5.b, p0/z, p5.b +; CHECK-NEXT: sel z8.d, p6, z3.d, z14.d +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: fcmge p6.d, p0/z, z13.d, z2.d +; CHECK-NEXT: mov z11.d, p7/m, #0 // =0x0 +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: sel z14.d, p5, z3.d, z15.d +; CHECK-NEXT: fcmuo p7.d, p0/z, z5.d, z5.d +; CHECK-NEXT: sel z15.d, p3, z3.d, z16.d +; CHECK-NEXT: movprfx z16, z13 +; CHECK-NEXT: fcvtzs z16.d, p0/m, z13.d +; CHECK-NEXT: fcmge p5.d, p0/z, z12.d, z2.d +; CHECK-NEXT: fcmge p3.d, p0/z, z10.d, z2.d +; CHECK-NEXT: sel z5.d, p4, z3.d, z17.d +; CHECK-NEXT: fcmge p4.d, p0/z, z18.d, z2.d +; CHECK-NEXT: not p6.b, p0/z, p6.b +; CHECK-NEXT: movprfx z2, z12 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z12.d +; CHECK-NEXT: movprfx z17, z10 +; CHECK-NEXT: fcvtzs z17.d, p0/m, z10.d +; CHECK-NEXT: st1b { z11.b }, p1, [x8, x16] +; CHECK-NEXT: movprfx z11, z18 +; CHECK-NEXT: fcvtzs z11.d, p0/m, z18.d +; CHECK-NEXT: mov z6.d, 
p2/m, #0 // =0x0 +; CHECK-NEXT: st1b { z9.b }, p1, [x8, x15] +; CHECK-NEXT: sel z9.d, p6, z3.d, z16.d +; CHECK-NEXT: fcmuo p6.d, p0/z, z4.d, z4.d +; CHECK-NEXT: not p5.b, p0/z, p5.b +; CHECK-NEXT: fcmgt p2.d, p0/z, z18.d, z1.d +; CHECK-NEXT: mov z7.d, p7/m, #0 // =0x0 +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: st1b { z31.b }, p1, [x8, x14] +; CHECK-NEXT: fcmgt p7.d, p0/z, z24.d, z1.d +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: mov z2.d, p5/m, z3.d +; CHECK-NEXT: fcmgt p5.d, p0/z, z28.d, z1.d +; CHECK-NEXT: sel z4.d, p3, z3.d, z17.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z13.d, z1.d +; CHECK-NEXT: mov z25.d, p6/m, #0 // =0x0 +; CHECK-NEXT: sel z3.d, p4, z3.d, z11.d +; CHECK-NEXT: fcmgt p4.d, p0/z, z10.d, z1.d +; CHECK-NEXT: fcmgt p6.d, p0/z, z12.d, z1.d +; CHECK-NEXT: st1b { z29.b }, p1, [x8, x13] +; CHECK-NEXT: st1b { z26.b }, p1, [x8, x12] +; CHECK-NEXT: sel z26.d, p5, z0.d, z14.d +; CHECK-NEXT: fcmgt p5.d, p0/z, z30.d, z1.d +; CHECK-NEXT: sel z29.d, p3, z0.d, z9.d +; CHECK-NEXT: fcmuo p3.d, p0/z, z18.d, z18.d +; CHECK-NEXT: mov z3.d, p2/m, z0.d +; CHECK-NEXT: st1b { z25.b }, p1, [x8, x11] +; CHECK-NEXT: fcmuo p2.d, p0/z, z10.d, z10.d +; CHECK-NEXT: mov z4.d, p4/m, z0.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z12.d, z12.d +; CHECK-NEXT: st1b { z7.b }, p1, [x8, x10] +; CHECK-NEXT: mov z2.d, p6/m, z0.d +; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9] +; CHECK-NEXT: fcmuo p1.d, p0/z, z13.d, z13.d +; CHECK-NEXT: fcmgt p6.d, p0/z, z27.d, z1.d +; CHECK-NEXT: mov z3.d, p3/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p3.d, p0/z, z24.d, z24.d +; CHECK-NEXT: sel z1.d, p7, z0.d, z5.d +; CHECK-NEXT: mov z4.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p2.d, p0/z, z30.d, z30.d +; CHECK-NEXT: sel z5.d, p5, z0.d, z15.d +; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.d, p0/z, z28.d, z28.d +; CHECK-NEXT: mov z29.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p1.d, p0/z, z27.d, z27.d +; CHECK-NEXT: sel z0.d, p6, z0.d, z8.d +; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 +; CHECK-NEXT: st1d { 
z3.d }, p0, [x8, #7, mul vl] +; CHECK-NEXT: mov z5.d, p2/m, #0 // =0x0 +; CHECK-NEXT: st1d { z4.d }, p0, [x8, #6, mul vl] +; CHECK-NEXT: mov z26.d, p4/m, #0 // =0x0 +; CHECK-NEXT: st1d { z2.d }, p0, [x8, #5, mul vl] +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: st1d { z29.d }, p0, [x8, #4, mul vl] +; CHECK-NEXT: st1d { z1.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: st1d { z5.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z26.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [x8] +; CHECK-NEXT: ldr z18, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z17, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z16, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z15, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z14, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z13, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z12, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z11, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z10, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z9, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z8, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #12 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %a = call @llvm.llrint.nxv32i64.nxv16f64( %x) ret %a diff --git a/llvm/test/CodeGen/AArch64/sve-lrint.ll b/llvm/test/CodeGen/AArch64/sve-lrint.ll index ce58e26ff8a75..d8415be01f463 100644 --- a/llvm/test/CodeGen/AArch64/sve-lrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-lrint.ll @@ -1,602 +1,938 @@ ; NOTE: Assertions have been autogenerated 
by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=aarch64 -mattr=+sve |\ +; RUN: FileCheck --check-prefixes=CHECK %s -define @lrint_v1f16( %x) { +define @lrint_v1f16( %x) { ; CHECK-LABEL: lrint_v1f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z3.h +; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv1i64.nxv1f16( %x) - ret %a + %a = call @llvm.lrint.nxv1iXLen.nxv1f16( %x) + ret %a } -declare @llvm.lrint.nxv1i64.nxv1f16() +declare @llvm.lrint.nxv1iXLen.nxv1f16() -define @lrint_v2f16( %x) { +define @lrint_v2f16( %x) { ; CHECK-LABEL: lrint_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: frintx z0.h, p0/m, z0.h -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h +; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z3.h +; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; 
CHECK-NEXT: ret - %a = call @llvm.lrint.nxv2i64.nxv2f16( %x) - ret %a + %a = call @llvm.lrint.nxv2iXLen.nxv2f16( %x) + ret %a } -declare @llvm.lrint.nxv2i64.nxv2f16() +declare @llvm.lrint.nxv2iXLen.nxv2f16() -define @lrint_v4f16( %x) { +define @lrint_v4f16( %x) { ; CHECK-LABEL: lrint_v4f16: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: mov w8, #64511 // =0xfbff ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff ; CHECK-NEXT: frintx z1.h, p0/m, z1.h -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: frintx z2.h, p0/m, z0.h -; CHECK-NEXT: movprfx z0, z1 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h +; CHECK-NEXT: frintx z0.h, p0/m, z0.h +; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h +; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: movprfx z4, z1 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.h +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z1.h, z3.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z0.h, z3.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: sel z3.d, p1, z2.d, z4.d +; CHECK-NEXT: fcmuo p1.h, p0/z, z1.h, z1.h +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: sel z2.d, p2, z2.d, z5.d +; CHECK-NEXT: sel z0.d, p3, z6.d, z3.d +; CHECK-NEXT: sel z1.d, p4, z6.d, z2.d +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, 
p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv4i64.nxv4f16( %x) - ret %a + %a = call @llvm.lrint.nxv4iXLen.nxv4f16( %x) + ret %a } -declare @llvm.lrint.nxv4i64.nxv4f16() +declare @llvm.lrint.nxv4iXLen.nxv4f16() -define @lrint_v8f16( %x) { +define @lrint_v8f16( %x) { ; CHECK-LABEL: lrint_v8f16: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.s, z0.h ; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: mov w8, #64511 // =0xfbff ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z4.h, w8 +; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z6.h, w8 +; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff ; CHECK-NEXT: uunpklo z2.d, z1.s ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: uunpklo z3.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: frintx z1.h, p0/m, z1.h ; CHECK-NEXT: frintx z2.h, p0/m, z2.h +; CHECK-NEXT: frintx z1.h, p0/m, z1.h ; CHECK-NEXT: frintx z3.h, p0/m, z3.h -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: frintx z4.h, p0/m, z0.h -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h -; CHECK-NEXT: movprfx z0, z2 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.h -; CHECK-NEXT: movprfx z2, z3 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z3.h -; CHECK-NEXT: movprfx z3, z4 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z4.h +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: frintx z5.h, p0/m, z0.h +; CHECK-NEXT: mov z0.d, #0x8000000000000000 +; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, z4.h +; CHECK-NEXT: fcmge p2.h, p0/z, 
z1.h, z4.h +; CHECK-NEXT: fcmge p3.h, p0/z, z3.h, z4.h +; CHECK-NEXT: fcmge p4.h, p0/z, z5.h, z4.h +; CHECK-NEXT: movprfx z4, z2 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z2.h +; CHECK-NEXT: movprfx z7, z1 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z1.h +; CHECK-NEXT: movprfx z24, z3 +; CHECK-NEXT: fcvtzs z24.d, p0/m, z3.h +; CHECK-NEXT: movprfx z25, z5 +; CHECK-NEXT: fcvtzs z25.d, p0/m, z5.h +; CHECK-NEXT: fcmgt p7.h, p0/z, z3.h, z6.h +; CHECK-NEXT: fcmgt p5.h, p0/z, z2.h, z6.h +; CHECK-NEXT: fcmgt p6.h, p0/z, z1.h, z6.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: mov z4.d, p1/m, z0.d +; CHECK-NEXT: fcmgt p1.h, p0/z, z5.h, z6.h +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: sel z6.d, p2, z0.d, z7.d +; CHECK-NEXT: fcmuo p2.h, p0/z, z2.h, z2.h +; CHECK-NEXT: sel z7.d, p3, z0.d, z24.d +; CHECK-NEXT: fcmuo p3.h, p0/z, z1.h, z1.h +; CHECK-NEXT: sel z24.d, p4, z0.d, z25.d +; CHECK-NEXT: fcmuo p4.h, p0/z, z3.h, z3.h +; CHECK-NEXT: fcmuo p0.h, p0/z, z5.h, z5.h +; CHECK-NEXT: sel z0.d, p5, z26.d, z4.d +; CHECK-NEXT: sel z1.d, p6, z26.d, z6.d +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z2.d, p7, z26.d, z7.d +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z3.d, p1, z26.d, z24.d +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv8i64.nxv8f16( %x) - ret %a + %a = call @llvm.lrint.nxv8iXLen.nxv8f16( %x) + ret %a } -declare @llvm.lrint.nxv8i64.nxv8f16() +declare @llvm.lrint.nxv8iXLen.nxv8f16() -define @lrint_v16i64_v16f16( %x) { -; CHECK-LABEL: lrint_v16i64_v16f16: -; 
CHECK: // %bb.0: -; CHECK-NEXT: uunpklo z2.s, z0.h -; CHECK-NEXT: uunpkhi z0.s, z0.h -; CHECK-NEXT: uunpklo z3.s, z1.h -; CHECK-NEXT: uunpkhi z1.s, z1.h -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z4.d, z2.s -; CHECK-NEXT: uunpkhi z2.d, z2.s -; CHECK-NEXT: uunpklo z5.d, z0.s -; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: uunpklo z6.d, z3.s -; CHECK-NEXT: uunpkhi z3.d, z3.s -; CHECK-NEXT: uunpklo z7.d, z1.s -; CHECK-NEXT: uunpkhi z1.d, z1.s -; CHECK-NEXT: frintx z4.h, p0/m, z4.h -; CHECK-NEXT: frintx z2.h, p0/m, z2.h -; CHECK-NEXT: frintx z5.h, p0/m, z5.h -; CHECK-NEXT: movprfx z24, z0 -; CHECK-NEXT: frintx z24.h, p0/m, z0.h -; CHECK-NEXT: frintx z6.h, p0/m, z6.h -; CHECK-NEXT: movprfx z25, z3 -; CHECK-NEXT: frintx z25.h, p0/m, z3.h -; CHECK-NEXT: frintx z7.h, p0/m, z7.h -; CHECK-NEXT: movprfx z26, z1 -; CHECK-NEXT: frintx z26.h, p0/m, z1.h -; CHECK-NEXT: movprfx z0, z4 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h -; CHECK-NEXT: movprfx z2, z5 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z5.h -; CHECK-NEXT: movprfx z3, z24 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z24.h -; CHECK-NEXT: movprfx z4, z6 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z6.h -; CHECK-NEXT: movprfx z5, z25 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z25.h -; CHECK-NEXT: movprfx z6, z7 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z7.h -; CHECK-NEXT: movprfx z7, z26 -; CHECK-NEXT: fcvtzs z7.d, p0/m, z26.h -; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv16i64.nxv16f16( %x) - ret %a +define @lrint_v16iXLen_v16f16( %x) { + %a = call @llvm.lrint.nxv16iXLen.nxv16f16( %x) + ret %a } -declare @llvm.lrint.nxv16i64.nxv16f16() +declare @llvm.lrint.nxv16iXLen.nxv16f16() -define @lrint_v32i64_v32f16( %x) { -; CHECK-LABEL: lrint_v32i64_v32f16: -; CHECK: // %bb.0: -; CHECK-NEXT: uunpkhi z4.s, z3.h -; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: rdvl x9, #15 -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpkhi z7.s, z2.h -; CHECK-NEXT: uunpklo z2.s, z2.h -; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: 
uunpkhi z5.d, z4.s -; CHECK-NEXT: uunpklo z4.d, z4.s -; CHECK-NEXT: uunpkhi z6.d, z3.s -; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: uunpkhi z24.d, z7.s -; CHECK-NEXT: uunpklo z7.d, z7.s -; CHECK-NEXT: frintx z5.h, p0/m, z5.h -; CHECK-NEXT: frintx z4.h, p0/m, z4.h -; CHECK-NEXT: frintx z6.h, p0/m, z6.h -; CHECK-NEXT: frintx z3.h, p0/m, z3.h -; CHECK-NEXT: frintx z24.h, p0/m, z24.h -; CHECK-NEXT: frintx z7.h, p0/m, z7.h -; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h -; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.h -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h -; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.h -; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] -; CHECK-NEXT: rdvl x9, #14 -; CHECK-NEXT: uunpkhi z5.s, z1.h -; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9] -; CHECK-NEXT: uunpkhi z4.d, z2.s -; CHECK-NEXT: rdvl x9, #13 -; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9] -; CHECK-NEXT: rdvl x9, #12 -; CHECK-NEXT: movprfx z6, z24 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z24.h -; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: uunpkhi z24.s, z0.h -; CHECK-NEXT: st1b { z3.b }, p1, [x8, x9] -; CHECK-NEXT: uunpklo z3.d, z5.s -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: frintx z4.h, p0/m, z4.h -; CHECK-NEXT: rdvl x9, #11 -; CHECK-NEXT: uunpkhi z25.d, z5.s -; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9] -; CHECK-NEXT: rdvl x9, #10 -; CHECK-NEXT: uunpkhi z5.d, z1.s -; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: frintx z2.h, p0/m, z2.h -; CHECK-NEXT: uunpkhi z6.d, z24.s -; CHECK-NEXT: uunpklo z24.d, z24.s -; CHECK-NEXT: frintx z3.h, p0/m, z3.h -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.h -; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] -; CHECK-NEXT: uunpkhi z7.d, z0.s -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: rdvl x9, #9 -; CHECK-NEXT: frintx z25.h, p0/m, z25.h -; CHECK-NEXT: frintx z5.h, p0/m, z5.h -; CHECK-NEXT: frintx z1.h, p0/m, z1.h -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.h -; CHECK-NEXT: frintx z6.h, p0/m, z6.h -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.h -; 
CHECK-NEXT: st1b { z4.b }, p1, [x8, x9] -; CHECK-NEXT: movprfx z4, z24 -; CHECK-NEXT: frintx z4.h, p0/m, z24.h -; CHECK-NEXT: frintx z7.h, p0/m, z7.h -; CHECK-NEXT: frintx z0.h, p0/m, z0.h -; CHECK-NEXT: rdvl x9, #8 -; CHECK-NEXT: fcvtzs z25.d, p0/m, z25.h -; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.h -; CHECK-NEXT: st1b { z2.b }, p1, [x8, x9] -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.h -; CHECK-NEXT: movprfx z2, z6 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.h -; CHECK-NEXT: st1d { z3.d }, p0, [x8, #6, mul vl] -; CHECK-NEXT: movprfx z3, z4 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z4.h -; CHECK-NEXT: movprfx z4, z7 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.h -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h -; CHECK-NEXT: st1d { z25.d }, p0, [x8, #7, mul vl] -; CHECK-NEXT: st1d { z5.d }, p0, [x8, #5, mul vl] -; CHECK-NEXT: st1d { z1.d }, p0, [x8, #4, mul vl] -; CHECK-NEXT: st1d { z2.d }, p0, [x8, #3, mul vl] -; CHECK-NEXT: st1d { z3.d }, p0, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z4.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: st1d { z0.d }, p0, [x8] -; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv32i64.nxv32f16( %x) - ret %a +define @lrint_v32iXLen_v32f16( %x) { + %a = call @llvm.lrint.nxv32iXLen.nxv32f16( %x) + ret %a } -declare @llvm.lrint.nxv32i64.nxv32f16() +declare @llvm.lrint.nxv32iXLen.nxv32f16() -define @lrint_v1f32( %x) { +define @lrint_v1f32( %x) { ; CHECK-LABEL: lrint_v1f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff ; CHECK-NEXT: frintx z0.s, p0/m, z0.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: mov z3.s, w8 +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z3.s +; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; CHECK-NEXT: mov z1.d, p1/m, 
z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv1i64.nxv1f32( %x) - ret %a + %a = call @llvm.lrint.nxv1iXLen.nxv1f32( %x) + ret %a } -declare @llvm.lrint.nxv1i64.nxv1f32() +declare @llvm.lrint.nxv1iXLen.nxv1f32() -define @lrint_v2f32( %x) { +define @lrint_v2f32( %x) { ; CHECK-LABEL: lrint_v2f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff ; CHECK-NEXT: frintx z0.s, p0/m, z0.s -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s +; CHECK-NEXT: mov z3.s, w8 +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z3.s +; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv2i64.nxv2f32( %x) - ret %a + %a = call @llvm.lrint.nxv2iXLen.nxv2f32( %x) + ret %a } -declare @llvm.lrint.nxv2i64.nxv2f32() +declare @llvm.lrint.nxv2iXLen.nxv2f32() -define @lrint_v4f32( %x) { +define @lrint_v4f32( %x) { ; CHECK-LABEL: lrint_v4f32: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff +; CHECK-NEXT: mov z3.s, w8 +; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff ; CHECK-NEXT: frintx z1.s, p0/m, z1.s -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: frintx z2.s, p0/m, z0.s -; CHECK-NEXT: movprfx z0, z1 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.s +; CHECK-NEXT: frintx z0.s, p0/m, z0.s +; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s +; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: movprfx z4, z1 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.s +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.s +; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z3.s +; CHECK-NEXT: fcmgt p4.s, p0/z, z0.s, z3.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: sel z3.d, p1, z2.d, z4.d +; CHECK-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; CHECK-NEXT: sel z2.d, p2, z2.d, z5.d +; CHECK-NEXT: sel z0.d, p3, z6.d, z3.d +; CHECK-NEXT: sel z1.d, p4, z6.d, z2.d +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv4i64.nxv4f32( %x) - ret %a + %a = call @llvm.lrint.nxv4iXLen.nxv4f32( %x) + ret %a } -declare @llvm.lrint.nxv4i64.nxv4f32() +declare @llvm.lrint.nxv4iXLen.nxv4f32() -define 
@lrint_v8f32( %x) { +define @lrint_v8f32( %x) { ; CHECK-LABEL: lrint_v8f32: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: uunpklo z2.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 ; CHECK-NEXT: uunpklo z3.d, z1.s -; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: mov z4.s, w8 +; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff +; CHECK-NEXT: mov z5.d, #0x8000000000000000 +; CHECK-NEXT: mov z6.s, w8 +; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff ; CHECK-NEXT: frintx z2.s, p0/m, z2.s -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: frintx z4.s, p0/m, z0.s +; CHECK-NEXT: frintx z0.s, p0/m, z0.s ; CHECK-NEXT: frintx z3.s, p0/m, z3.s -; CHECK-NEXT: movprfx z5, z1 -; CHECK-NEXT: frintx z5.s, p0/m, z1.s -; CHECK-NEXT: movprfx z0, z2 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.s -; CHECK-NEXT: movprfx z1, z4 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z4.s -; CHECK-NEXT: movprfx z2, z3 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z3.s -; CHECK-NEXT: movprfx z3, z5 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z5.s +; CHECK-NEXT: frintx z1.s, p0/m, z1.s +; CHECK-NEXT: fcmge p1.s, p0/z, z2.s, z4.s +; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z4.s +; CHECK-NEXT: movprfx z7, z0 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z0.s +; CHECK-NEXT: fcmge p3.s, p0/z, z3.s, z4.s +; CHECK-NEXT: fcmge p4.s, p0/z, z1.s, z4.s +; CHECK-NEXT: movprfx z4, z2 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z2.s +; CHECK-NEXT: movprfx z24, z3 +; CHECK-NEXT: fcvtzs z24.d, p0/m, z3.s +; 
CHECK-NEXT: movprfx z25, z1 +; CHECK-NEXT: fcvtzs z25.d, p0/m, z1.s +; CHECK-NEXT: fcmgt p7.s, p0/z, z3.s, z6.s +; CHECK-NEXT: fcmgt p5.s, p0/z, z2.s, z6.s +; CHECK-NEXT: fcmgt p6.s, p0/z, z0.s, z6.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: mov z4.d, p1/m, z5.d +; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z6.s +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: sel z6.d, p2, z5.d, z7.d +; CHECK-NEXT: fcmuo p2.s, p0/z, z2.s, z2.s +; CHECK-NEXT: sel z7.d, p3, z5.d, z24.d +; CHECK-NEXT: fcmuo p3.s, p0/z, z0.s, z0.s +; CHECK-NEXT: sel z5.d, p4, z5.d, z25.d +; CHECK-NEXT: fcmuo p4.s, p0/z, z3.s, z3.s +; CHECK-NEXT: fcmuo p0.s, p0/z, z1.s, z1.s +; CHECK-NEXT: sel z0.d, p5, z26.d, z4.d +; CHECK-NEXT: sel z1.d, p6, z26.d, z6.d +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z2.d, p7, z26.d, z7.d +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z3.d, p1, z26.d, z5.d +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv8i64.nxv8f32( %x) - ret %a + %a = call @llvm.lrint.nxv8iXLen.nxv8f32( %x) + ret %a } -declare @llvm.lrint.nxv8i64.nxv8f32() +declare @llvm.lrint.nxv8iXLen.nxv8f32() -define @lrint_v16i64_v16f32( %x) { -; CHECK-LABEL: lrint_v16i64_v16f32: -; CHECK: // %bb.0: -; CHECK-NEXT: uunpklo z4.d, z0.s -; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: uunpklo z5.d, z1.s -; CHECK-NEXT: uunpkhi z1.d, z1.s -; CHECK-NEXT: uunpklo z6.d, z2.s -; CHECK-NEXT: uunpkhi z2.d, z2.s -; CHECK-NEXT: uunpklo z7.d, z3.s -; CHECK-NEXT: uunpkhi z3.d, z3.s -; CHECK-NEXT: ptrue 
p0.d -; CHECK-NEXT: frintx z4.s, p0/m, z4.s -; CHECK-NEXT: movprfx z24, z0 -; CHECK-NEXT: frintx z24.s, p0/m, z0.s -; CHECK-NEXT: frintx z5.s, p0/m, z5.s -; CHECK-NEXT: movprfx z25, z1 -; CHECK-NEXT: frintx z25.s, p0/m, z1.s -; CHECK-NEXT: frintx z6.s, p0/m, z6.s -; CHECK-NEXT: movprfx z26, z2 -; CHECK-NEXT: frintx z26.s, p0/m, z2.s -; CHECK-NEXT: frintx z7.s, p0/m, z7.s -; CHECK-NEXT: movprfx z27, z3 -; CHECK-NEXT: frintx z27.s, p0/m, z3.s -; CHECK-NEXT: movprfx z0, z4 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z4.s -; CHECK-NEXT: movprfx z1, z24 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z24.s -; CHECK-NEXT: movprfx z2, z5 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z5.s -; CHECK-NEXT: movprfx z3, z25 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z25.s -; CHECK-NEXT: movprfx z4, z6 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z6.s -; CHECK-NEXT: movprfx z5, z26 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z26.s -; CHECK-NEXT: movprfx z6, z7 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z7.s -; CHECK-NEXT: movprfx z7, z27 -; CHECK-NEXT: fcvtzs z7.d, p0/m, z27.s -; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv16i64.nxv16f32( %x) - ret %a +define @lrint_v16iXLen_v16f32( %x) { + %a = call @llvm.lrint.nxv16iXLen.nxv16f32( %x) + ret %a } -declare @llvm.lrint.nxv16i64.nxv16f32() +declare @llvm.lrint.nxv16iXLen.nxv16f32() -define @lrint_v32i64_v32f32( %x) { -; CHECK-LABEL: lrint_v32i64_v32f32: -; CHECK: // %bb.0: -; CHECK-NEXT: uunpkhi z24.d, z7.s -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: rdvl x9, #15 -; CHECK-NEXT: uunpklo z7.d, z7.s -; CHECK-NEXT: uunpkhi z25.d, z6.s -; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: uunpklo z6.d, z6.s -; CHECK-NEXT: uunpkhi z27.d, z4.s -; CHECK-NEXT: uunpklo z4.d, z4.s -; CHECK-NEXT: uunpklo z29.d, z3.s -; CHECK-NEXT: uunpkhi z3.d, z3.s -; CHECK-NEXT: uunpklo z26.d, z1.s -; CHECK-NEXT: frintx z24.s, p0/m, z24.s -; CHECK-NEXT: uunpklo z28.d, z2.s -; CHECK-NEXT: uunpkhi z2.d, z2.s -; CHECK-NEXT: frintx z7.s, p0/m, z7.s -; CHECK-NEXT: frintx z25.s, p0/m, z25.s -; CHECK-NEXT: uunpkhi z1.d, z1.s -; CHECK-NEXT: frintx z6.s, 
p0/m, z6.s -; CHECK-NEXT: frintx z27.s, p0/m, z27.s -; CHECK-NEXT: frintx z4.s, p0/m, z4.s -; CHECK-NEXT: frintx z3.s, p0/m, z3.s -; CHECK-NEXT: frintx z26.s, p0/m, z26.s -; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.s -; CHECK-NEXT: frintx z2.s, p0/m, z2.s -; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.s -; CHECK-NEXT: fcvtzs z25.d, p0/m, z25.s -; CHECK-NEXT: frintx z1.s, p0/m, z1.s -; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.s -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s -; CHECK-NEXT: st1b { z24.b }, p1, [x8, x9] -; CHECK-NEXT: rdvl x9, #14 -; CHECK-NEXT: uunpkhi z24.d, z5.s -; CHECK-NEXT: st1b { z7.b }, p1, [x8, x9] -; CHECK-NEXT: rdvl x9, #13 -; CHECK-NEXT: uunpklo z5.d, z5.s -; CHECK-NEXT: st1b { z25.b }, p1, [x8, x9] -; CHECK-NEXT: rdvl x9, #12 -; CHECK-NEXT: movprfx z25, z27 -; CHECK-NEXT: fcvtzs z25.d, p0/m, z27.s -; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9] -; CHECK-NEXT: rdvl x9, #11 -; CHECK-NEXT: movprfx z6, z29 -; CHECK-NEXT: frintx z6.s, p0/m, z29.s -; CHECK-NEXT: frintx z24.s, p0/m, z24.s -; CHECK-NEXT: uunpkhi z7.d, z0.s -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: frintx z5.s, p0/m, z5.s -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s -; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.s -; CHECK-NEXT: frintx z7.s, p0/m, z7.s -; CHECK-NEXT: frintx z0.s, p0/m, z0.s -; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.s -; CHECK-NEXT: st1b { z24.b }, p1, [x8, x9] -; CHECK-NEXT: movprfx z24, z28 -; CHECK-NEXT: frintx z24.s, p0/m, z28.s -; CHECK-NEXT: rdvl x9, #10 -; CHECK-NEXT: st1b { z5.b }, p1, [x8, x9] -; CHECK-NEXT: rdvl x9, #9 -; CHECK-NEXT: movprfx z5, z6 -; CHECK-NEXT: fcvtzs z5.d, p0/m, z6.s -; CHECK-NEXT: st1b { z25.b }, p1, [x8, x9] -; CHECK-NEXT: rdvl x9, #8 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s -; CHECK-NEXT: st1b { z4.b }, p1, [x8, x9] -; CHECK-NEXT: movprfx z4, z7 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z7.s -; CHECK-NEXT: movprfx z6, z24 -; CHECK-NEXT: fcvtzs z6.d, p0/m, z24.s -; CHECK-NEXT: st1d { z3.d }, p0, [x8, #7, mul vl] 
-; CHECK-NEXT: movprfx z3, z26 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z26.s -; CHECK-NEXT: st1d { z5.d }, p0, [x8, #6, mul vl] -; CHECK-NEXT: st1d { z2.d }, p0, [x8, #5, mul vl] -; CHECK-NEXT: st1d { z1.d }, p0, [x8, #3, mul vl] -; CHECK-NEXT: st1d { z6.d }, p0, [x8, #4, mul vl] -; CHECK-NEXT: st1d { z3.d }, p0, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z4.d }, p0, [x8, #1, mul vl] -; CHECK-NEXT: st1d { z0.d }, p0, [x8] -; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv32i64.nxv32f32( %x) - ret %a +define @lrint_v32iXLen_v32f32( %x) { + %a = call @llvm.lrint.nxv32iXLen.nxv32f32( %x) + ret %a } -declare @llvm.lrint.nxv32i64.nxv32f32() +declare @llvm.lrint.nxv32iXLen.nxv32f32() -define @lrint_v1f64( %x) { +define @lrint_v1f64( %x) { ; CHECK-LABEL: lrint_v1f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff ; CHECK-NEXT: frintx z0.d, p0/m, z0.d -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: mov z3.d, x8 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z3.d +; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv1i64.nxv1f64( %x) - ret %a + %a = call @llvm.lrint.nxv1iXLen.nxv1f64( %x) + ret %a } -declare @llvm.lrint.nxv1i64.nxv1f64() +declare @llvm.lrint.nxv1iXLen.nxv1f64() -define @lrint_v2f64( %x) { +define @lrint_v2f64( %x) { ; CHECK-LABEL: lrint_v2f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.d, x8 +; 
CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff ; CHECK-NEXT: frintx z0.d, p0/m, z0.d -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: mov z3.d, x8 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z3.d +; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv2i64.nxv2f64( %x) - ret %a + %a = call @llvm.lrint.nxv2iXLen.nxv2f64( %x) + ret %a } -declare @llvm.lrint.nxv2i64.nxv2f64() +declare @llvm.lrint.nxv2iXLen.nxv2f64() -define @lrint_v4f64( %x) { +define @lrint_v4f64( %x) { ; CHECK-LABEL: lrint_v4f64: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 +; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff ; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: frintx z1.d, p0/m, z1.d -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d +; CHECK-NEXT: mov z3.d, x8 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z2.d +; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z2.d +; CHECK-NEXT: mov z2.d, #0x8000000000000000 +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d +; CHECK-NEXT: movprfx z5, z1 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z1.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z3.d +; CHECK-NEXT: fcmgt p4.d, p0/z, z1.d, z3.d +; CHECK-NEXT: not 
p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: sel z3.d, p1, z2.d, z4.d +; CHECK-NEXT: fcmuo p1.d, p0/z, z0.d, z0.d +; CHECK-NEXT: fcmuo p0.d, p0/z, z1.d, z1.d +; CHECK-NEXT: sel z2.d, p2, z2.d, z5.d +; CHECK-NEXT: sel z0.d, p3, z6.d, z3.d +; CHECK-NEXT: sel z1.d, p4, z6.d, z2.d +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv4i64.nxv4f64( %x) - ret %a + %a = call @llvm.lrint.nxv4iXLen.nxv4f64( %x) + ret %a } -declare @llvm.lrint.nxv4i64.nxv4f64() +declare @llvm.lrint.nxv4iXLen.nxv4f64() -define @lrint_v8f64( %x) { +define @lrint_v8f64( %x) { ; CHECK-LABEL: lrint_v8f64: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 +; CHECK-NEXT: mov z5.d, #0x8000000000000000 +; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff +; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff ; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: frintx z1.d, p0/m, z1.d ; CHECK-NEXT: frintx z2.d, p0/m, z2.d ; CHECK-NEXT: frintx z3.d, p0/m, z3.d -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d +; CHECK-NEXT: mov z6.d, x8 +; CHECK-NEXT: fcmge 
p1.d, p0/z, z0.d, z4.d +; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z4.d +; CHECK-NEXT: fcmge p3.d, p0/z, z2.d, z4.d +; CHECK-NEXT: fcmge p4.d, p0/z, z3.d, z4.d +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d +; CHECK-NEXT: movprfx z7, z1 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z1.d +; CHECK-NEXT: movprfx z24, z2 +; CHECK-NEXT: fcvtzs z24.d, p0/m, z2.d +; CHECK-NEXT: movprfx z25, z3 +; CHECK-NEXT: fcvtzs z25.d, p0/m, z3.d +; CHECK-NEXT: fcmgt p7.d, p0/z, z2.d, z6.d +; CHECK-NEXT: fcmgt p5.d, p0/z, z0.d, z6.d +; CHECK-NEXT: fcmgt p6.d, p0/z, z1.d, z6.d +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: mov z4.d, p1/m, z5.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z6.d +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: sel z6.d, p2, z5.d, z7.d +; CHECK-NEXT: fcmuo p2.d, p0/z, z0.d, z0.d +; CHECK-NEXT: sel z7.d, p3, z5.d, z24.d +; CHECK-NEXT: fcmuo p3.d, p0/z, z1.d, z1.d +; CHECK-NEXT: sel z5.d, p4, z5.d, z25.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z2.d, z2.d +; CHECK-NEXT: fcmuo p0.d, p0/z, z3.d, z3.d +; CHECK-NEXT: sel z0.d, p5, z26.d, z4.d +; CHECK-NEXT: sel z1.d, p6, z26.d, z6.d +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z2.d, p7, z26.d, z7.d +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z3.d, p1, z26.d, z5.d +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv8i64.nxv8f64( %x) - ret %a + %a = call @llvm.lrint.nxv8iXLen.nxv8f64( %x) + ret %a } -declare @llvm.lrint.nxv8i64.nxv8f64() +declare @llvm.lrint.nxv8iXLen.nxv8f64() -define @lrint_v16f64( 
%x) { +define @lrint_v16f64( %x) { ; CHECK-LABEL: lrint_v16f64: ; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: frintx z0.d, p0/m, z0.d -; CHECK-NEXT: frintx z1.d, p0/m, z1.d +; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 +; CHECK-NEXT: mov z24.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z25.d, x8 +; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff +; CHECK-NEXT: movprfx z26, z0 +; CHECK-NEXT: frintx z26.d, p0/m, z0.d +; CHECK-NEXT: movprfx z27, z1 +; CHECK-NEXT: frintx z27.d, p0/m, z1.d ; CHECK-NEXT: frintx z2.d, p0/m, z2.d +; CHECK-NEXT: mov z0.d, #0x8000000000000000 +; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: frintx z3.d, p0/m, z3.d -; CHECK-NEXT: frintx z4.d, p0/m, z4.d +; CHECK-NEXT: movprfx z28, z4 +; CHECK-NEXT: frintx z28.d, p0/m, z4.d ; CHECK-NEXT: frintx z5.d, p0/m, z5.d ; CHECK-NEXT: frintx z6.d, p0/m, z6.d ; CHECK-NEXT: frintx z7.d, p0/m, z7.d -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d -; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d -; CHECK-NEXT: fcvtzs z4.d, p0/m, 
z4.d -; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d -; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d -; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.d +; CHECK-NEXT: fcmge p1.d, p0/z, z26.d, z25.d +; CHECK-NEXT: fcmge p2.d, p0/z, z27.d, z25.d +; CHECK-NEXT: movprfx z4, z26 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z26.d +; CHECK-NEXT: fcmge p5.d, p0/z, z2.d, z25.d +; CHECK-NEXT: movprfx z29, z27 +; CHECK-NEXT: fcvtzs z29.d, p0/m, z27.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z26.d, z1.d +; CHECK-NEXT: fcmge p6.d, p0/z, z3.d, z25.d +; CHECK-NEXT: fcmge p8.d, p0/z, z5.d, z25.d +; CHECK-NEXT: fcmgt p7.d, p0/z, z27.d, z1.d +; CHECK-NEXT: fcmge p9.d, p0/z, z6.d, z25.d +; CHECK-NEXT: movprfx z30, z28 +; CHECK-NEXT: fcvtzs z30.d, p0/m, z28.d +; CHECK-NEXT: fcmge p10.d, p0/z, z7.d, z25.d +; CHECK-NEXT: not p4.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p1.d, p0/z, z26.d, z26.d +; CHECK-NEXT: movprfx z26, z2 +; CHECK-NEXT: fcvtzs z26.d, p0/m, z2.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: movprfx z31, z6 +; CHECK-NEXT: fcvtzs z31.d, p0/m, z6.d +; CHECK-NEXT: movprfx z8, z7 +; CHECK-NEXT: fcvtzs z8.d, p0/m, z7.d +; CHECK-NEXT: mov z4.d, p4/m, z0.d +; CHECK-NEXT: fcmge p4.d, p0/z, z28.d, z25.d +; CHECK-NEXT: not p5.b, p0/z, p5.b +; CHECK-NEXT: mov z29.d, p2/m, z0.d +; CHECK-NEXT: fcmuo p2.d, p0/z, z27.d, z27.d +; CHECK-NEXT: movprfx z27, z3 +; CHECK-NEXT: fcvtzs z27.d, p0/m, z3.d +; CHECK-NEXT: sel z25.d, p5, z0.d, z26.d +; CHECK-NEXT: movprfx z26, z5 +; CHECK-NEXT: fcvtzs z26.d, p0/m, z5.d +; CHECK-NEXT: not p6.b, p0/z, p6.b +; CHECK-NEXT: not p5.b, p0/z, p8.b +; CHECK-NEXT: fcmgt p8.d, p0/z, z2.d, z1.d +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: mov z27.d, p6/m, z0.d +; CHECK-NEXT: not p6.b, p0/z, p9.b +; CHECK-NEXT: fcmuo p9.d, p0/z, z2.d, z2.d +; CHECK-NEXT: mov z30.d, p4/m, z0.d +; CHECK-NEXT: not p4.b, p0/z, p10.b +; CHECK-NEXT: fcmgt p10.d, p0/z, z3.d, z1.d +; CHECK-NEXT: mov z26.d, p5/m, z0.d +; CHECK-NEXT: fcmgt p5.d, p0/z, z28.d, z1.d +; CHECK-NEXT: mov z31.d, p6/m, z0.d +; CHECK-NEXT: mov z8.d, p4/m, 
z0.d +; CHECK-NEXT: sel z0.d, p3, z24.d, z4.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z5.d, z1.d +; CHECK-NEXT: fcmgt p4.d, p0/z, z6.d, z1.d +; CHECK-NEXT: fcmgt p6.d, p0/z, z7.d, z1.d +; CHECK-NEXT: sel z1.d, p7, z24.d, z29.d +; CHECK-NEXT: fcmuo p7.d, p0/z, z3.d, z3.d +; CHECK-NEXT: sel z2.d, p8, z24.d, z25.d +; CHECK-NEXT: sel z3.d, p10, z24.d, z27.d +; CHECK-NEXT: sel z4.d, p5, z24.d, z30.d +; CHECK-NEXT: fcmuo p5.d, p0/z, z28.d, z28.d +; CHECK-NEXT: fcmuo p8.d, p0/z, z5.d, z5.d +; CHECK-NEXT: fcmuo p10.d, p0/z, z6.d, z6.d +; CHECK-NEXT: sel z5.d, p3, z24.d, z26.d +; CHECK-NEXT: fcmuo p0.d, p0/z, z7.d, z7.d +; CHECK-NEXT: sel z6.d, p4, z24.d, z31.d +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z7.d, p6, z24.d, z8.d +; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z2.d, p9/m, #0 // =0x0 +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z3.d, p7/m, #0 // =0x0 +; CHECK-NEXT: mov z4.d, p5/m, #0 // =0x0 +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 +; CHECK-NEXT: mov z6.d, p10/m, #0 // =0x0 +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv16i64.nxv16f64( %x) - ret %a + %a = call @llvm.lrint.nxv16iXLen.nxv16f64( %x) + ret %a } -declare @llvm.lrint.nxv16i64.nxv16f64() +declare @llvm.lrint.nxv16iXLen.nxv16f64() -define @lrint_v32f64( %x) { +define @lrint_v32f64( %x) { ; CHECK-LABEL: lrint_v32f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b -; 
CHECK-NEXT: rdvl x14, #15 -; CHECK-NEXT: rdvl x15, #14 -; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: rdvl x13, #13 -; CHECK-NEXT: rdvl x12, #12 -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x14] -; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0, x15] -; CHECK-NEXT: rdvl x10, #11 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-12 +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str z18, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z17, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z16, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z15, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z14, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z13, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z12, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z11, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z10, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xe0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 96 * VG +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 
0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: rdvl x9, #8 +; CHECK-NEXT: rdvl x10, #9 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: rdvl x11, #10 -; CHECK-NEXT: rdvl x9, #9 -; CHECK-NEXT: ld1b { z2.b }, p0/z, [x0, x13] -; CHECK-NEXT: ld1b { z3.b }, p0/z, [x0, x12] -; CHECK-NEXT: ld1b { z4.b }, p0/z, [x0, x10] -; CHECK-NEXT: ld1b { z5.b }, p0/z, [x0, x11] -; CHECK-NEXT: frintx z0.d, p1/m, z0.d -; CHECK-NEXT: frintx z1.d, p1/m, z1.d -; CHECK-NEXT: ld1b { z6.b }, p0/z, [x0, x9] -; CHECK-NEXT: rdvl x16, #8 -; CHECK-NEXT: frintx z2.d, p1/m, z2.d -; CHECK-NEXT: ld1d { z24.d }, p1/z, [x0, #7, mul vl] -; CHECK-NEXT: frintx z3.d, p1/m, z3.d -; CHECK-NEXT: frintx z4.d, p1/m, z4.d -; CHECK-NEXT: frintx z5.d, p1/m, z5.d -; CHECK-NEXT: frintx z6.d, p1/m, z6.d -; CHECK-NEXT: ld1b { z7.b }, p0/z, [x0, x16] -; CHECK-NEXT: ld1d { z25.d }, p1/z, [x0, #6, mul vl] -; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d -; CHECK-NEXT: fcvtzs z1.d, p1/m, z1.d -; CHECK-NEXT: ld1d { z26.d }, p1/z, [x0, #5, mul vl] -; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0, #4, mul vl] -; CHECK-NEXT: ld1d { z28.d }, p1/z, [x0, #3, mul vl] -; CHECK-NEXT: ld1d { z29.d }, p1/z, [x0, #2, mul vl] -; CHECK-NEXT: ld1d { z30.d }, p1/z, [x0, #1, mul vl] -; CHECK-NEXT: fcvtzs z2.d, p1/m, z2.d -; CHECK-NEXT: ld1d { z31.d }, p1/z, [x0] -; CHECK-NEXT: frintx z7.d, p1/m, z7.d -; CHECK-NEXT: fcvtzs z3.d, p1/m, z3.d -; CHECK-NEXT: fcvtzs 
z4.d, p1/m, z4.d -; CHECK-NEXT: st1b { z0.b }, p0, [x8, x14] -; CHECK-NEXT: movprfx z0, z5 -; CHECK-NEXT: fcvtzs z0.d, p1/m, z5.d -; CHECK-NEXT: frintx z24.d, p1/m, z24.d -; CHECK-NEXT: st1b { z1.b }, p0, [x8, x15] -; CHECK-NEXT: movprfx z1, z6 -; CHECK-NEXT: fcvtzs z1.d, p1/m, z6.d -; CHECK-NEXT: movprfx z5, z25 -; CHECK-NEXT: frintx z5.d, p1/m, z25.d -; CHECK-NEXT: movprfx z6, z26 -; CHECK-NEXT: frintx z6.d, p1/m, z26.d -; CHECK-NEXT: st1b { z2.b }, p0, [x8, x13] -; CHECK-NEXT: movprfx z2, z7 -; CHECK-NEXT: fcvtzs z2.d, p1/m, z7.d -; CHECK-NEXT: movprfx z7, z27 -; CHECK-NEXT: frintx z7.d, p1/m, z27.d -; CHECK-NEXT: st1b { z3.b }, p0, [x8, x12] -; CHECK-NEXT: movprfx z3, z28 -; CHECK-NEXT: frintx z3.d, p1/m, z28.d -; CHECK-NEXT: st1b { z4.b }, p0, [x8, x10] -; CHECK-NEXT: movprfx z4, z29 -; CHECK-NEXT: frintx z4.d, p1/m, z29.d -; CHECK-NEXT: st1b { z0.b }, p0, [x8, x11] -; CHECK-NEXT: movprfx z0, z30 -; CHECK-NEXT: frintx z0.d, p1/m, z30.d -; CHECK-NEXT: fcvtzs z24.d, p1/m, z24.d -; CHECK-NEXT: st1b { z1.b }, p0, [x8, x9] -; CHECK-NEXT: movprfx z1, z31 -; CHECK-NEXT: frintx z1.d, p1/m, z31.d -; CHECK-NEXT: fcvtzs z5.d, p1/m, z5.d -; CHECK-NEXT: fcvtzs z6.d, p1/m, z6.d -; CHECK-NEXT: fcvtzs z7.d, p1/m, z7.d -; CHECK-NEXT: st1b { z2.b }, p0, [x8, x16] -; CHECK-NEXT: movprfx z2, z3 -; CHECK-NEXT: fcvtzs z2.d, p1/m, z3.d -; CHECK-NEXT: movprfx z3, z4 -; CHECK-NEXT: fcvtzs z3.d, p1/m, z4.d -; CHECK-NEXT: fcvtzs z0.d, p1/m, z0.d -; CHECK-NEXT: st1d { z24.d }, p1, [x8, #7, mul vl] -; CHECK-NEXT: fcvtzs z1.d, p1/m, z1.d -; CHECK-NEXT: st1d { z5.d }, p1, [x8, #6, mul vl] -; CHECK-NEXT: st1d { z6.d }, p1, [x8, #5, mul vl] -; CHECK-NEXT: st1d { z7.d }, p1, [x8, #4, mul vl] -; CHECK-NEXT: st1d { z2.d }, p1, [x8, #3, mul vl] -; CHECK-NEXT: st1d { z3.d }, p1, [x8, #2, mul vl] -; CHECK-NEXT: st1d { z0.d }, p1, [x8, #1, mul vl] -; CHECK-NEXT: st1d { z1.d }, p1, [x8] +; CHECK-NEXT: mov x12, #-4332462841530417152 // =0xc3e0000000000000 +; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0, x9] 
+; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0, x10] +; CHECK-NEXT: mov z2.d, x12 +; CHECK-NEXT: rdvl x14, #13 +; CHECK-NEXT: rdvl x13, #12 +; CHECK-NEXT: rdvl x12, #11 +; CHECK-NEXT: ld1b { z6.b }, p1/z, [x0, x14] +; CHECK-NEXT: ld1b { z7.b }, p1/z, [x0, x13] +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: movprfx z24, z0 +; CHECK-NEXT: frintx z24.d, p0/m, z0.d +; CHECK-NEXT: ld1b { z0.b }, p1/z, [x0, x11] +; CHECK-NEXT: movprfx z5, z1 +; CHECK-NEXT: frintx z5.d, p0/m, z1.d +; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0, x12] +; CHECK-NEXT: mov x15, #4890909195324358655 // =0x43dfffffffffffff +; CHECK-NEXT: rdvl x16, #15 +; CHECK-NEXT: movprfx z30, z6 +; CHECK-NEXT: frintx z30.d, p0/m, z6.d +; CHECK-NEXT: movprfx z28, z7 +; CHECK-NEXT: frintx z28.d, p0/m, z7.d +; CHECK-NEXT: ld1b { z8.b }, p1/z, [x0, x16] +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: frintx z4.d, p0/m, z0.d +; CHECK-NEXT: mov z0.d, #0x7fffffffffffffff +; CHECK-NEXT: ld1d { z18.d }, p0/z, [x0] +; CHECK-NEXT: fcmge p3.d, p0/z, z5.d, z2.d +; CHECK-NEXT: fcmge p2.d, p0/z, z24.d, z2.d +; CHECK-NEXT: movprfx z6, z5 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z5.d +; CHECK-NEXT: movprfx z27, z1 +; CHECK-NEXT: frintx z27.d, p0/m, z1.d +; CHECK-NEXT: movprfx z25, z24 +; CHECK-NEXT: fcvtzs z25.d, p0/m, z24.d +; CHECK-NEXT: mov z1.d, x15 +; CHECK-NEXT: rdvl x15, #14 +; CHECK-NEXT: movprfx z9, z28 +; CHECK-NEXT: fcvtzs z9.d, p0/m, z28.d +; CHECK-NEXT: movprfx z13, z8 +; CHECK-NEXT: frintx z13.d, p0/m, z8.d +; CHECK-NEXT: fcmge p4.d, p0/z, z4.d, z2.d +; CHECK-NEXT: movprfx z7, z4 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z4.d +; CHECK-NEXT: ld1d { z15.d }, p0/z, [x0, #2, mul vl] +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: fcmgt p5.d, p0/z, z24.d, z1.d +; CHECK-NEXT: fcmgt p6.d, p0/z, z5.d, z1.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: fcmge p7.d, p0/z, z27.d, z2.d +; CHECK-NEXT: movprfx z26, z27 +; CHECK-NEXT: fcvtzs z26.d, p0/m, z27.d +; CHECK-NEXT: sel z29.d, p3, z3.d, z6.d +; CHECK-NEXT: ld1b { z6.b }, p1/z, 
[x0, x15] +; CHECK-NEXT: fcmge p3.d, p0/z, z28.d, z2.d +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: mov z25.d, p2/m, z3.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z4.d, z1.d +; CHECK-NEXT: movprfx z16, z13 +; CHECK-NEXT: fcvtzs z16.d, p0/m, z13.d +; CHECK-NEXT: ld1d { z17.d }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ld1d { z14.d }, p0/z, [x0, #3, mul vl] +; CHECK-NEXT: sel z31.d, p4, z3.d, z7.d +; CHECK-NEXT: movprfx z11, z6 +; CHECK-NEXT: frintx z11.d, p0/m, z6.d +; CHECK-NEXT: not p7.b, p0/z, p7.b +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: sel z6.d, p5, z0.d, z25.d +; CHECK-NEXT: fcmgt p4.d, p0/z, z27.d, z1.d +; CHECK-NEXT: sel z7.d, p6, z0.d, z29.d +; CHECK-NEXT: mov z26.d, p7/m, z3.d +; CHECK-NEXT: fcmge p5.d, p0/z, z13.d, z2.d +; CHECK-NEXT: sel z25.d, p2, z0.d, z31.d +; CHECK-NEXT: fcmge p2.d, p0/z, z30.d, z2.d +; CHECK-NEXT: sel z29.d, p3, z3.d, z9.d +; CHECK-NEXT: fcmge p3.d, p0/z, z11.d, z2.d +; CHECK-NEXT: movprfx z31, z30 +; CHECK-NEXT: fcvtzs z31.d, p0/m, z30.d +; CHECK-NEXT: movprfx z9, z11 +; CHECK-NEXT: fcvtzs z9.d, p0/m, z11.d +; CHECK-NEXT: mov z26.d, p4/m, z0.d +; CHECK-NEXT: fcmgt p4.d, p0/z, z28.d, z1.d +; CHECK-NEXT: fcmgt p6.d, p0/z, z30.d, z1.d +; CHECK-NEXT: not p7.b, p0/z, p5.b +; CHECK-NEXT: fcmuo p5.d, p0/z, z27.d, z27.d +; CHECK-NEXT: fcmgt p8.d, p0/z, z13.d, z1.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: movprfx z27, z18 +; CHECK-NEXT: frintx z27.d, p0/m, z18.d +; CHECK-NEXT: ld1d { z8.d }, p0/z, [x0, #7, mul vl] +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: mov z16.d, p7/m, z3.d +; CHECK-NEXT: fcmuo p7.d, p0/z, z13.d, z13.d +; CHECK-NEXT: mov z31.d, p2/m, z3.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z11.d, z1.d +; CHECK-NEXT: mov z29.d, p4/m, z0.d +; CHECK-NEXT: mov z9.d, p3/m, z3.d +; CHECK-NEXT: fcmuo p3.d, p0/z, z28.d, z28.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z30.d, z30.d +; CHECK-NEXT: movprfx z28, z17 +; CHECK-NEXT: frintx z28.d, p0/m, z17.d +; CHECK-NEXT: movprfx z30, z15 +; CHECK-NEXT: frintx z30.d, p0/m, z15.d +; 
CHECK-NEXT: ld1d { z13.d }, p0/z, [x0, #4, mul vl] +; CHECK-NEXT: mov z31.d, p6/m, z0.d +; CHECK-NEXT: fcmuo p6.d, p0/z, z11.d, z11.d +; CHECK-NEXT: sel z11.d, p8, z0.d, z16.d +; CHECK-NEXT: mov z9.d, p2/m, z0.d +; CHECK-NEXT: fcmuo p2.d, p0/z, z24.d, z24.d +; CHECK-NEXT: movprfx z24, z14 +; CHECK-NEXT: frintx z24.d, p0/m, z14.d +; CHECK-NEXT: fcmge p8.d, p0/z, z27.d, z2.d +; CHECK-NEXT: ld1d { z10.d }, p0/z, [x0, #6, mul vl] +; CHECK-NEXT: ld1d { z12.d }, p0/z, [x0, #5, mul vl] +; CHECK-NEXT: mov z26.d, p5/m, #0 // =0x0 +; CHECK-NEXT: mov z29.d, p3/m, #0 // =0x0 +; CHECK-NEXT: fcmge p5.d, p0/z, z28.d, z2.d +; CHECK-NEXT: movprfx z14, z27 +; CHECK-NEXT: fcvtzs z14.d, p0/m, z27.d +; CHECK-NEXT: fcmge p3.d, p0/z, z30.d, z2.d +; CHECK-NEXT: frintx z13.d, p0/m, z13.d +; CHECK-NEXT: mov z31.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmge p4.d, p0/z, z24.d, z2.d +; CHECK-NEXT: mov z9.d, p6/m, #0 // =0x0 +; CHECK-NEXT: movprfx z15, z28 +; CHECK-NEXT: fcvtzs z15.d, p0/m, z28.d +; CHECK-NEXT: not p6.b, p0/z, p8.b +; CHECK-NEXT: movprfx z16, z30 +; CHECK-NEXT: fcvtzs z16.d, p0/m, z30.d +; CHECK-NEXT: frintx z12.d, p0/m, z12.d +; CHECK-NEXT: frintx z10.d, p0/m, z10.d +; CHECK-NEXT: movprfx z17, z24 +; CHECK-NEXT: fcvtzs z17.d, p0/m, z24.d +; CHECK-NEXT: movprfx z18, z8 +; CHECK-NEXT: frintx z18.d, p0/m, z8.d +; CHECK-NEXT: not p5.b, p0/z, p5.b +; CHECK-NEXT: sel z8.d, p6, z3.d, z14.d +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: fcmge p6.d, p0/z, z13.d, z2.d +; CHECK-NEXT: mov z11.d, p7/m, #0 // =0x0 +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: sel z14.d, p5, z3.d, z15.d +; CHECK-NEXT: fcmuo p7.d, p0/z, z5.d, z5.d +; CHECK-NEXT: sel z15.d, p3, z3.d, z16.d +; CHECK-NEXT: movprfx z16, z13 +; CHECK-NEXT: fcvtzs z16.d, p0/m, z13.d +; CHECK-NEXT: fcmge p5.d, p0/z, z12.d, z2.d +; CHECK-NEXT: fcmge p3.d, p0/z, z10.d, z2.d +; CHECK-NEXT: sel z5.d, p4, z3.d, z17.d +; CHECK-NEXT: fcmge p4.d, p0/z, z18.d, z2.d +; CHECK-NEXT: not p6.b, p0/z, p6.b +; CHECK-NEXT: movprfx z2, z12 +; 
CHECK-NEXT: fcvtzs z2.d, p0/m, z12.d +; CHECK-NEXT: movprfx z17, z10 +; CHECK-NEXT: fcvtzs z17.d, p0/m, z10.d +; CHECK-NEXT: st1b { z11.b }, p1, [x8, x16] +; CHECK-NEXT: movprfx z11, z18 +; CHECK-NEXT: fcvtzs z11.d, p0/m, z18.d +; CHECK-NEXT: mov z6.d, p2/m, #0 // =0x0 +; CHECK-NEXT: st1b { z9.b }, p1, [x8, x15] +; CHECK-NEXT: sel z9.d, p6, z3.d, z16.d +; CHECK-NEXT: fcmuo p6.d, p0/z, z4.d, z4.d +; CHECK-NEXT: not p5.b, p0/z, p5.b +; CHECK-NEXT: fcmgt p2.d, p0/z, z18.d, z1.d +; CHECK-NEXT: mov z7.d, p7/m, #0 // =0x0 +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: st1b { z31.b }, p1, [x8, x14] +; CHECK-NEXT: fcmgt p7.d, p0/z, z24.d, z1.d +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: mov z2.d, p5/m, z3.d +; CHECK-NEXT: fcmgt p5.d, p0/z, z28.d, z1.d +; CHECK-NEXT: sel z4.d, p3, z3.d, z17.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z13.d, z1.d +; CHECK-NEXT: mov z25.d, p6/m, #0 // =0x0 +; CHECK-NEXT: sel z3.d, p4, z3.d, z11.d +; CHECK-NEXT: fcmgt p4.d, p0/z, z10.d, z1.d +; CHECK-NEXT: fcmgt p6.d, p0/z, z12.d, z1.d +; CHECK-NEXT: st1b { z29.b }, p1, [x8, x13] +; CHECK-NEXT: st1b { z26.b }, p1, [x8, x12] +; CHECK-NEXT: sel z26.d, p5, z0.d, z14.d +; CHECK-NEXT: fcmgt p5.d, p0/z, z30.d, z1.d +; CHECK-NEXT: sel z29.d, p3, z0.d, z9.d +; CHECK-NEXT: fcmuo p3.d, p0/z, z18.d, z18.d +; CHECK-NEXT: mov z3.d, p2/m, z0.d +; CHECK-NEXT: st1b { z25.b }, p1, [x8, x11] +; CHECK-NEXT: fcmuo p2.d, p0/z, z10.d, z10.d +; CHECK-NEXT: mov z4.d, p4/m, z0.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z12.d, z12.d +; CHECK-NEXT: st1b { z7.b }, p1, [x8, x10] +; CHECK-NEXT: mov z2.d, p6/m, z0.d +; CHECK-NEXT: st1b { z6.b }, p1, [x8, x9] +; CHECK-NEXT: fcmuo p1.d, p0/z, z13.d, z13.d +; CHECK-NEXT: fcmgt p6.d, p0/z, z27.d, z1.d +; CHECK-NEXT: mov z3.d, p3/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p3.d, p0/z, z24.d, z24.d +; CHECK-NEXT: sel z1.d, p7, z0.d, z5.d +; CHECK-NEXT: mov z4.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p2.d, p0/z, z30.d, z30.d +; CHECK-NEXT: sel z5.d, p5, z0.d, z15.d +; CHECK-NEXT: mov z2.d, p4/m, 
#0 // =0x0 +; CHECK-NEXT: fcmuo p4.d, p0/z, z28.d, z28.d +; CHECK-NEXT: mov z29.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p1.d, p0/z, z27.d, z27.d +; CHECK-NEXT: sel z0.d, p6, z0.d, z8.d +; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 +; CHECK-NEXT: st1d { z3.d }, p0, [x8, #7, mul vl] +; CHECK-NEXT: mov z5.d, p2/m, #0 // =0x0 +; CHECK-NEXT: st1d { z4.d }, p0, [x8, #6, mul vl] +; CHECK-NEXT: mov z26.d, p4/m, #0 // =0x0 +; CHECK-NEXT: st1d { z2.d }, p0, [x8, #5, mul vl] +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: st1d { z29.d }, p0, [x8, #4, mul vl] +; CHECK-NEXT: st1d { z1.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: st1d { z5.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z26.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [x8] +; CHECK-NEXT: ldr z18, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z17, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z16, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z15, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z14, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z13, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z12, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z11, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z10, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z9, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z8, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #12 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - %a = call @llvm.lrint.nxv32i64.nxv16f64( %x) - ret %a + %a = call 
@llvm.lrint.nxv32iXLen.nxv16f64( %x) + ret %a } -declare @llvm.lrint.nxv32i64.nxv32f64() +declare @llvm.lrint.nxv32iXLen.nxv32f64() diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll index 480d0c19db3aa..b7e743b5085f2 100644 --- a/llvm/test/CodeGen/AArch64/vector-llrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll @@ -17,12 +17,12 @@ declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>) define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { ; CHECK-LABEL: llrint_v1i64_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NEXT: frintx v0.4s, v0.4s -; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov h1, v0.h[1] ; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s1, s1 ; CHECK-NEXT: fcvtzs x8, s0 ; CHECK-NEXT: fcvtzs x9, s1 ; CHECK-NEXT: fmov d0, x8 @@ -37,24 +37,22 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { ; CHECK-LABEL: llrint_v4i64_v4f16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: dup v1.2s, v0.s[1] -; CHECK-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NEXT: fcvtl v1.4s, v1.4h -; CHECK-NEXT: frintx v0.4s, v0.4s -; CHECK-NEXT: frintx v1.4s, v1.4s -; CHECK-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NEXT: fcvtn v1.4h, v1.4s +; CHECK-NEXT: mov h1, v0.h[2] ; CHECK-NEXT: mov h2, v0.h[1] +; CHECK-NEXT: mov h3, v0.h[3] ; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: mov h3, v1.h[1] ; CHECK-NEXT: fcvt s1, h1 ; CHECK-NEXT: fcvt s2, h2 -; CHECK-NEXT: fcvtzs x8, s0 ; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: fcvtzs x8, s0 ; CHECK-NEXT: fcvtzs x9, s1 ; CHECK-NEXT: fcvtzs x10, s2 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: fmov d1, x9 ; CHECK-NEXT: mov v0.d[1], x10 ; CHECK-NEXT: mov v1.d[1], x11 @@ -67,48 +65,45 @@ 
declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>) define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; CHECK-LABEL: llrint_v8i64_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: dup v1.2s, v0.s[1] -; CHECK-NEXT: dup v2.2s, v0.s[3] -; CHECK-NEXT: fcvtl v3.4s, v0.4h -; CHECK-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-NEXT: fcvtl v1.4s, v1.4h -; CHECK-NEXT: fcvtl v2.4s, v2.4h -; CHECK-NEXT: frintx v3.4s, v3.4s -; CHECK-NEXT: frintx v0.4s, v0.4s -; CHECK-NEXT: frintx v1.4s, v1.4s -; CHECK-NEXT: frintx v2.4s, v2.4s -; CHECK-NEXT: fcvtn v3.4h, v3.4s -; CHECK-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NEXT: fcvtn v1.4h, v1.4s -; CHECK-NEXT: fcvtn v2.4h, v2.4s -; CHECK-NEXT: mov h4, v3.h[1] -; CHECK-NEXT: mov h5, v0.h[1] +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov h4, v0.h[2] +; CHECK-NEXT: mov h3, v0.h[1] +; CHECK-NEXT: mov h7, v0.h[3] ; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: mov h6, v1.h[1] -; CHECK-NEXT: mov h7, v2.h[1] +; CHECK-NEXT: mov h2, v1.h[2] +; CHECK-NEXT: mov h5, v1.h[1] +; CHECK-NEXT: mov h6, v1.h[3] ; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fcvt s2, h2 ; CHECK-NEXT: fcvt s4, h4 +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: fcvt s7, h7 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvt s2, h2 ; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: fcvtzs x9, s3 ; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fcvt s7, h7 -; CHECK-NEXT: fcvtzs x11, s1 -; CHECK-NEXT: fcvtzs x12, s2 -; CHECK-NEXT: fcvtzs x10, s4 -; CHECK-NEXT: fcvtzs x13, s5 -; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: frintx s7, s7 +; CHECK-NEXT: fcvtzs x9, s0 +; CHECK-NEXT: frintx s2, s2 +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvtzs x8, s1 +; CHECK-NEXT: fcvtzs x12, s4 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fcvtzs x15, s7 ; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: fcvtzs x13, s5 ; CHECK-NEXT: fcvtzs x14, s6 -; 
CHECK-NEXT: fcvtzs x15, s7 -; CHECK-NEXT: fmov d1, x11 -; CHECK-NEXT: fmov d3, x12 -; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: fmov d1, x12 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: fmov d3, x10 ; CHECK-NEXT: mov v2.d[1], x13 -; CHECK-NEXT: mov v1.d[1], x14 -; CHECK-NEXT: mov v3.d[1], x15 +; CHECK-NEXT: mov v1.d[1], x15 +; CHECK-NEXT: mov v3.d[1], x14 ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x) ret <8 x i64> %a @@ -118,90 +113,84 @@ declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>) define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-LABEL: llrint_v16i64_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl2 v4.4s, v0.8h -; CHECK-NEXT: fcvtl2 v2.4s, v1.8h -; CHECK-NEXT: fcvtl v3.4s, v0.4h -; CHECK-NEXT: dup v5.2s, v0.s[1] -; CHECK-NEXT: dup v0.2s, v0.s[3] -; CHECK-NEXT: dup v6.2s, v1.s[1] -; CHECK-NEXT: dup v7.2s, v1.s[3] -; CHECK-NEXT: fcvtl v1.4s, v1.4h -; CHECK-NEXT: frintx v4.4s, v4.4s -; CHECK-NEXT: frintx v2.4s, v2.4s -; CHECK-NEXT: frintx v3.4s, v3.4s -; CHECK-NEXT: fcvtl v5.4s, v5.4h -; CHECK-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NEXT: fcvtl v6.4s, v6.4h -; CHECK-NEXT: fcvtl v7.4s, v7.4h -; CHECK-NEXT: frintx v1.4s, v1.4s -; CHECK-NEXT: fcvtn v4.4h, v4.4s -; CHECK-NEXT: fcvtn v2.4h, v2.4s -; CHECK-NEXT: fcvtn v3.4h, v3.4s -; CHECK-NEXT: frintx v5.4s, v5.4s -; CHECK-NEXT: frintx v0.4s, v0.4s -; CHECK-NEXT: frintx v6.4s, v6.4s -; CHECK-NEXT: frintx v7.4s, v7.4s -; CHECK-NEXT: fcvtn v1.4h, v1.4s -; CHECK-NEXT: mov h16, v4.h[1] +; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: mov h17, v0.h[1] +; CHECK-NEXT: mov h19, v0.h[2] +; CHECK-NEXT: fcvt s18, h0 +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: mov h4, v2.h[1] +; CHECK-NEXT: mov h5, v2.h[2] +; CHECK-NEXT: fcvt s7, h3 +; CHECK-NEXT: fcvt s6, h2 +; CHECK-NEXT: mov h16, v3.h[2] +; CHECK-NEXT: mov h2, v2.h[3] +; CHECK-NEXT: fcvt s17, h17 +; CHECK-NEXT: fcvt s19, h19 +; CHECK-NEXT: 
frintx s18, s18 +; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: fcvt s17, h2 -; CHECK-NEXT: mov h18, v3.h[1] -; CHECK-NEXT: fcvtn v5.4h, v5.4s -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NEXT: fcvtn v6.4h, v6.4s -; CHECK-NEXT: fcvtn v7.4h, v7.4s -; CHECK-NEXT: mov h2, v2.h[1] +; CHECK-NEXT: fcvt s5, h5 +; CHECK-NEXT: frintx s7, s7 +; CHECK-NEXT: frintx s6, s6 ; CHECK-NEXT: fcvt s16, h16 -; CHECK-NEXT: fcvtzs x8, s4 -; CHECK-NEXT: fcvtzs x9, s17 -; CHECK-NEXT: fcvt s4, h18 -; CHECK-NEXT: fcvt s17, h5 -; CHECK-NEXT: fcvtzs x10, s3 -; CHECK-NEXT: mov h3, v5.h[1] -; CHECK-NEXT: fcvt s5, h0 -; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: mov h18, v6.h[1] -; CHECK-NEXT: mov h19, v7.h[1] -; CHECK-NEXT: fcvtzs x11, s16 -; CHECK-NEXT: mov h16, v1.h[1] +; CHECK-NEXT: fcvt s2, h2 +; CHECK-NEXT: frintx s17, s17 +; CHECK-NEXT: frintx s19, s19 +; CHECK-NEXT: fcvtzs x13, s18 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: fcvtzs x9, s7 +; CHECK-NEXT: mov h7, v1.h[2] +; CHECK-NEXT: fcvtzs x8, s6 +; CHECK-NEXT: mov h6, v1.h[1] +; CHECK-NEXT: frintx s16, s16 +; CHECK-NEXT: fcvtzs x14, s17 +; CHECK-NEXT: fcvtzs x15, s19 +; CHECK-NEXT: fcvtzs x10, s4 +; CHECK-NEXT: mov h4, v3.h[1] +; CHECK-NEXT: fcvtzs x11, s5 +; CHECK-NEXT: mov h5, v1.h[3] +; CHECK-NEXT: mov h3, v3.h[3] ; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fcvtzs x12, s4 -; CHECK-NEXT: fcvt s4, h6 -; CHECK-NEXT: fcvtzs x13, s17 -; CHECK-NEXT: fcvtzs x14, s5 -; CHECK-NEXT: fcvt s5, h7 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvt s7, h2 -; CHECK-NEXT: fcvt s17, h0 -; CHECK-NEXT: fcvt s18, h18 -; CHECK-NEXT: fcvt s16, h16 -; CHECK-NEXT: fcvt s19, h19 -; CHECK-NEXT: fcvtzs x15, s1 +; CHECK-NEXT: fcvt s7, h7 +; CHECK-NEXT: fcvt s6, h6 +; CHECK-NEXT: fcvtzs x12, s16 +; CHECK-NEXT: frintx s16, s2 ; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fcvtzs x8, s4 -; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: fcvtzs x10, s5 +; CHECK-NEXT: fcvt s4, h4 +; 
CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: fcvt s5, h5 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: frintx s7, s7 +; CHECK-NEXT: frintx s17, s6 ; CHECK-NEXT: fmov d6, x9 -; CHECK-NEXT: fcvtzs x9, s3 -; CHECK-NEXT: fmov d1, x13 -; CHECK-NEXT: fcvtzs x13, s17 -; CHECK-NEXT: fcvtzs x17, s7 -; CHECK-NEXT: fcvtzs x16, s16 +; CHECK-NEXT: mov v2.d[1], x10 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: frintx s18, s3 +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: fcvtzs x8, s1 +; CHECK-NEXT: fcvtzs x9, s7 +; CHECK-NEXT: fmov d3, x11 +; CHECK-NEXT: fcvtzs x11, s0 +; CHECK-NEXT: fmov d7, x12 +; CHECK-NEXT: fcvtzs x12, s16 +; CHECK-NEXT: fcvtzs x16, s17 +; CHECK-NEXT: fcvtzs x17, s4 +; CHECK-NEXT: fmov d0, x13 +; CHECK-NEXT: fmov d1, x15 ; CHECK-NEXT: fcvtzs x18, s18 -; CHECK-NEXT: fcvtzs x0, s19 -; CHECK-NEXT: fmov d3, x14 -; CHECK-NEXT: fmov d4, x15 -; CHECK-NEXT: fmov d5, x8 -; CHECK-NEXT: fmov d7, x10 -; CHECK-NEXT: mov v0.d[1], x12 -; CHECK-NEXT: mov v1.d[1], x9 -; CHECK-NEXT: mov v2.d[1], x11 -; CHECK-NEXT: mov v6.d[1], x17 -; CHECK-NEXT: mov v3.d[1], x13 +; CHECK-NEXT: fcvtzs x0, s5 +; CHECK-NEXT: fmov d4, x8 +; CHECK-NEXT: fmov d5, x9 +; CHECK-NEXT: mov v0.d[1], x14 +; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: mov v3.d[1], x12 ; CHECK-NEXT: mov v4.d[1], x16 -; CHECK-NEXT: mov v5.d[1], x18 -; CHECK-NEXT: mov v7.d[1], x0 +; CHECK-NEXT: mov v6.d[1], x17 +; CHECK-NEXT: mov v7.d[1], x18 +; CHECK-NEXT: mov v5.d[1], x0 ; CHECK-NEXT: ret %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x) ret <16 x i64> %a @@ -211,182 +200,170 @@ declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>) define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-LABEL: llrint_v32i64_v32f16: ; CHECK: // %bb.0: -; CHECK-NEXT: dup v4.2s, v1.s[1] -; CHECK-NEXT: fcvtl v5.4s, v0.4h -; CHECK-NEXT: dup v6.2s, v1.s[3] -; CHECK-NEXT: fcvtl v7.4s, v1.4h -; CHECK-NEXT: dup v16.2s, v2.s[3] -; CHECK-NEXT: fcvtl v17.4s, v2.4h -; CHECK-NEXT: dup v19.2s, v2.s[1] -; CHECK-NEXT: dup v18.2s, v0.s[1] -; 
CHECK-NEXT: dup v21.2s, v3.s[1] -; CHECK-NEXT: dup v24.2s, v3.s[3] -; CHECK-NEXT: fcvtl2 v1.4s, v1.8h -; CHECK-NEXT: fcvtl2 v2.4s, v2.8h -; CHECK-NEXT: fcvtl v4.4s, v4.4h -; CHECK-NEXT: frintx v5.4s, v5.4s -; CHECK-NEXT: fcvtl v6.4s, v6.4h -; CHECK-NEXT: frintx v7.4s, v7.4s -; CHECK-NEXT: fcvtl v16.4s, v16.4h -; CHECK-NEXT: frintx v22.4s, v17.4s -; CHECK-NEXT: fcvtl v19.4s, v19.4h -; CHECK-NEXT: dup v17.2s, v0.s[3] -; CHECK-NEXT: fcvtl v21.4s, v21.4h -; CHECK-NEXT: fcvtl v24.4s, v24.4h -; CHECK-NEXT: frintx v1.4s, v1.4s -; CHECK-NEXT: frintx v2.4s, v2.4s -; CHECK-NEXT: frintx v20.4s, v4.4s -; CHECK-NEXT: fcvtn v4.4h, v5.4s -; CHECK-NEXT: frintx v23.4s, v6.4s -; CHECK-NEXT: fcvtn v5.4h, v7.4s -; CHECK-NEXT: frintx v25.4s, v16.4s -; CHECK-NEXT: fcvtn v16.4h, v22.4s -; CHECK-NEXT: frintx v26.4s, v19.4s -; CHECK-NEXT: fcvtn v6.4h, v20.4s -; CHECK-NEXT: fcvtl v20.4s, v3.4h -; CHECK-NEXT: fcvt s22, h4 -; CHECK-NEXT: fcvtn v7.4h, v23.4s -; CHECK-NEXT: fcvtl2 v23.4s, v3.8h -; CHECK-NEXT: fcvtl v3.4s, v18.4h -; CHECK-NEXT: fcvtn v25.4h, v25.4s -; CHECK-NEXT: fcvt s27, h5 -; CHECK-NEXT: fcvtl v18.4s, v17.4h -; CHECK-NEXT: frintx v17.4s, v21.4s -; CHECK-NEXT: fcvt s29, h16 -; CHECK-NEXT: mov h16, v16.h[1] -; CHECK-NEXT: frintx v20.4s, v20.4s -; CHECK-NEXT: fcvtzs x9, s22 -; CHECK-NEXT: fcvt s28, h6 -; CHECK-NEXT: fcvt s22, h7 -; CHECK-NEXT: frintx v19.4s, v3.4s -; CHECK-NEXT: fcvtn v3.4h, v26.4s -; CHECK-NEXT: mov h21, v25.h[1] -; CHECK-NEXT: frintx v23.4s, v23.4s -; CHECK-NEXT: fcvtzs x10, s27 -; CHECK-NEXT: fcvtl2 v26.4s, v0.8h -; CHECK-NEXT: fcvt s25, h25 -; CHECK-NEXT: fcvtn v17.4h, v17.4s -; CHECK-NEXT: fcvtn v20.4h, v20.4s -; CHECK-NEXT: fcvtzs x12, s28 -; CHECK-NEXT: fcvtzs x14, s29 -; CHECK-NEXT: fcvtzs x13, s22 -; CHECK-NEXT: frintx v22.4s, v24.4s -; CHECK-NEXT: fcvt s24, h3 -; CHECK-NEXT: fcvt s21, h21 -; CHECK-NEXT: fcvtn v23.4h, v23.4s -; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: fcvtzs x15, s25 -; CHECK-NEXT: mov h25, v17.h[1] -; CHECK-NEXT: fcvt s17, h17 -; 
CHECK-NEXT: mov h27, v20.h[1] +; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v5.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: ext v6.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: ext v7.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov h19, v0.h[1] +; CHECK-NEXT: fcvt s21, h0 +; CHECK-NEXT: mov h23, v1.h[2] +; CHECK-NEXT: fcvt s22, h1 +; CHECK-NEXT: fcvt s26, h2 +; CHECK-NEXT: mov h27, v2.h[1] +; CHECK-NEXT: mov h28, v2.h[2] +; CHECK-NEXT: mov h16, v4.h[2] +; CHECK-NEXT: fcvt s17, h5 +; CHECK-NEXT: mov h18, v5.h[2] +; CHECK-NEXT: mov h20, v6.h[2] +; CHECK-NEXT: fcvt s24, h7 +; CHECK-NEXT: fcvt s25, h6 +; CHECK-NEXT: fcvt s19, h19 +; CHECK-NEXT: frintx s22, s22 +; CHECK-NEXT: fcvt s16, h16 +; CHECK-NEXT: frintx s17, s17 +; CHECK-NEXT: fcvt s18, h18 ; CHECK-NEXT: fcvt s20, h20 -; CHECK-NEXT: fcvtn v28.4h, v2.4s -; CHECK-NEXT: fcvtn v22.4h, v22.4s -; CHECK-NEXT: fcvtzs x10, s24 -; CHECK-NEXT: frintx v24.4s, v26.4s -; CHECK-NEXT: fcvtzs x11, s21 -; CHECK-NEXT: mov h26, v23.h[1] -; CHECK-NEXT: fcvt s23, h23 -; CHECK-NEXT: fcvt s25, h25 -; CHECK-NEXT: fmov d2, x13 -; CHECK-NEXT: fcvtzs x13, s17 -; CHECK-NEXT: fcvt s21, h27 -; CHECK-NEXT: fcvtzs x16, s20 -; CHECK-NEXT: fcvtn v27.4h, v1.4s -; CHECK-NEXT: mov h20, v22.h[1] -; CHECK-NEXT: fcvt s22, h22 -; CHECK-NEXT: fcvtn v24.4h, v24.4s -; CHECK-NEXT: fmov d1, x12 -; CHECK-NEXT: fcvtzs x0, s23 -; CHECK-NEXT: fmov d17, x14 -; CHECK-NEXT: fcvtzs x18, s25 -; CHECK-NEXT: mov h25, v28.h[1] -; CHECK-NEXT: fcvt s23, h28 -; CHECK-NEXT: fcvtzs x12, s21 -; CHECK-NEXT: fcvt s21, h26 +; CHECK-NEXT: frintx s16, s16 +; CHECK-NEXT: fcvtzs x12, s17 +; CHECK-NEXT: frintx s17, s18 +; CHECK-NEXT: frintx s18, s21 +; CHECK-NEXT: fcvt s21, h23 +; CHECK-NEXT: frintx s23, s24 +; CHECK-NEXT: frintx s24, s25 +; CHECK-NEXT: frintx s25, s19 +; CHECK-NEXT: mov h19, v7.h[1] +; CHECK-NEXT: fcvtzs x13, s16 +; CHECK-NEXT: frintx s16, s20 +; CHECK-NEXT: frintx s20, s26 +; CHECK-NEXT: fcvtzs x9, s23 +; CHECK-NEXT: mov h23, v3.h[2] ; CHECK-NEXT: fcvt s26, h27 -; 
CHECK-NEXT: fcvt s20, h20 -; CHECK-NEXT: fcvtzs x17, s22 -; CHECK-NEXT: fcvt s22, h24 -; CHECK-NEXT: frintx v18.4s, v18.4s +; CHECK-NEXT: fcvtzs x15, s24 +; CHECK-NEXT: fcvtzs x10, s25 +; CHECK-NEXT: fcvt s24, h28 +; CHECK-NEXT: mov h25, v3.h[3] +; CHECK-NEXT: fcvtzs x14, s17 +; CHECK-NEXT: frintx s21, s21 +; CHECK-NEXT: fmov d17, x12 +; CHECK-NEXT: fcvtzs x12, s16 +; CHECK-NEXT: fmov d16, x13 +; CHECK-NEXT: fcvtzs x13, s22 +; CHECK-NEXT: fcvt s22, h3 ; CHECK-NEXT: mov h3, v3.h[1] -; CHECK-NEXT: mov h7, v7.h[1] -; CHECK-NEXT: fcvt s25, h25 -; CHECK-NEXT: fcvtn v19.4h, v19.4s -; CHECK-NEXT: fcvt s16, h16 +; CHECK-NEXT: mov h27, v0.h[2] +; CHECK-NEXT: mov h28, v2.h[3] +; CHECK-NEXT: fcvt s23, h23 +; CHECK-NEXT: frintx s26, s26 +; CHECK-NEXT: fcvtzs x16, s20 +; CHECK-NEXT: frintx s20, s24 +; CHECK-NEXT: fcvt s24, h25 +; CHECK-NEXT: fcvtzs x11, s18 +; CHECK-NEXT: fmov d18, x14 ; CHECK-NEXT: fcvtzs x14, s21 -; CHECK-NEXT: fmov d21, x15 -; CHECK-NEXT: mov h5, v5.h[1] +; CHECK-NEXT: frintx s22, s22 +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: fcvt s25, h27 +; CHECK-NEXT: fcvt s27, h28 +; CHECK-NEXT: frintx s23, s23 +; CHECK-NEXT: mov h21, v1.h[3] +; CHECK-NEXT: fmov d2, x15 +; CHECK-NEXT: fcvtzs x15, s26 +; CHECK-NEXT: fmov d26, x13 +; CHECK-NEXT: mov h1, v1.h[1] +; CHECK-NEXT: fcvtzs x13, s20 +; CHECK-NEXT: frintx s20, s24 +; CHECK-NEXT: fmov d24, x14 +; CHECK-NEXT: fcvtzs x14, s22 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: fmov d22, x16 +; CHECK-NEXT: frintx s27, s27 +; CHECK-NEXT: fcvtzs x16, s23 +; CHECK-NEXT: fcvt s21, h21 +; CHECK-NEXT: frintx s25, s25 +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: mov h23, v7.h[2] +; CHECK-NEXT: mov v22.d[1], x15 ; CHECK-NEXT: fcvtzs x15, s20 -; CHECK-NEXT: fmov d20, x16 -; CHECK-NEXT: fcvtzs x16, s22 -; CHECK-NEXT: fmov d22, x17 -; CHECK-NEXT: fcvtzs x17, s26 -; CHECK-NEXT: fmov d26, x0 -; CHECK-NEXT: fcvtn v18.4h, v18.4s +; CHECK-NEXT: fmov d20, x13 +; CHECK-NEXT: fcvtzs x13, s3 +; CHECK-NEXT: fmov d3, x14 
+; CHECK-NEXT: fcvtzs x14, s27 +; CHECK-NEXT: fmov d27, x16 +; CHECK-NEXT: frintx s21, s21 +; CHECK-NEXT: mov h7, v7.h[3] +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fcvt s23, h23 +; CHECK-NEXT: fcvt s19, h19 +; CHECK-NEXT: mov v27.d[1], x15 +; CHECK-NEXT: fcvtzs x15, s25 +; CHECK-NEXT: mov h25, v6.h[3] ; CHECK-NEXT: mov h6, v6.h[1] -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: mov v20.d[1], x12 -; CHECK-NEXT: fcvtzs x12, s25 +; CHECK-NEXT: mov v3.d[1], x13 +; CHECK-NEXT: fcvtzs x13, s21 +; CHECK-NEXT: mov h21, v5.h[1] +; CHECK-NEXT: mov h5, v5.h[3] +; CHECK-NEXT: mov v20.d[1], x14 +; CHECK-NEXT: fcvtzs x14, s1 +; CHECK-NEXT: mov h1, v4.h[1] +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvt s25, h25 ; CHECK-NEXT: fcvt s7, h7 -; CHECK-NEXT: mov v26.d[1], x14 -; CHECK-NEXT: mov v22.d[1], x15 -; CHECK-NEXT: fcvtzs x14, s23 -; CHECK-NEXT: fmov d23, x13 -; CHECK-NEXT: mov v21.d[1], x11 -; CHECK-NEXT: mov h4, v4.h[1] -; CHECK-NEXT: mov h25, v19.h[1] +; CHECK-NEXT: stp q3, q27, [x8, #192] ; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: mov h3, v4.h[3] +; CHECK-NEXT: stp q22, q20, [x8, #128] +; CHECK-NEXT: fcvt s21, h21 ; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: fcvt s19, h19 -; CHECK-NEXT: fcvtzs x13, s7 -; CHECK-NEXT: stp q26, q22, [x8, #224] -; CHECK-NEXT: mov v23.d[1], x18 -; CHECK-NEXT: mov h26, v27.h[1] -; CHECK-NEXT: fmov d22, x14 +; CHECK-NEXT: mov v24.d[1], x13 +; CHECK-NEXT: mov v26.d[1], x14 ; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: fmov d3, x16 -; CHECK-NEXT: fcvt s7, h25 -; CHECK-NEXT: fcvtzs x14, s6 -; CHECK-NEXT: mov v2.d[1], x13 -; CHECK-NEXT: stp q20, q23, [x8, #192] -; CHECK-NEXT: fcvt s23, h26 -; CHECK-NEXT: mov v22.d[1], x12 -; CHECK-NEXT: fmov d20, x10 -; CHECK-NEXT: fcvtzs x10, s16 -; CHECK-NEXT: mov h16, v24.h[1] -; CHECK-NEXT: mov h24, v18.h[1] -; CHECK-NEXT: fcvt s18, h18 -; CHECK-NEXT: mov v1.d[1], x14 -; CHECK-NEXT: fcvtzs x14, s7 -; CHECK-NEXT: stp q22, q21, [x8, #160] -; CHECK-NEXT: fcvtzs x12, s23 -; 
CHECK-NEXT: fmov d21, x17 -; CHECK-NEXT: fcvt s16, h16 +; CHECK-NEXT: frintx s22, s25 +; CHECK-NEXT: fmov d20, x12 +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: frintx s6, s6 +; CHECK-NEXT: fcvt s3, h3 +; CHECK-NEXT: fcvtzs x12, s0 +; CHECK-NEXT: frintx s5, s5 +; CHECK-NEXT: frintx s21, s21 +; CHECK-NEXT: fmov d0, x11 +; CHECK-NEXT: stp q26, q24, [x8, #64] +; CHECK-NEXT: fmov d24, x15 +; CHECK-NEXT: frintx s4, s4 +; CHECK-NEXT: fcvtzs x11, s22 +; CHECK-NEXT: frintx s22, s23 +; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvtzs x13, s6 +; CHECK-NEXT: frintx s3, s3 +; CHECK-NEXT: frintx s6, s7 +; CHECK-NEXT: fcvtzs x14, s5 +; CHECK-NEXT: mov v24.d[1], x12 +; CHECK-NEXT: frintx s5, s19 +; CHECK-NEXT: fcvtzs x12, s21 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: fcvtzs x10, s4 ; CHECK-NEXT: mov v20.d[1], x11 -; CHECK-NEXT: fcvtzs x11, s5 -; CHECK-NEXT: fcvt s22, h24 -; CHECK-NEXT: mov v17.d[1], x10 -; CHECK-NEXT: fcvtzs x10, s18 -; CHECK-NEXT: mov v21.d[1], x12 -; CHECK-NEXT: fcvtzs x12, s19 -; CHECK-NEXT: fcvtzs x15, s16 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: fcvtzs x11, s4 -; CHECK-NEXT: stp q17, q20, [x8, #128] -; CHECK-NEXT: fcvtzs x13, s22 -; CHECK-NEXT: fmov d4, x10 -; CHECK-NEXT: stp q21, q2, [x8, #96] -; CHECK-NEXT: fmov d5, x12 +; CHECK-NEXT: fcvtzs x11, s22 +; CHECK-NEXT: mov v2.d[1], x13 +; CHECK-NEXT: fcvtzs x15, s3 +; CHECK-NEXT: fcvtzs x13, s1 +; CHECK-NEXT: mov v18.d[1], x14 +; CHECK-NEXT: fcvtzs x14, s6 +; CHECK-NEXT: stp q0, q24, [x8] +; CHECK-NEXT: mov v17.d[1], x12 +; CHECK-NEXT: fcvtzs x12, s5 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: fmov d1, x11 +; CHECK-NEXT: stp q2, q20, [x8, #224] ; CHECK-NEXT: fmov d2, x9 -; CHECK-NEXT: stp q0, q1, [x8, #64] -; CHECK-NEXT: mov v3.d[1], x15 -; CHECK-NEXT: mov v4.d[1], x13 -; CHECK-NEXT: mov v5.d[1], x14 -; CHECK-NEXT: mov v2.d[1], x11 -; CHECK-NEXT: stp q3, q4, [x8, #32] -; CHECK-NEXT: stp q2, q5, [x8] +; CHECK-NEXT: mov v16.d[1], x15 +; CHECK-NEXT: stp q17, q18, [x8, #160] +; CHECK-NEXT: mov v0.d[1], x13 +; 
CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: mov v2.d[1], x12 +; CHECK-NEXT: stp q0, q16, [x8, #96] +; CHECK-NEXT: stp q2, q1, [x8, #32] ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half> %x) ret <32 x i64> %a @@ -396,10 +373,10 @@ declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>) define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { ; CHECK-LABEL: llrint_v1i64_v1f32: ; CHECK: // %bb.0: -; CHECK-NEXT: frintx v0.2s, v0.2s -; CHECK-NEXT: fcvtl v0.2d, v0.2s -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x) ret <1 x i64> %a @@ -410,8 +387,11 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { ; CHECK-LABEL: llrint_v2i64_v2f32: ; CHECK: // %bb.0: ; CHECK-NEXT: frintx v0.2s, v0.2s -; CHECK-NEXT: fcvtl v0.2d, v0.2s -; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x) ret <2 x i64> %a @@ -424,10 +404,16 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: frintx v0.2s, v0.2s ; CHECK-NEXT: frintx v1.2s, v1.2s -; CHECK-NEXT: fcvtl v0.2d, v0.2s -; CHECK-NEXT: fcvtl v1.2d, v1.2s -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-NEXT: mov s2, v0.s[1] +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: mov s3, v1.s[1] +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x11 ; CHECK-NEXT: ret %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x) 
ret <4 x i64> %a @@ -443,14 +429,26 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { ; CHECK-NEXT: frintx v1.2s, v1.2s ; CHECK-NEXT: frintx v2.2s, v2.2s ; CHECK-NEXT: frintx v3.2s, v3.2s -; CHECK-NEXT: fcvtl v0.2d, v0.2s -; CHECK-NEXT: fcvtl v1.2d, v1.2s -; CHECK-NEXT: fcvtl v4.2d, v2.2s -; CHECK-NEXT: fcvtl v3.2d, v3.2s -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v2.2d, v1.2d -; CHECK-NEXT: fcvtzs v1.2d, v4.2d -; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: mov s4, v0.s[1] +; CHECK-NEXT: mov s5, v1.s[1] +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fcvtzs x10, s1 +; CHECK-NEXT: mov s6, v2.s[1] +; CHECK-NEXT: mov s7, v3.s[1] +; CHECK-NEXT: fcvtzs x11, s2 +; CHECK-NEXT: fcvtzs x12, s3 +; CHECK-NEXT: fcvtzs x9, s4 +; CHECK-NEXT: fcvtzs x13, s5 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d2, x10 +; CHECK-NEXT: fcvtzs x14, s6 +; CHECK-NEXT: fcvtzs x15, s7 +; CHECK-NEXT: fmov d1, x11 +; CHECK-NEXT: fmov d3, x12 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: mov v2.d[1], x13 +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: mov v3.d[1], x15 ; CHECK-NEXT: ret %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x) ret <8 x i64> %a @@ -460,34 +458,58 @@ declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { ; CHECK-LABEL: llrint_v16i64_v16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: ext v5.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: frintx v4.2s, v0.2s +; CHECK-NEXT: frintx v5.2s, v1.2s +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8 ; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #8 ; CHECK-NEXT: ext v7.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: frintx v0.2s, v0.2s -; CHECK-NEXT: frintx v1.2s, v1.2s ; CHECK-NEXT: frintx v2.2s, v2.2s ; CHECK-NEXT: frintx v3.2s, v3.2s -; CHECK-NEXT: frintx v5.2s, v5.2s -; CHECK-NEXT: frintx v4.2s, v4.2s -; CHECK-NEXT: frintx v6.2s, v6.2s -; CHECK-NEXT: frintx v7.2s, v7.2s -; CHECK-NEXT: 
fcvtl v0.2d, v0.2s -; CHECK-NEXT: fcvtl v1.2d, v1.2s -; CHECK-NEXT: fcvtl v16.2d, v2.2s -; CHECK-NEXT: fcvtl v18.2d, v3.2s -; CHECK-NEXT: fcvtl v5.2d, v5.2s -; CHECK-NEXT: fcvtl v17.2d, v4.2s -; CHECK-NEXT: fcvtl v19.2d, v6.2s -; CHECK-NEXT: fcvtl v7.2d, v7.2s -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v2.2d, v1.2d -; CHECK-NEXT: fcvtzs v4.2d, v16.2d -; CHECK-NEXT: fcvtzs v6.2d, v18.2d -; CHECK-NEXT: fcvtzs v1.2d, v5.2d -; CHECK-NEXT: fcvtzs v3.2d, v17.2d -; CHECK-NEXT: fcvtzs v5.2d, v19.2d -; CHECK-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-NEXT: mov s16, v4.s[1] +; CHECK-NEXT: mov s17, v5.s[1] +; CHECK-NEXT: fcvtzs x8, s4 +; CHECK-NEXT: frintx v0.2s, v0.2s +; CHECK-NEXT: frintx v1.2s, v1.2s +; CHECK-NEXT: fcvtzs x9, s5 +; CHECK-NEXT: frintx v4.2s, v6.2s +; CHECK-NEXT: frintx v5.2s, v7.2s +; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: mov s6, v2.s[1] +; CHECK-NEXT: fcvtzs x13, s3 +; CHECK-NEXT: mov s3, v3.s[1] +; CHECK-NEXT: fcvtzs x11, s16 +; CHECK-NEXT: fcvtzs x12, s17 +; CHECK-NEXT: mov s7, v0.s[1] +; CHECK-NEXT: mov s16, v1.s[1] +; CHECK-NEXT: fcvtzs x15, s1 +; CHECK-NEXT: mov s1, v4.s[1] +; CHECK-NEXT: mov s17, v5.s[1] +; CHECK-NEXT: fcvtzs x14, s0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fcvtzs x8, s4 +; CHECK-NEXT: fmov d4, x10 +; CHECK-NEXT: fcvtzs x10, s5 +; CHECK-NEXT: fmov d2, x9 +; CHECK-NEXT: fcvtzs x9, s6 +; CHECK-NEXT: fmov d6, x13 +; CHECK-NEXT: fcvtzs x13, s7 +; CHECK-NEXT: fcvtzs x16, s16 +; CHECK-NEXT: fcvtzs x17, s3 +; CHECK-NEXT: fcvtzs x18, s1 +; CHECK-NEXT: fcvtzs x0, s17 +; CHECK-NEXT: fmov d1, x14 +; CHECK-NEXT: fmov d3, x15 +; CHECK-NEXT: fmov d5, x8 +; CHECK-NEXT: fmov d7, x10 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v2.d[1], x12 +; CHECK-NEXT: mov v4.d[1], x9 +; CHECK-NEXT: mov v1.d[1], x13 +; CHECK-NEXT: mov v3.d[1], x16 +; CHECK-NEXT: mov v6.d[1], x17 +; CHECK-NEXT: mov v5.d[1], x18 +; CHECK-NEXT: mov v7.d[1], x0 ; CHECK-NEXT: ret %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x) ret <16 x i64> %a @@ 
-497,70 +519,118 @@ declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) { ; CHECK-LABEL: llrint_v32i64_v32f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v16.16b, v7.16b, v7.16b, #8 -; CHECK-NEXT: ext v17.16b, v6.16b, v6.16b, #8 +; CHECK-NEXT: ext v17.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: ext v18.16b, v4.16b, v4.16b, #8 +; CHECK-NEXT: ext v19.16b, v5.16b, v5.16b, #8 +; CHECK-NEXT: ext v21.16b, v7.16b, v7.16b, #8 +; CHECK-NEXT: ext v16.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: ext v20.16b, v6.16b, v6.16b, #8 ; CHECK-NEXT: frintx v7.2s, v7.2s -; CHECK-NEXT: frintx v6.2s, v6.2s -; CHECK-NEXT: ext v18.16b, v5.16b, v5.16b, #8 -; CHECK-NEXT: ext v21.16b, v4.16b, v4.16b, #8 -; CHECK-NEXT: ext v22.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: frintx v5.2s, v5.2s -; CHECK-NEXT: ext v23.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: frintx v24.2s, v6.2s +; CHECK-NEXT: frintx v23.2s, v5.2s ; CHECK-NEXT: frintx v4.2s, v4.2s -; CHECK-NEXT: ext v19.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v20.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: frintx v16.2s, v16.2s +; CHECK-NEXT: frintx v3.2s, v3.2s ; CHECK-NEXT: frintx v17.2s, v17.2s -; CHECK-NEXT: fcvtl v7.2d, v7.2s -; CHECK-NEXT: fcvtl v6.2d, v6.2s ; CHECK-NEXT: frintx v18.2s, v18.2s +; CHECK-NEXT: frintx v22.2s, v19.2s ; CHECK-NEXT: frintx v21.2s, v21.2s -; CHECK-NEXT: frintx v2.2s, v2.2s -; CHECK-NEXT: frintx v3.2s, v3.2s -; CHECK-NEXT: fcvtl v5.2d, v5.2s -; CHECK-NEXT: frintx v23.2s, v23.2s -; CHECK-NEXT: fcvtl v4.2d, v4.2s -; CHECK-NEXT: frintx v1.2s, v1.2s -; CHECK-NEXT: fcvtl v16.2d, v16.2s -; CHECK-NEXT: fcvtl v17.2d, v17.2s -; CHECK-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-NEXT: fcvtzs v6.2d, v6.2d -; CHECK-NEXT: fcvtl v18.2d, v18.2s -; CHECK-NEXT: fcvtl v21.2d, v21.2s +; CHECK-NEXT: frintx v16.2s, v16.2s ; CHECK-NEXT: frintx v20.2s, v20.2s -; CHECK-NEXT: fcvtl v3.2d, v3.2s -; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: mov s25, v7.s[1] +; CHECK-NEXT: fcvtzs x15, s7 +; CHECK-NEXT: frintx 
v19.2s, v1.2s +; CHECK-NEXT: fcvtzs x16, s24 +; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: fcvtzs x10, s17 +; CHECK-NEXT: fcvtzs x11, s18 +; CHECK-NEXT: mov s26, v22.s[1] +; CHECK-NEXT: fcvtzs x12, s22 +; CHECK-NEXT: mov s22, v21.s[1] +; CHECK-NEXT: fcvtzs x14, s21 +; CHECK-NEXT: mov s21, v24.s[1] +; CHECK-NEXT: fcvtzs x9, s16 +; CHECK-NEXT: fcvtzs x13, s20 +; CHECK-NEXT: mov s20, v20.s[1] +; CHECK-NEXT: fmov d24, x15 +; CHECK-NEXT: mov s18, v18.s[1] +; CHECK-NEXT: fmov d6, x10 +; CHECK-NEXT: fmov d7, x11 +; CHECK-NEXT: fcvtzs x10, s25 +; CHECK-NEXT: fcvtzs x11, s22 +; CHECK-NEXT: fmov d25, x12 +; CHECK-NEXT: frintx v22.2s, v2.2s +; CHECK-NEXT: fcvtzs x15, s21 +; CHECK-NEXT: fmov d21, x14 +; CHECK-NEXT: fmov d5, x9 +; CHECK-NEXT: fcvtzs x9, s26 +; CHECK-NEXT: fmov d26, x13 +; CHECK-NEXT: fcvtzs x12, s20 +; CHECK-NEXT: fcvtzs x13, s19 +; CHECK-NEXT: mov s20, v23.s[1] +; CHECK-NEXT: mov v24.d[1], x10 +; CHECK-NEXT: mov v21.d[1], x11 +; CHECK-NEXT: fcvtzs x11, s23 +; CHECK-NEXT: fcvtzs x10, s22 +; CHECK-NEXT: mov s17, v17.s[1] +; CHECK-NEXT: frintx v1.2s, v1.2s +; CHECK-NEXT: mov s22, v22.s[1] +; CHECK-NEXT: mov v26.d[1], x12 +; CHECK-NEXT: fcvtzs x12, s18 +; CHECK-NEXT: mov v25.d[1], x9 +; CHECK-NEXT: fmov d2, x13 +; CHECK-NEXT: fcvtzs x13, s20 +; CHECK-NEXT: fmov d20, x16 +; CHECK-NEXT: stp q24, q21, [x8, #224] +; CHECK-NEXT: ext v21.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: fmov d18, x11 +; CHECK-NEXT: fcvtzs x11, s4 +; CHECK-NEXT: mov s4, v4.s[1] +; CHECK-NEXT: fmov d23, x10 +; CHECK-NEXT: mov v20.d[1], x15 +; CHECK-NEXT: fcvtzs x10, s3 +; CHECK-NEXT: mov s3, v3.s[1] +; CHECK-NEXT: mov v18.d[1], x13 ; CHECK-NEXT: frintx v0.2s, v0.2s -; CHECK-NEXT: fcvtl v2.2d, v2.2s -; CHECK-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-NEXT: fcvtzs v16.2d, v16.2d -; CHECK-NEXT: fcvtzs v17.2d, v17.2d -; CHECK-NEXT: fcvtl v1.2d, v1.2s -; CHECK-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-NEXT: fcvtl v0.2d, v0.2s -; CHECK-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-NEXT: stp q6, q17, [x8, #192] -; 
CHECK-NEXT: fcvtl v6.2d, v23.2s -; CHECK-NEXT: frintx v17.2s, v19.2s -; CHECK-NEXT: stp q7, q16, [x8, #224] -; CHECK-NEXT: frintx v7.2s, v22.2s -; CHECK-NEXT: fcvtzs v16.2d, v18.2d -; CHECK-NEXT: fcvtzs v18.2d, v21.2d -; CHECK-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v6.2d, v6.2d -; CHECK-NEXT: stp q5, q16, [x8, #160] -; CHECK-NEXT: fcvtl v7.2d, v7.2s -; CHECK-NEXT: fcvtl v5.2d, v20.2s -; CHECK-NEXT: stp q4, q18, [x8, #128] -; CHECK-NEXT: fcvtl v4.2d, v17.2s -; CHECK-NEXT: stp q3, q6, [x8, #96] -; CHECK-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-NEXT: fcvtzs v3.2d, v5.2d -; CHECK-NEXT: stp q1, q3, [x8, #32] -; CHECK-NEXT: stp q2, q7, [x8, #64] -; CHECK-NEXT: fcvtzs v2.2d, v4.2d -; CHECK-NEXT: stp q0, q2, [x8] +; CHECK-NEXT: mov s16, v16.s[1] +; CHECK-NEXT: frintx v21.2s, v21.2s +; CHECK-NEXT: fcvtzs x13, s17 +; CHECK-NEXT: fcvtzs x14, s22 +; CHECK-NEXT: fcvtzs x9, s4 +; CHECK-NEXT: fmov d4, x11 +; CHECK-NEXT: mov v7.d[1], x12 +; CHECK-NEXT: stp q20, q26, [x8, #192] +; CHECK-NEXT: fmov d20, x10 +; CHECK-NEXT: fcvtzs x10, s3 +; CHECK-NEXT: stp q18, q25, [x8, #160] +; CHECK-NEXT: mov s18, v19.s[1] +; CHECK-NEXT: mov s3, v1.s[1] +; CHECK-NEXT: mov s17, v0.s[1] +; CHECK-NEXT: mov s19, v21.s[1] +; CHECK-NEXT: fcvtzs x11, s21 +; CHECK-NEXT: mov v4.d[1], x9 +; CHECK-NEXT: fcvtzs x9, s16 +; CHECK-NEXT: fcvtzs x12, s1 +; CHECK-NEXT: mov v6.d[1], x13 +; CHECK-NEXT: fcvtzs x13, s0 +; CHECK-NEXT: mov v20.d[1], x10 +; CHECK-NEXT: fcvtzs x15, s18 +; CHECK-NEXT: fcvtzs x10, s3 +; CHECK-NEXT: mov v23.d[1], x14 +; CHECK-NEXT: fcvtzs x14, s17 +; CHECK-NEXT: fmov d3, x11 +; CHECK-NEXT: stp q4, q7, [x8, #128] +; CHECK-NEXT: mov v5.d[1], x9 +; CHECK-NEXT: fcvtzs x9, s19 +; CHECK-NEXT: stp q20, q6, [x8, #96] +; CHECK-NEXT: fmov d0, x12 +; CHECK-NEXT: fmov d1, x13 +; CHECK-NEXT: mov v2.d[1], x15 +; CHECK-NEXT: stp q23, q5, [x8, #64] +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: mov v3.d[1], x9 +; CHECK-NEXT: stp q2, q0, 
[x8, #32] +; CHECK-NEXT: stp q1, q3, [x8] ; CHECK-NEXT: ret %a = call <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float> %x) ret <32 x i64> %a diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll index 41ba13a863d97..62fb8b1679fc7 100644 --- a/llvm/test/CodeGen/AArch64/vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll @@ -1,725 +1,1430 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s -; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64 -mattr=+neon -global-isel -global-isel-abort=2 2>&1 |\ -; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=aarch64 -mattr=+neon |\ +; RUN: FileCheck %s --check-prefixes=CHECK-i32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=aarch64 -mattr=+neon |\ +; RUN: FileCheck %s --check-prefixes=CHECK-i64,CHECK-i64-SD +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=aarch64 -mattr=+neon \ +; RUN: -global-isel -global-isel-abort=2 2>&1 |\ +; RUN: FileCheck %s --check-prefixes=CHECK-i32,CHECK-i32-GI +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=aarch64 -mattr=+neon \ +; RUN: -global-isel -global-isel-abort=2 2>&1 |\ +; RUN: FileCheck %s --check-prefixes=CHECK-i64,CHECK-i64-GI -; CHECK-GI: warning: Instruction selection used fallback path for lrint_v2f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32i64_v32f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f32 -; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32i64_v32f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32f64 +; CHECK-i32-GI: warning: Instruction selection used fallback path for lrint_v1f16 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f16 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f16 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f16 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f16 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32i64_v32f16 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v1f32 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f32 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f32 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f32 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f32 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32i64_v32f32 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v1f64 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f64 +; CHECK-i32-GI-NEXT: warning: Instruction selection 
used fallback path for lrint_v4f64 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f64 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16f64 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32f64 -define <1 x i64> @lrint_v1f16(<1 x half> %x) { -; CHECK-LABEL: lrint_v1f16: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: frintx s0, s0 -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret - %a = call <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half> %x) - ret <1 x i64> %a +; CHECK-i64-GI: warning: Instruction selection used fallback path for lrint_v2f16 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f16 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f16 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f16 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32i64_v32f16 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f32 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f32 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f32 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f32 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32i64_v32f32 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f64 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f64 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f64 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16f64 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32f64 + +define <1 x iXLen> @lrint_v1f16(<1 x 
half> %x) { +; CHECK-i32-LABEL: lrint_v1f16: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: fcvt s0, h0 +; CHECK-i32-NEXT: frintx s0, s0 +; CHECK-i32-NEXT: fcvtzs w8, s0 +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v1f16: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: fcvt s0, h0 +; CHECK-i64-NEXT: frintx s0, s0 +; CHECK-i64-NEXT: fcvtzs x8, s0 +; CHECK-i64-NEXT: fmov d0, x8 +; CHECK-i64-NEXT: ret + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x) + ret <1 x iXLen> %a } -declare <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half>) +declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>) -define <2 x i64> @lrint_v2f16(<2 x half> %x) { -; CHECK-LABEL: lrint_v2f16: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NEXT: frintx v0.4s, v0.4s -; CHECK-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: fcvtzs x9, s1 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x9 -; CHECK-NEXT: ret - %a = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> %x) - ret <2 x i64> %a +define <2 x iXLen> @lrint_v2f16(<2 x half> %x) { +; CHECK-i32-LABEL: lrint_v2f16: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-i32-NEXT: mov h1, v0.h[1] +; CHECK-i32-NEXT: fcvt s0, h0 +; CHECK-i32-NEXT: fcvt s1, h1 +; CHECK-i32-NEXT: frintx s0, s0 +; CHECK-i32-NEXT: frintx s1, s1 +; CHECK-i32-NEXT: fcvtzs w8, s0 +; CHECK-i32-NEXT: fcvtzs w9, s1 +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: mov v0.s[1], w9 +; CHECK-i32-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v2f16: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-i64-NEXT: mov h1, v0.h[1] +; CHECK-i64-NEXT: fcvt s0, h0 +; CHECK-i64-NEXT: fcvt s1, h1 +; CHECK-i64-NEXT: frintx s0, s0 +; CHECK-i64-NEXT: frintx s1, s1 +; CHECK-i64-NEXT: fcvtzs x8, s0 +; 
CHECK-i64-NEXT: fcvtzs x9, s1 +; CHECK-i64-NEXT: fmov d0, x8 +; CHECK-i64-NEXT: mov v0.d[1], x9 +; CHECK-i64-NEXT: ret + %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x) + ret <2 x iXLen> %a } -declare <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half>) +declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>) -define <4 x i64> @lrint_v4f16(<4 x half> %x) { -; CHECK-LABEL: lrint_v4f16: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: dup v1.2s, v0.s[1] -; CHECK-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NEXT: fcvtl v1.4s, v1.4h -; CHECK-NEXT: frintx v0.4s, v0.4s -; CHECK-NEXT: frintx v1.4s, v1.4s -; CHECK-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NEXT: fcvtn v1.4h, v1.4s -; CHECK-NEXT: mov h2, v0.h[1] -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: mov h3, v1.h[1] -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fcvt s2, h2 -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvtzs x9, s1 -; CHECK-NEXT: fcvtzs x10, s2 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: mov v1.d[1], x11 -; CHECK-NEXT: ret - %a = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> %x) - ret <4 x i64> %a +define <4 x iXLen> @lrint_v4f16(<4 x half> %x) { +; CHECK-i32-LABEL: lrint_v4f16: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-i32-NEXT: mov h1, v0.h[1] +; CHECK-i32-NEXT: fcvt s2, h0 +; CHECK-i32-NEXT: mov h3, v0.h[2] +; CHECK-i32-NEXT: mov h0, v0.h[3] +; CHECK-i32-NEXT: fcvt s1, h1 +; CHECK-i32-NEXT: frintx s2, s2 +; CHECK-i32-NEXT: fcvt s3, h3 +; CHECK-i32-NEXT: frintx s1, s1 +; CHECK-i32-NEXT: fcvtzs w8, s2 +; CHECK-i32-NEXT: fcvt s2, h0 +; CHECK-i32-NEXT: fcvtzs w9, s1 +; CHECK-i32-NEXT: frintx s1, s3 +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: mov v0.s[1], w9 +; CHECK-i32-NEXT: fcvtzs w8, s1 +; CHECK-i32-NEXT: frintx s1, s2 +; CHECK-i32-NEXT: mov v0.s[2], w8 +; CHECK-i32-NEXT: fcvtzs w8, s1 +; CHECK-i32-NEXT: mov 
v0.s[3], w8 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v4f16: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-i64-NEXT: mov h1, v0.h[2] +; CHECK-i64-NEXT: mov h2, v0.h[1] +; CHECK-i64-NEXT: mov h3, v0.h[3] +; CHECK-i64-NEXT: fcvt s0, h0 +; CHECK-i64-NEXT: fcvt s1, h1 +; CHECK-i64-NEXT: fcvt s2, h2 +; CHECK-i64-NEXT: fcvt s3, h3 +; CHECK-i64-NEXT: frintx s0, s0 +; CHECK-i64-NEXT: frintx s1, s1 +; CHECK-i64-NEXT: frintx s2, s2 +; CHECK-i64-NEXT: frintx s3, s3 +; CHECK-i64-NEXT: fcvtzs x8, s0 +; CHECK-i64-NEXT: fcvtzs x9, s1 +; CHECK-i64-NEXT: fcvtzs x10, s2 +; CHECK-i64-NEXT: fcvtzs x11, s3 +; CHECK-i64-NEXT: fmov d0, x8 +; CHECK-i64-NEXT: fmov d1, x9 +; CHECK-i64-NEXT: mov v0.d[1], x10 +; CHECK-i64-NEXT: mov v1.d[1], x11 +; CHECK-i64-NEXT: ret + %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x) + ret <4 x iXLen> %a } -declare <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half>) +declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>) -define <8 x i64> @lrint_v8f16(<8 x half> %x) { -; CHECK-LABEL: lrint_v8f16: -; CHECK: // %bb.0: -; CHECK-NEXT: dup v1.2s, v0.s[1] -; CHECK-NEXT: dup v2.2s, v0.s[3] -; CHECK-NEXT: fcvtl v3.4s, v0.4h -; CHECK-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-NEXT: fcvtl v1.4s, v1.4h -; CHECK-NEXT: fcvtl v2.4s, v2.4h -; CHECK-NEXT: frintx v3.4s, v3.4s -; CHECK-NEXT: frintx v0.4s, v0.4s -; CHECK-NEXT: frintx v1.4s, v1.4s -; CHECK-NEXT: frintx v2.4s, v2.4s -; CHECK-NEXT: fcvtn v3.4h, v3.4s -; CHECK-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NEXT: fcvtn v1.4h, v1.4s -; CHECK-NEXT: fcvtn v2.4h, v2.4s -; CHECK-NEXT: mov h4, v3.h[1] -; CHECK-NEXT: mov h5, v0.h[1] -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: mov h6, v1.h[1] -; CHECK-NEXT: mov h7, v2.h[1] -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fcvt s2, h2 -; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: fcvtzs x9, s3 -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fcvt s7, h7 -; CHECK-NEXT: 
fcvtzs x11, s1 -; CHECK-NEXT: fcvtzs x12, s2 -; CHECK-NEXT: fcvtzs x10, s4 -; CHECK-NEXT: fcvtzs x13, s5 -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fcvtzs x14, s6 -; CHECK-NEXT: fcvtzs x15, s7 -; CHECK-NEXT: fmov d1, x11 -; CHECK-NEXT: fmov d3, x12 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: mov v2.d[1], x13 -; CHECK-NEXT: mov v1.d[1], x14 -; CHECK-NEXT: mov v3.d[1], x15 -; CHECK-NEXT: ret - %a = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> %x) - ret <8 x i64> %a +define <8 x iXLen> @lrint_v8f16(<8 x half> %x) { +; CHECK-i32-LABEL: lrint_v8f16: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-i32-NEXT: mov h3, v0.h[1] +; CHECK-i32-NEXT: fcvt s6, h0 +; CHECK-i32-NEXT: mov h4, v0.h[2] +; CHECK-i32-NEXT: mov h0, v0.h[3] +; CHECK-i32-NEXT: mov h2, v1.h[1] +; CHECK-i32-NEXT: fcvt s5, h1 +; CHECK-i32-NEXT: mov h7, v1.h[2] +; CHECK-i32-NEXT: fcvt s3, h3 +; CHECK-i32-NEXT: frintx s6, s6 +; CHECK-i32-NEXT: fcvt s4, h4 +; CHECK-i32-NEXT: mov h1, v1.h[3] +; CHECK-i32-NEXT: fcvt s2, h2 +; CHECK-i32-NEXT: frintx s5, s5 +; CHECK-i32-NEXT: fcvt s7, h7 +; CHECK-i32-NEXT: frintx s3, s3 +; CHECK-i32-NEXT: fcvtzs w9, s6 +; CHECK-i32-NEXT: frintx s4, s4 +; CHECK-i32-NEXT: frintx s2, s2 +; CHECK-i32-NEXT: fcvtzs w8, s5 +; CHECK-i32-NEXT: fcvt s5, h1 +; CHECK-i32-NEXT: fcvtzs w11, s3 +; CHECK-i32-NEXT: fcvt s3, h0 +; CHECK-i32-NEXT: fmov s0, w9 +; CHECK-i32-NEXT: fcvtzs w12, s4 +; CHECK-i32-NEXT: fcvtzs w10, s2 +; CHECK-i32-NEXT: frintx s2, s7 +; CHECK-i32-NEXT: fmov s1, w8 +; CHECK-i32-NEXT: mov v0.s[1], w11 +; CHECK-i32-NEXT: fcvtzs w8, s2 +; CHECK-i32-NEXT: mov v1.s[1], w10 +; CHECK-i32-NEXT: frintx s2, s3 +; CHECK-i32-NEXT: frintx s3, s5 +; CHECK-i32-NEXT: mov v0.s[2], w12 +; CHECK-i32-NEXT: mov v1.s[2], w8 +; CHECK-i32-NEXT: fcvtzs w9, s2 +; CHECK-i32-NEXT: fcvtzs w8, s3 +; CHECK-i32-NEXT: mov v0.s[3], w9 +; CHECK-i32-NEXT: mov v1.s[3], w8 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v8f16: +; CHECK-i64: // 
%bb.0: +; CHECK-i64-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-i64-NEXT: mov h4, v0.h[2] +; CHECK-i64-NEXT: mov h3, v0.h[1] +; CHECK-i64-NEXT: mov h7, v0.h[3] +; CHECK-i64-NEXT: fcvt s0, h0 +; CHECK-i64-NEXT: mov h2, v1.h[2] +; CHECK-i64-NEXT: mov h5, v1.h[1] +; CHECK-i64-NEXT: mov h6, v1.h[3] +; CHECK-i64-NEXT: fcvt s1, h1 +; CHECK-i64-NEXT: fcvt s4, h4 +; CHECK-i64-NEXT: fcvt s3, h3 +; CHECK-i64-NEXT: fcvt s7, h7 +; CHECK-i64-NEXT: frintx s0, s0 +; CHECK-i64-NEXT: fcvt s2, h2 +; CHECK-i64-NEXT: fcvt s5, h5 +; CHECK-i64-NEXT: fcvt s6, h6 +; CHECK-i64-NEXT: frintx s1, s1 +; CHECK-i64-NEXT: frintx s4, s4 +; CHECK-i64-NEXT: frintx s3, s3 +; CHECK-i64-NEXT: frintx s7, s7 +; CHECK-i64-NEXT: fcvtzs x9, s0 +; CHECK-i64-NEXT: frintx s2, s2 +; CHECK-i64-NEXT: frintx s5, s5 +; CHECK-i64-NEXT: frintx s6, s6 +; CHECK-i64-NEXT: fcvtzs x8, s1 +; CHECK-i64-NEXT: fcvtzs x12, s4 +; CHECK-i64-NEXT: fcvtzs x11, s3 +; CHECK-i64-NEXT: fcvtzs x15, s7 +; CHECK-i64-NEXT: fmov d0, x9 +; CHECK-i64-NEXT: fcvtzs x10, s2 +; CHECK-i64-NEXT: fcvtzs x13, s5 +; CHECK-i64-NEXT: fcvtzs x14, s6 +; CHECK-i64-NEXT: fmov d2, x8 +; CHECK-i64-NEXT: fmov d1, x12 +; CHECK-i64-NEXT: mov v0.d[1], x11 +; CHECK-i64-NEXT: fmov d3, x10 +; CHECK-i64-NEXT: mov v2.d[1], x13 +; CHECK-i64-NEXT: mov v1.d[1], x15 +; CHECK-i64-NEXT: mov v3.d[1], x14 +; CHECK-i64-NEXT: ret + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x) + ret <8 x iXLen> %a } -declare <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half>) +declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>) -define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { -; CHECK-LABEL: lrint_v16i64_v16f16: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl2 v4.4s, v0.8h -; CHECK-NEXT: fcvtl2 v2.4s, v1.8h -; CHECK-NEXT: fcvtl v3.4s, v0.4h -; CHECK-NEXT: dup v5.2s, v0.s[1] -; CHECK-NEXT: dup v0.2s, v0.s[3] -; CHECK-NEXT: dup v6.2s, v1.s[1] -; CHECK-NEXT: dup v7.2s, v1.s[3] -; CHECK-NEXT: fcvtl v1.4s, v1.4h -; CHECK-NEXT: frintx v4.4s, v4.4s -; CHECK-NEXT: frintx v2.4s, 
v2.4s -; CHECK-NEXT: frintx v3.4s, v3.4s -; CHECK-NEXT: fcvtl v5.4s, v5.4h -; CHECK-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NEXT: fcvtl v6.4s, v6.4h -; CHECK-NEXT: fcvtl v7.4s, v7.4h -; CHECK-NEXT: frintx v1.4s, v1.4s -; CHECK-NEXT: fcvtn v4.4h, v4.4s -; CHECK-NEXT: fcvtn v2.4h, v2.4s -; CHECK-NEXT: fcvtn v3.4h, v3.4s -; CHECK-NEXT: frintx v5.4s, v5.4s -; CHECK-NEXT: frintx v0.4s, v0.4s -; CHECK-NEXT: frintx v6.4s, v6.4s -; CHECK-NEXT: frintx v7.4s, v7.4s -; CHECK-NEXT: fcvtn v1.4h, v1.4s -; CHECK-NEXT: mov h16, v4.h[1] -; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: fcvt s17, h2 -; CHECK-NEXT: mov h18, v3.h[1] -; CHECK-NEXT: fcvtn v5.4h, v5.4s -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NEXT: fcvtn v6.4h, v6.4s -; CHECK-NEXT: fcvtn v7.4h, v7.4s -; CHECK-NEXT: mov h2, v2.h[1] -; CHECK-NEXT: fcvt s16, h16 -; CHECK-NEXT: fcvtzs x8, s4 -; CHECK-NEXT: fcvtzs x9, s17 -; CHECK-NEXT: fcvt s4, h18 -; CHECK-NEXT: fcvt s17, h5 -; CHECK-NEXT: fcvtzs x10, s3 -; CHECK-NEXT: mov h3, v5.h[1] -; CHECK-NEXT: fcvt s5, h0 -; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: mov h18, v6.h[1] -; CHECK-NEXT: mov h19, v7.h[1] -; CHECK-NEXT: fcvtzs x11, s16 -; CHECK-NEXT: mov h16, v1.h[1] -; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: fcvtzs x12, s4 -; CHECK-NEXT: fcvt s4, h6 -; CHECK-NEXT: fcvtzs x13, s17 -; CHECK-NEXT: fcvtzs x14, s5 -; CHECK-NEXT: fcvt s5, h7 -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fcvt s7, h2 -; CHECK-NEXT: fcvt s17, h0 -; CHECK-NEXT: fcvt s18, h18 -; CHECK-NEXT: fcvt s16, h16 -; CHECK-NEXT: fcvt s19, h19 -; CHECK-NEXT: fcvtzs x15, s1 -; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fcvtzs x8, s4 -; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: fcvtzs x10, s5 -; CHECK-NEXT: fmov d6, x9 -; CHECK-NEXT: fcvtzs x9, s3 -; CHECK-NEXT: fmov d1, x13 -; CHECK-NEXT: fcvtzs x13, s17 -; CHECK-NEXT: fcvtzs x17, s7 -; CHECK-NEXT: fcvtzs x16, s16 -; CHECK-NEXT: fcvtzs x18, s18 -; CHECK-NEXT: fcvtzs x0, s19 -; CHECK-NEXT: fmov d3, x14 -; CHECK-NEXT: fmov d4, x15 -; CHECK-NEXT: fmov d5, x8 -; 
CHECK-NEXT: fmov d7, x10 -; CHECK-NEXT: mov v0.d[1], x12 -; CHECK-NEXT: mov v1.d[1], x9 -; CHECK-NEXT: mov v2.d[1], x11 -; CHECK-NEXT: mov v6.d[1], x17 -; CHECK-NEXT: mov v3.d[1], x13 -; CHECK-NEXT: mov v4.d[1], x16 -; CHECK-NEXT: mov v5.d[1], x18 -; CHECK-NEXT: mov v7.d[1], x0 -; CHECK-NEXT: ret - %a = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> %x) - ret <16 x i64> %a +define <16 x iXLen> @lrint_v16i64_v16f16(<16 x half> %x) { +; CHECK-i32-LABEL: lrint_v16i64_v16f16: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-i32-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-i32-NEXT: mov h18, v0.h[1] +; CHECK-i32-NEXT: mov h19, v1.h[1] +; CHECK-i32-NEXT: fcvt s20, h0 +; CHECK-i32-NEXT: mov h21, v0.h[2] +; CHECK-i32-NEXT: mov h0, v0.h[3] +; CHECK-i32-NEXT: mov h4, v2.h[1] +; CHECK-i32-NEXT: mov h5, v2.h[2] +; CHECK-i32-NEXT: fcvt s6, h2 +; CHECK-i32-NEXT: fcvt s7, h3 +; CHECK-i32-NEXT: mov h16, v3.h[1] +; CHECK-i32-NEXT: mov h17, v3.h[2] +; CHECK-i32-NEXT: fcvt s18, h18 +; CHECK-i32-NEXT: fcvt s19, h19 +; CHECK-i32-NEXT: mov h2, v2.h[3] +; CHECK-i32-NEXT: fcvt s4, h4 +; CHECK-i32-NEXT: fcvt s5, h5 +; CHECK-i32-NEXT: frintx s6, s6 +; CHECK-i32-NEXT: frintx s7, s7 +; CHECK-i32-NEXT: fcvt s16, h16 +; CHECK-i32-NEXT: fcvt s17, h17 +; CHECK-i32-NEXT: frintx s18, s18 +; CHECK-i32-NEXT: fcvt s2, h2 +; CHECK-i32-NEXT: frintx s4, s4 +; CHECK-i32-NEXT: frintx s5, s5 +; CHECK-i32-NEXT: fcvtzs w8, s6 +; CHECK-i32-NEXT: fcvt s6, h1 +; CHECK-i32-NEXT: fcvtzs w9, s7 +; CHECK-i32-NEXT: mov h7, v1.h[2] +; CHECK-i32-NEXT: frintx s16, s16 +; CHECK-i32-NEXT: fcvtzs w15, s18 +; CHECK-i32-NEXT: fcvtzs w10, s4 +; CHECK-i32-NEXT: frintx s4, s17 +; CHECK-i32-NEXT: fcvtzs w11, s5 +; CHECK-i32-NEXT: frintx s5, s20 +; CHECK-i32-NEXT: fcvt s17, h21 +; CHECK-i32-NEXT: frintx s6, s6 +; CHECK-i32-NEXT: fcvtzs w12, s16 +; CHECK-i32-NEXT: frintx s16, s19 +; CHECK-i32-NEXT: fcvt s7, h7 +; CHECK-i32-NEXT: mov h19, v1.h[3] +; CHECK-i32-NEXT: fmov s1, w8 +; 
CHECK-i32-NEXT: fcvtzs w13, s4 +; CHECK-i32-NEXT: mov h4, v3.h[3] +; CHECK-i32-NEXT: fmov s3, w9 +; CHECK-i32-NEXT: fcvtzs w14, s5 +; CHECK-i32-NEXT: frintx s5, s17 +; CHECK-i32-NEXT: fcvtzs w16, s6 +; CHECK-i32-NEXT: fcvt s17, h0 +; CHECK-i32-NEXT: fcvtzs w8, s16 +; CHECK-i32-NEXT: frintx s6, s7 +; CHECK-i32-NEXT: fcvt s7, h19 +; CHECK-i32-NEXT: mov v1.s[1], w10 +; CHECK-i32-NEXT: mov v3.s[1], w12 +; CHECK-i32-NEXT: fcvt s4, h4 +; CHECK-i32-NEXT: fcvtzs w9, s5 +; CHECK-i32-NEXT: fmov s0, w14 +; CHECK-i32-NEXT: frintx s5, s2 +; CHECK-i32-NEXT: fmov s2, w16 +; CHECK-i32-NEXT: frintx s16, s17 +; CHECK-i32-NEXT: fcvtzs w10, s6 +; CHECK-i32-NEXT: frintx s6, s7 +; CHECK-i32-NEXT: mov v1.s[2], w11 +; CHECK-i32-NEXT: mov v3.s[2], w13 +; CHECK-i32-NEXT: mov v0.s[1], w15 +; CHECK-i32-NEXT: frintx s4, s4 +; CHECK-i32-NEXT: mov v2.s[1], w8 +; CHECK-i32-NEXT: fcvtzs w8, s5 +; CHECK-i32-NEXT: fcvtzs w12, s16 +; CHECK-i32-NEXT: mov v0.s[2], w9 +; CHECK-i32-NEXT: fcvtzs w9, s4 +; CHECK-i32-NEXT: mov v2.s[2], w10 +; CHECK-i32-NEXT: fcvtzs w10, s6 +; CHECK-i32-NEXT: mov v1.s[3], w8 +; CHECK-i32-NEXT: mov v0.s[3], w12 +; CHECK-i32-NEXT: mov v3.s[3], w9 +; CHECK-i32-NEXT: mov v2.s[3], w10 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v16i64_v16f16: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-i64-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-i64-NEXT: mov h17, v0.h[1] +; CHECK-i64-NEXT: mov h19, v0.h[2] +; CHECK-i64-NEXT: fcvt s18, h0 +; CHECK-i64-NEXT: mov h0, v0.h[3] +; CHECK-i64-NEXT: mov h4, v2.h[1] +; CHECK-i64-NEXT: mov h5, v2.h[2] +; CHECK-i64-NEXT: fcvt s7, h3 +; CHECK-i64-NEXT: fcvt s6, h2 +; CHECK-i64-NEXT: mov h16, v3.h[2] +; CHECK-i64-NEXT: mov h2, v2.h[3] +; CHECK-i64-NEXT: fcvt s17, h17 +; CHECK-i64-NEXT: fcvt s19, h19 +; CHECK-i64-NEXT: frintx s18, s18 +; CHECK-i64-NEXT: fcvt s0, h0 +; CHECK-i64-NEXT: fcvt s4, h4 +; CHECK-i64-NEXT: fcvt s5, h5 +; CHECK-i64-NEXT: frintx s7, s7 +; CHECK-i64-NEXT: frintx s6, s6 +; 
CHECK-i64-NEXT: fcvt s16, h16 +; CHECK-i64-NEXT: fcvt s2, h2 +; CHECK-i64-NEXT: frintx s17, s17 +; CHECK-i64-NEXT: frintx s19, s19 +; CHECK-i64-NEXT: fcvtzs x13, s18 +; CHECK-i64-NEXT: frintx s0, s0 +; CHECK-i64-NEXT: frintx s4, s4 +; CHECK-i64-NEXT: frintx s5, s5 +; CHECK-i64-NEXT: fcvtzs x9, s7 +; CHECK-i64-NEXT: mov h7, v1.h[2] +; CHECK-i64-NEXT: fcvtzs x8, s6 +; CHECK-i64-NEXT: mov h6, v1.h[1] +; CHECK-i64-NEXT: frintx s16, s16 +; CHECK-i64-NEXT: fcvtzs x14, s17 +; CHECK-i64-NEXT: fcvtzs x15, s19 +; CHECK-i64-NEXT: fcvtzs x10, s4 +; CHECK-i64-NEXT: mov h4, v3.h[1] +; CHECK-i64-NEXT: fcvtzs x11, s5 +; CHECK-i64-NEXT: mov h5, v1.h[3] +; CHECK-i64-NEXT: mov h3, v3.h[3] +; CHECK-i64-NEXT: fcvt s1, h1 +; CHECK-i64-NEXT: fcvt s7, h7 +; CHECK-i64-NEXT: fcvt s6, h6 +; CHECK-i64-NEXT: fcvtzs x12, s16 +; CHECK-i64-NEXT: frintx s16, s2 +; CHECK-i64-NEXT: fmov d2, x8 +; CHECK-i64-NEXT: fcvt s4, h4 +; CHECK-i64-NEXT: fcvt s3, h3 +; CHECK-i64-NEXT: fcvt s5, h5 +; CHECK-i64-NEXT: frintx s1, s1 +; CHECK-i64-NEXT: frintx s7, s7 +; CHECK-i64-NEXT: frintx s17, s6 +; CHECK-i64-NEXT: fmov d6, x9 +; CHECK-i64-NEXT: mov v2.d[1], x10 +; CHECK-i64-NEXT: frintx s4, s4 +; CHECK-i64-NEXT: frintx s18, s3 +; CHECK-i64-NEXT: frintx s5, s5 +; CHECK-i64-NEXT: fcvtzs x8, s1 +; CHECK-i64-NEXT: fcvtzs x9, s7 +; CHECK-i64-NEXT: fmov d3, x11 +; CHECK-i64-NEXT: fcvtzs x11, s0 +; CHECK-i64-NEXT: fmov d7, x12 +; CHECK-i64-NEXT: fcvtzs x12, s16 +; CHECK-i64-NEXT: fcvtzs x16, s17 +; CHECK-i64-NEXT: fcvtzs x17, s4 +; CHECK-i64-NEXT: fmov d0, x13 +; CHECK-i64-NEXT: fmov d1, x15 +; CHECK-i64-NEXT: fcvtzs x18, s18 +; CHECK-i64-NEXT: fcvtzs x0, s5 +; CHECK-i64-NEXT: fmov d4, x8 +; CHECK-i64-NEXT: fmov d5, x9 +; CHECK-i64-NEXT: mov v0.d[1], x14 +; CHECK-i64-NEXT: mov v1.d[1], x11 +; CHECK-i64-NEXT: mov v3.d[1], x12 +; CHECK-i64-NEXT: mov v4.d[1], x16 +; CHECK-i64-NEXT: mov v6.d[1], x17 +; CHECK-i64-NEXT: mov v7.d[1], x18 +; CHECK-i64-NEXT: mov v5.d[1], x0 +; CHECK-i64-NEXT: ret + %a = call <16 x iXLen> 
@llvm.lrint.v16iXLen.v16f16(<16 x half> %x) + ret <16 x iXLen> %a } -declare <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half>) +declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>) -define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { -; CHECK-LABEL: lrint_v32i64_v32f16: -; CHECK: // %bb.0: -; CHECK-NEXT: dup v4.2s, v1.s[1] -; CHECK-NEXT: fcvtl v5.4s, v0.4h -; CHECK-NEXT: dup v6.2s, v1.s[3] -; CHECK-NEXT: fcvtl v7.4s, v1.4h -; CHECK-NEXT: dup v16.2s, v2.s[3] -; CHECK-NEXT: fcvtl v17.4s, v2.4h -; CHECK-NEXT: dup v19.2s, v2.s[1] -; CHECK-NEXT: dup v18.2s, v0.s[1] -; CHECK-NEXT: dup v21.2s, v3.s[1] -; CHECK-NEXT: dup v24.2s, v3.s[3] -; CHECK-NEXT: fcvtl2 v1.4s, v1.8h -; CHECK-NEXT: fcvtl2 v2.4s, v2.8h -; CHECK-NEXT: fcvtl v4.4s, v4.4h -; CHECK-NEXT: frintx v5.4s, v5.4s -; CHECK-NEXT: fcvtl v6.4s, v6.4h -; CHECK-NEXT: frintx v7.4s, v7.4s -; CHECK-NEXT: fcvtl v16.4s, v16.4h -; CHECK-NEXT: frintx v22.4s, v17.4s -; CHECK-NEXT: fcvtl v19.4s, v19.4h -; CHECK-NEXT: dup v17.2s, v0.s[3] -; CHECK-NEXT: fcvtl v21.4s, v21.4h -; CHECK-NEXT: fcvtl v24.4s, v24.4h -; CHECK-NEXT: frintx v1.4s, v1.4s -; CHECK-NEXT: frintx v2.4s, v2.4s -; CHECK-NEXT: frintx v20.4s, v4.4s -; CHECK-NEXT: fcvtn v4.4h, v5.4s -; CHECK-NEXT: frintx v23.4s, v6.4s -; CHECK-NEXT: fcvtn v5.4h, v7.4s -; CHECK-NEXT: frintx v25.4s, v16.4s -; CHECK-NEXT: fcvtn v16.4h, v22.4s -; CHECK-NEXT: frintx v26.4s, v19.4s -; CHECK-NEXT: fcvtn v6.4h, v20.4s -; CHECK-NEXT: fcvtl v20.4s, v3.4h -; CHECK-NEXT: fcvt s22, h4 -; CHECK-NEXT: fcvtn v7.4h, v23.4s -; CHECK-NEXT: fcvtl2 v23.4s, v3.8h -; CHECK-NEXT: fcvtl v3.4s, v18.4h -; CHECK-NEXT: fcvtn v25.4h, v25.4s -; CHECK-NEXT: fcvt s27, h5 -; CHECK-NEXT: fcvtl v18.4s, v17.4h -; CHECK-NEXT: frintx v17.4s, v21.4s -; CHECK-NEXT: fcvt s29, h16 -; CHECK-NEXT: mov h16, v16.h[1] -; CHECK-NEXT: frintx v20.4s, v20.4s -; CHECK-NEXT: fcvtzs x9, s22 -; CHECK-NEXT: fcvt s28, h6 -; CHECK-NEXT: fcvt s22, h7 -; CHECK-NEXT: frintx v19.4s, v3.4s -; CHECK-NEXT: fcvtn v3.4h, v26.4s -; 
CHECK-NEXT: mov h21, v25.h[1] -; CHECK-NEXT: frintx v23.4s, v23.4s -; CHECK-NEXT: fcvtzs x10, s27 -; CHECK-NEXT: fcvtl2 v26.4s, v0.8h -; CHECK-NEXT: fcvt s25, h25 -; CHECK-NEXT: fcvtn v17.4h, v17.4s -; CHECK-NEXT: fcvtn v20.4h, v20.4s -; CHECK-NEXT: fcvtzs x12, s28 -; CHECK-NEXT: fcvtzs x14, s29 -; CHECK-NEXT: fcvtzs x13, s22 -; CHECK-NEXT: frintx v22.4s, v24.4s -; CHECK-NEXT: fcvt s24, h3 -; CHECK-NEXT: fcvt s21, h21 -; CHECK-NEXT: fcvtn v23.4h, v23.4s -; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: fcvtzs x15, s25 -; CHECK-NEXT: mov h25, v17.h[1] -; CHECK-NEXT: fcvt s17, h17 -; CHECK-NEXT: mov h27, v20.h[1] -; CHECK-NEXT: fcvt s20, h20 -; CHECK-NEXT: fcvtn v28.4h, v2.4s -; CHECK-NEXT: fcvtn v22.4h, v22.4s -; CHECK-NEXT: fcvtzs x10, s24 -; CHECK-NEXT: frintx v24.4s, v26.4s -; CHECK-NEXT: fcvtzs x11, s21 -; CHECK-NEXT: mov h26, v23.h[1] -; CHECK-NEXT: fcvt s23, h23 -; CHECK-NEXT: fcvt s25, h25 -; CHECK-NEXT: fmov d2, x13 -; CHECK-NEXT: fcvtzs x13, s17 -; CHECK-NEXT: fcvt s21, h27 -; CHECK-NEXT: fcvtzs x16, s20 -; CHECK-NEXT: fcvtn v27.4h, v1.4s -; CHECK-NEXT: mov h20, v22.h[1] -; CHECK-NEXT: fcvt s22, h22 -; CHECK-NEXT: fcvtn v24.4h, v24.4s -; CHECK-NEXT: fmov d1, x12 -; CHECK-NEXT: fcvtzs x0, s23 -; CHECK-NEXT: fmov d17, x14 -; CHECK-NEXT: fcvtzs x18, s25 -; CHECK-NEXT: mov h25, v28.h[1] -; CHECK-NEXT: fcvt s23, h28 -; CHECK-NEXT: fcvtzs x12, s21 -; CHECK-NEXT: fcvt s21, h26 -; CHECK-NEXT: fcvt s26, h27 -; CHECK-NEXT: fcvt s20, h20 -; CHECK-NEXT: fcvtzs x17, s22 -; CHECK-NEXT: fcvt s22, h24 -; CHECK-NEXT: frintx v18.4s, v18.4s -; CHECK-NEXT: mov h3, v3.h[1] -; CHECK-NEXT: mov h7, v7.h[1] -; CHECK-NEXT: fcvt s25, h25 -; CHECK-NEXT: fcvtn v19.4h, v19.4s -; CHECK-NEXT: fcvt s16, h16 -; CHECK-NEXT: fcvtzs x14, s21 -; CHECK-NEXT: fmov d21, x15 -; CHECK-NEXT: mov h5, v5.h[1] -; CHECK-NEXT: fcvtzs x15, s20 -; CHECK-NEXT: fmov d20, x16 -; CHECK-NEXT: fcvtzs x16, s22 -; CHECK-NEXT: fmov d22, x17 -; CHECK-NEXT: fcvtzs x17, s26 -; CHECK-NEXT: fmov d26, x0 -; CHECK-NEXT: fcvtn 
v18.4h, v18.4s -; CHECK-NEXT: mov h6, v6.h[1] -; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: mov v20.d[1], x12 -; CHECK-NEXT: fcvtzs x12, s25 -; CHECK-NEXT: fcvt s7, h7 -; CHECK-NEXT: mov v26.d[1], x14 -; CHECK-NEXT: mov v22.d[1], x15 -; CHECK-NEXT: fcvtzs x14, s23 -; CHECK-NEXT: fmov d23, x13 -; CHECK-NEXT: mov v21.d[1], x11 -; CHECK-NEXT: mov h4, v4.h[1] -; CHECK-NEXT: mov h25, v19.h[1] -; CHECK-NEXT: fcvt s6, h6 -; CHECK-NEXT: fcvtzs x11, s3 -; CHECK-NEXT: fcvt s5, h5 -; CHECK-NEXT: fcvt s19, h19 -; CHECK-NEXT: fcvtzs x13, s7 -; CHECK-NEXT: stp q26, q22, [x8, #224] -; CHECK-NEXT: mov v23.d[1], x18 -; CHECK-NEXT: mov h26, v27.h[1] -; CHECK-NEXT: fmov d22, x14 -; CHECK-NEXT: fcvt s4, h4 -; CHECK-NEXT: fmov d3, x16 -; CHECK-NEXT: fcvt s7, h25 -; CHECK-NEXT: fcvtzs x14, s6 -; CHECK-NEXT: mov v2.d[1], x13 -; CHECK-NEXT: stp q20, q23, [x8, #192] -; CHECK-NEXT: fcvt s23, h26 -; CHECK-NEXT: mov v22.d[1], x12 -; CHECK-NEXT: fmov d20, x10 -; CHECK-NEXT: fcvtzs x10, s16 -; CHECK-NEXT: mov h16, v24.h[1] -; CHECK-NEXT: mov h24, v18.h[1] -; CHECK-NEXT: fcvt s18, h18 -; CHECK-NEXT: mov v1.d[1], x14 -; CHECK-NEXT: fcvtzs x14, s7 -; CHECK-NEXT: stp q22, q21, [x8, #160] -; CHECK-NEXT: fcvtzs x12, s23 -; CHECK-NEXT: fmov d21, x17 -; CHECK-NEXT: fcvt s16, h16 -; CHECK-NEXT: mov v20.d[1], x11 -; CHECK-NEXT: fcvtzs x11, s5 -; CHECK-NEXT: fcvt s22, h24 -; CHECK-NEXT: mov v17.d[1], x10 -; CHECK-NEXT: fcvtzs x10, s18 -; CHECK-NEXT: mov v21.d[1], x12 -; CHECK-NEXT: fcvtzs x12, s19 -; CHECK-NEXT: fcvtzs x15, s16 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: fcvtzs x11, s4 -; CHECK-NEXT: stp q17, q20, [x8, #128] -; CHECK-NEXT: fcvtzs x13, s22 -; CHECK-NEXT: fmov d4, x10 -; CHECK-NEXT: stp q21, q2, [x8, #96] -; CHECK-NEXT: fmov d5, x12 -; CHECK-NEXT: fmov d2, x9 -; CHECK-NEXT: stp q0, q1, [x8, #64] -; CHECK-NEXT: mov v3.d[1], x15 -; CHECK-NEXT: mov v4.d[1], x13 -; CHECK-NEXT: mov v5.d[1], x14 -; CHECK-NEXT: mov v2.d[1], x11 -; CHECK-NEXT: stp q3, q4, [x8, #32] -; CHECK-NEXT: stp q2, q5, [x8] -; 
CHECK-NEXT: ret - %a = call <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half> %x) - ret <32 x i64> %a +define <32 x iXLen> @lrint_v32i64_v32f16(<32 x half> %x) { +; CHECK-i32-LABEL: lrint_v32i64_v32f16: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: ext v5.16b, v0.16b, v0.16b, #8 +; CHECK-i32-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-i32-NEXT: ext v17.16b, v2.16b, v2.16b, #8 +; CHECK-i32-NEXT: mov h6, v5.h[1] +; CHECK-i32-NEXT: fcvt s7, h5 +; CHECK-i32-NEXT: mov h16, v5.h[2] +; CHECK-i32-NEXT: mov h5, v5.h[3] +; CHECK-i32-NEXT: mov h18, v4.h[1] +; CHECK-i32-NEXT: mov h20, v4.h[3] +; CHECK-i32-NEXT: mov h19, v4.h[2] +; CHECK-i32-NEXT: fcvt s21, h4 +; CHECK-i32-NEXT: mov h23, v17.h[1] +; CHECK-i32-NEXT: ext v4.16b, v3.16b, v3.16b, #8 +; CHECK-i32-NEXT: fcvt s22, h17 +; CHECK-i32-NEXT: fcvt s6, h6 +; CHECK-i32-NEXT: frintx s7, s7 +; CHECK-i32-NEXT: fcvt s16, h16 +; CHECK-i32-NEXT: fcvt s5, h5 +; CHECK-i32-NEXT: fcvt s18, h18 +; CHECK-i32-NEXT: fcvt s20, h20 +; CHECK-i32-NEXT: fcvt s19, h19 +; CHECK-i32-NEXT: frintx s22, s22 +; CHECK-i32-NEXT: frintx s6, s6 +; CHECK-i32-NEXT: fcvtzs w12, s7 +; CHECK-i32-NEXT: frintx s7, s16 +; CHECK-i32-NEXT: frintx s5, s5 +; CHECK-i32-NEXT: frintx s16, s21 +; CHECK-i32-NEXT: fcvt s21, h23 +; CHECK-i32-NEXT: frintx s18, s18 +; CHECK-i32-NEXT: frintx s20, s20 +; CHECK-i32-NEXT: frintx s19, s19 +; CHECK-i32-NEXT: fcvtzs w15, s22 +; CHECK-i32-NEXT: mov h22, v1.h[2] +; CHECK-i32-NEXT: fcvtzs w17, s6 +; CHECK-i32-NEXT: mov h6, v17.h[2] +; CHECK-i32-NEXT: mov h17, v17.h[3] +; CHECK-i32-NEXT: fcvtzs w9, s7 +; CHECK-i32-NEXT: mov h7, v4.h[2] +; CHECK-i32-NEXT: fcvtzs w8, s5 +; CHECK-i32-NEXT: mov h5, v4.h[1] +; CHECK-i32-NEXT: fcvtzs w13, s16 +; CHECK-i32-NEXT: frintx s16, s21 +; CHECK-i32-NEXT: fcvtzs w14, s18 +; CHECK-i32-NEXT: fcvtzs w10, s20 +; CHECK-i32-NEXT: fcvt s18, h4 +; CHECK-i32-NEXT: fcvt s6, h6 +; CHECK-i32-NEXT: fcvt s17, h17 +; CHECK-i32-NEXT: mov h20, v0.h[2] +; CHECK-i32-NEXT: fcvt s7, h7 +; CHECK-i32-NEXT: fcvtzs w11, s19 +; 
CHECK-i32-NEXT: mov h19, v0.h[1] +; CHECK-i32-NEXT: fcvt s5, h5 +; CHECK-i32-NEXT: fcvtzs w0, s16 +; CHECK-i32-NEXT: mov h21, v1.h[1] +; CHECK-i32-NEXT: frintx s18, s18 +; CHECK-i32-NEXT: mov h4, v4.h[3] +; CHECK-i32-NEXT: frintx s6, s6 +; CHECK-i32-NEXT: frintx s16, s17 +; CHECK-i32-NEXT: mov h17, v0.h[3] +; CHECK-i32-NEXT: fcvt s0, h0 +; CHECK-i32-NEXT: fcvt s19, h19 +; CHECK-i32-NEXT: frintx s5, s5 +; CHECK-i32-NEXT: fcvtzs w2, s18 +; CHECK-i32-NEXT: fcvt s18, h21 +; CHECK-i32-NEXT: fcvt s21, h2 +; CHECK-i32-NEXT: fcvtzs w18, s6 +; CHECK-i32-NEXT: frintx s6, s7 +; CHECK-i32-NEXT: fcvt s7, h20 +; CHECK-i32-NEXT: fcvtzs w16, s16 +; CHECK-i32-NEXT: fcvt s16, h17 +; CHECK-i32-NEXT: fcvt s17, h1 +; CHECK-i32-NEXT: frintx s0, s0 +; CHECK-i32-NEXT: fcvtzs w3, s5 +; CHECK-i32-NEXT: frintx s5, s19 +; CHECK-i32-NEXT: fcvt s19, h22 +; CHECK-i32-NEXT: mov h1, v1.h[3] +; CHECK-i32-NEXT: fcvtzs w1, s6 +; CHECK-i32-NEXT: frintx s6, s7 +; CHECK-i32-NEXT: mov h7, v2.h[1] +; CHECK-i32-NEXT: frintx s17, s17 +; CHECK-i32-NEXT: frintx s20, s16 +; CHECK-i32-NEXT: fmov s16, w12 +; CHECK-i32-NEXT: fcvtzs w4, s0 +; CHECK-i32-NEXT: frintx s0, s18 +; CHECK-i32-NEXT: fcvtzs w5, s5 +; CHECK-i32-NEXT: frintx s5, s19 +; CHECK-i32-NEXT: frintx s18, s21 +; CHECK-i32-NEXT: fcvt s19, h3 +; CHECK-i32-NEXT: fcvtzs w12, s6 +; CHECK-i32-NEXT: fcvt s6, h7 +; CHECK-i32-NEXT: mov h7, v3.h[1] +; CHECK-i32-NEXT: fcvtzs w6, s17 +; CHECK-i32-NEXT: fmov s17, w13 +; CHECK-i32-NEXT: mov v16.s[1], w17 +; CHECK-i32-NEXT: fcvtzs w17, s20 +; CHECK-i32-NEXT: fcvtzs w7, s0 +; CHECK-i32-NEXT: mov h0, v2.h[2] +; CHECK-i32-NEXT: mov h20, v3.h[2] +; CHECK-i32-NEXT: fcvtzs w13, s5 +; CHECK-i32-NEXT: fmov s5, w15 +; CHECK-i32-NEXT: frintx s6, s6 +; CHECK-i32-NEXT: fcvt s7, h7 +; CHECK-i32-NEXT: mov v17.s[1], w14 +; CHECK-i32-NEXT: fcvtzs w14, s18 +; CHECK-i32-NEXT: frintx s18, s19 +; CHECK-i32-NEXT: mov h2, v2.h[3] +; CHECK-i32-NEXT: fcvt s0, h0 +; CHECK-i32-NEXT: mov h3, v3.h[3] +; CHECK-i32-NEXT: mov v5.s[1], w0 +; 
CHECK-i32-NEXT: fcvt s19, h20 +; CHECK-i32-NEXT: fcvt s1, h1 +; CHECK-i32-NEXT: mov v16.s[2], w9 +; CHECK-i32-NEXT: fcvtzs w15, s6 +; CHECK-i32-NEXT: frintx s6, s7 +; CHECK-i32-NEXT: fmov s7, w2 +; CHECK-i32-NEXT: fcvtzs w0, s18 +; CHECK-i32-NEXT: fcvt s20, h2 +; CHECK-i32-NEXT: fcvt s18, h4 +; CHECK-i32-NEXT: frintx s21, s0 +; CHECK-i32-NEXT: fcvt s3, h3 +; CHECK-i32-NEXT: fmov s0, w4 +; CHECK-i32-NEXT: frintx s19, s19 +; CHECK-i32-NEXT: fmov s2, w6 +; CHECK-i32-NEXT: fmov s4, w14 +; CHECK-i32-NEXT: fcvtzs w2, s6 +; CHECK-i32-NEXT: mov v7.s[1], w3 +; CHECK-i32-NEXT: frintx s1, s1 +; CHECK-i32-NEXT: fmov s6, w0 +; CHECK-i32-NEXT: mov v0.s[1], w5 +; CHECK-i32-NEXT: frintx s20, s20 +; CHECK-i32-NEXT: mov v2.s[1], w7 +; CHECK-i32-NEXT: fcvtzs w3, s21 +; CHECK-i32-NEXT: mov v4.s[1], w15 +; CHECK-i32-NEXT: fcvtzs w14, s19 +; CHECK-i32-NEXT: frintx s18, s18 +; CHECK-i32-NEXT: frintx s3, s3 +; CHECK-i32-NEXT: mov v6.s[1], w2 +; CHECK-i32-NEXT: mov v17.s[2], w11 +; CHECK-i32-NEXT: fcvtzs w15, s1 +; CHECK-i32-NEXT: fcvtzs w0, s20 +; CHECK-i32-NEXT: mov v5.s[2], w18 +; CHECK-i32-NEXT: mov v0.s[2], w12 +; CHECK-i32-NEXT: mov v7.s[2], w1 +; CHECK-i32-NEXT: mov v2.s[2], w13 +; CHECK-i32-NEXT: mov v4.s[2], w3 +; CHECK-i32-NEXT: fcvtzs w9, s18 +; CHECK-i32-NEXT: fcvtzs w11, s3 +; CHECK-i32-NEXT: mov v16.s[3], w8 +; CHECK-i32-NEXT: mov v6.s[2], w14 +; CHECK-i32-NEXT: mov v17.s[3], w10 +; CHECK-i32-NEXT: mov v0.s[3], w17 +; CHECK-i32-NEXT: mov v5.s[3], w16 +; CHECK-i32-NEXT: mov v2.s[3], w15 +; CHECK-i32-NEXT: mov v4.s[3], w0 +; CHECK-i32-NEXT: mov v7.s[3], w9 +; CHECK-i32-NEXT: mov v1.16b, v16.16b +; CHECK-i32-NEXT: mov v6.s[3], w11 +; CHECK-i32-NEXT: mov v3.16b, v17.16b +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v32i64_v32f16: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-i64-NEXT: ext v5.16b, v2.16b, v2.16b, #8 +; CHECK-i64-NEXT: ext v6.16b, v3.16b, v3.16b, #8 +; CHECK-i64-NEXT: ext v7.16b, v0.16b, v0.16b, #8 +; CHECK-i64-NEXT: mov 
h19, v0.h[1] +; CHECK-i64-NEXT: fcvt s21, h0 +; CHECK-i64-NEXT: mov h23, v1.h[2] +; CHECK-i64-NEXT: fcvt s22, h1 +; CHECK-i64-NEXT: fcvt s26, h2 +; CHECK-i64-NEXT: mov h27, v2.h[1] +; CHECK-i64-NEXT: mov h28, v2.h[2] +; CHECK-i64-NEXT: mov h16, v4.h[2] +; CHECK-i64-NEXT: fcvt s17, h5 +; CHECK-i64-NEXT: mov h18, v5.h[2] +; CHECK-i64-NEXT: mov h20, v6.h[2] +; CHECK-i64-NEXT: fcvt s24, h7 +; CHECK-i64-NEXT: fcvt s25, h6 +; CHECK-i64-NEXT: fcvt s19, h19 +; CHECK-i64-NEXT: frintx s22, s22 +; CHECK-i64-NEXT: fcvt s16, h16 +; CHECK-i64-NEXT: frintx s17, s17 +; CHECK-i64-NEXT: fcvt s18, h18 +; CHECK-i64-NEXT: fcvt s20, h20 +; CHECK-i64-NEXT: frintx s16, s16 +; CHECK-i64-NEXT: fcvtzs x12, s17 +; CHECK-i64-NEXT: frintx s17, s18 +; CHECK-i64-NEXT: frintx s18, s21 +; CHECK-i64-NEXT: fcvt s21, h23 +; CHECK-i64-NEXT: frintx s23, s24 +; CHECK-i64-NEXT: frintx s24, s25 +; CHECK-i64-NEXT: frintx s25, s19 +; CHECK-i64-NEXT: mov h19, v7.h[1] +; CHECK-i64-NEXT: fcvtzs x13, s16 +; CHECK-i64-NEXT: frintx s16, s20 +; CHECK-i64-NEXT: frintx s20, s26 +; CHECK-i64-NEXT: fcvtzs x9, s23 +; CHECK-i64-NEXT: mov h23, v3.h[2] +; CHECK-i64-NEXT: fcvt s26, h27 +; CHECK-i64-NEXT: fcvtzs x15, s24 +; CHECK-i64-NEXT: fcvtzs x10, s25 +; CHECK-i64-NEXT: fcvt s24, h28 +; CHECK-i64-NEXT: mov h25, v3.h[3] +; CHECK-i64-NEXT: fcvtzs x14, s17 +; CHECK-i64-NEXT: frintx s21, s21 +; CHECK-i64-NEXT: fmov d17, x12 +; CHECK-i64-NEXT: fcvtzs x12, s16 +; CHECK-i64-NEXT: fmov d16, x13 +; CHECK-i64-NEXT: fcvtzs x13, s22 +; CHECK-i64-NEXT: fcvt s22, h3 +; CHECK-i64-NEXT: mov h3, v3.h[1] +; CHECK-i64-NEXT: mov h27, v0.h[2] +; CHECK-i64-NEXT: mov h28, v2.h[3] +; CHECK-i64-NEXT: fcvt s23, h23 +; CHECK-i64-NEXT: frintx s26, s26 +; CHECK-i64-NEXT: fcvtzs x16, s20 +; CHECK-i64-NEXT: frintx s20, s24 +; CHECK-i64-NEXT: fcvt s24, h25 +; CHECK-i64-NEXT: fcvtzs x11, s18 +; CHECK-i64-NEXT: fmov d18, x14 +; CHECK-i64-NEXT: fcvtzs x14, s21 +; CHECK-i64-NEXT: frintx s22, s22 +; CHECK-i64-NEXT: fcvt s3, h3 +; CHECK-i64-NEXT: fcvt s25, 
h27 +; CHECK-i64-NEXT: fcvt s27, h28 +; CHECK-i64-NEXT: frintx s23, s23 +; CHECK-i64-NEXT: mov h21, v1.h[3] +; CHECK-i64-NEXT: fmov d2, x15 +; CHECK-i64-NEXT: fcvtzs x15, s26 +; CHECK-i64-NEXT: fmov d26, x13 +; CHECK-i64-NEXT: mov h1, v1.h[1] +; CHECK-i64-NEXT: fcvtzs x13, s20 +; CHECK-i64-NEXT: frintx s20, s24 +; CHECK-i64-NEXT: fmov d24, x14 +; CHECK-i64-NEXT: fcvtzs x14, s22 +; CHECK-i64-NEXT: frintx s3, s3 +; CHECK-i64-NEXT: fmov d22, x16 +; CHECK-i64-NEXT: frintx s27, s27 +; CHECK-i64-NEXT: fcvtzs x16, s23 +; CHECK-i64-NEXT: fcvt s21, h21 +; CHECK-i64-NEXT: frintx s25, s25 +; CHECK-i64-NEXT: fcvt s1, h1 +; CHECK-i64-NEXT: mov h0, v0.h[3] +; CHECK-i64-NEXT: mov h23, v7.h[2] +; CHECK-i64-NEXT: mov v22.d[1], x15 +; CHECK-i64-NEXT: fcvtzs x15, s20 +; CHECK-i64-NEXT: fmov d20, x13 +; CHECK-i64-NEXT: fcvtzs x13, s3 +; CHECK-i64-NEXT: fmov d3, x14 +; CHECK-i64-NEXT: fcvtzs x14, s27 +; CHECK-i64-NEXT: fmov d27, x16 +; CHECK-i64-NEXT: frintx s21, s21 +; CHECK-i64-NEXT: mov h7, v7.h[3] +; CHECK-i64-NEXT: frintx s1, s1 +; CHECK-i64-NEXT: fcvt s0, h0 +; CHECK-i64-NEXT: fcvt s23, h23 +; CHECK-i64-NEXT: fcvt s19, h19 +; CHECK-i64-NEXT: mov v27.d[1], x15 +; CHECK-i64-NEXT: fcvtzs x15, s25 +; CHECK-i64-NEXT: mov h25, v6.h[3] +; CHECK-i64-NEXT: mov h6, v6.h[1] +; CHECK-i64-NEXT: mov v3.d[1], x13 +; CHECK-i64-NEXT: fcvtzs x13, s21 +; CHECK-i64-NEXT: mov h21, v5.h[1] +; CHECK-i64-NEXT: mov h5, v5.h[3] +; CHECK-i64-NEXT: mov v20.d[1], x14 +; CHECK-i64-NEXT: fcvtzs x14, s1 +; CHECK-i64-NEXT: mov h1, v4.h[1] +; CHECK-i64-NEXT: frintx s0, s0 +; CHECK-i64-NEXT: fcvt s25, h25 +; CHECK-i64-NEXT: fcvt s7, h7 +; CHECK-i64-NEXT: stp q3, q27, [x8, #192] +; CHECK-i64-NEXT: fcvt s6, h6 +; CHECK-i64-NEXT: mov h3, v4.h[3] +; CHECK-i64-NEXT: stp q22, q20, [x8, #128] +; CHECK-i64-NEXT: fcvt s21, h21 +; CHECK-i64-NEXT: fcvt s5, h5 +; CHECK-i64-NEXT: mov v24.d[1], x13 +; CHECK-i64-NEXT: mov v26.d[1], x14 +; CHECK-i64-NEXT: fcvt s4, h4 +; CHECK-i64-NEXT: frintx s22, s25 +; CHECK-i64-NEXT: fmov d20, 
x12 +; CHECK-i64-NEXT: fcvt s1, h1 +; CHECK-i64-NEXT: frintx s6, s6 +; CHECK-i64-NEXT: fcvt s3, h3 +; CHECK-i64-NEXT: fcvtzs x12, s0 +; CHECK-i64-NEXT: frintx s5, s5 +; CHECK-i64-NEXT: frintx s21, s21 +; CHECK-i64-NEXT: fmov d0, x11 +; CHECK-i64-NEXT: stp q26, q24, [x8, #64] +; CHECK-i64-NEXT: fmov d24, x15 +; CHECK-i64-NEXT: frintx s4, s4 +; CHECK-i64-NEXT: fcvtzs x11, s22 +; CHECK-i64-NEXT: frintx s22, s23 +; CHECK-i64-NEXT: frintx s1, s1 +; CHECK-i64-NEXT: fcvtzs x13, s6 +; CHECK-i64-NEXT: frintx s3, s3 +; CHECK-i64-NEXT: frintx s6, s7 +; CHECK-i64-NEXT: fcvtzs x14, s5 +; CHECK-i64-NEXT: mov v24.d[1], x12 +; CHECK-i64-NEXT: frintx s5, s19 +; CHECK-i64-NEXT: fcvtzs x12, s21 +; CHECK-i64-NEXT: mov v0.d[1], x10 +; CHECK-i64-NEXT: fcvtzs x10, s4 +; CHECK-i64-NEXT: mov v20.d[1], x11 +; CHECK-i64-NEXT: fcvtzs x11, s22 +; CHECK-i64-NEXT: mov v2.d[1], x13 +; CHECK-i64-NEXT: fcvtzs x15, s3 +; CHECK-i64-NEXT: fcvtzs x13, s1 +; CHECK-i64-NEXT: mov v18.d[1], x14 +; CHECK-i64-NEXT: fcvtzs x14, s6 +; CHECK-i64-NEXT: stp q0, q24, [x8] +; CHECK-i64-NEXT: mov v17.d[1], x12 +; CHECK-i64-NEXT: fcvtzs x12, s5 +; CHECK-i64-NEXT: fmov d0, x10 +; CHECK-i64-NEXT: fmov d1, x11 +; CHECK-i64-NEXT: stp q2, q20, [x8, #224] +; CHECK-i64-NEXT: fmov d2, x9 +; CHECK-i64-NEXT: mov v16.d[1], x15 +; CHECK-i64-NEXT: stp q17, q18, [x8, #160] +; CHECK-i64-NEXT: mov v0.d[1], x13 +; CHECK-i64-NEXT: mov v1.d[1], x14 +; CHECK-i64-NEXT: mov v2.d[1], x12 +; CHECK-i64-NEXT: stp q0, q16, [x8, #96] +; CHECK-i64-NEXT: stp q2, q1, [x8, #32] +; CHECK-i64-NEXT: ret + %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half> %x) + ret <32 x iXLen> %a } -declare <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half>) +declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half>) -define <1 x i64> @lrint_v1f32(<1 x float> %x) { -; CHECK-SD-LABEL: lrint_v1f32: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: frintx v0.2s, v0.2s -; CHECK-SD-NEXT: fcvtl v0.2d, v0.2s -; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: // 
kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret +define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { +; CHECK-i32-LABEL: lrint_v1f32: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v0.2s, v0.2s +; CHECK-i32-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-i32-NEXT: ret +; +; CHECK-i64-SD-LABEL: lrint_v1f32: +; CHECK-i64-SD: // %bb.0: +; CHECK-i64-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-i64-SD-NEXT: frintx s0, s0 +; CHECK-i64-SD-NEXT: fcvtzs x8, s0 +; CHECK-i64-SD-NEXT: fmov d0, x8 +; CHECK-i64-SD-NEXT: ret ; -; CHECK-GI-LABEL: lrint_v1f32: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: frintx s0, s0 -; CHECK-GI-NEXT: fcvtzs x8, s0 -; CHECK-GI-NEXT: fmov d0, x8 -; CHECK-GI-NEXT: ret - %a = call <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float> %x) - ret <1 x i64> %a +; CHECK-i64-GI-LABEL: lrint_v1f32: +; CHECK-i64-GI: // %bb.0: +; CHECK-i64-GI-NEXT: frintx s0, s0 +; CHECK-i64-GI-NEXT: fcvtzs x8, s0 +; CHECK-i64-GI-NEXT: fmov d0, x8 +; CHECK-i64-GI-NEXT: ret + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x) + ret <1 x iXLen> %a } -declare <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float>) +declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>) -define <2 x i64> @lrint_v2f32(<2 x float> %x) { -; CHECK-LABEL: lrint_v2f32: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx v0.2s, v0.2s -; CHECK-NEXT: fcvtl v0.2d, v0.2s -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: ret - %a = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %x) - ret <2 x i64> %a +define <2 x iXLen> @lrint_v2f32(<2 x float> %x) { +; CHECK-i32-LABEL: lrint_v2f32: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v0.2s, v0.2s +; CHECK-i32-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v2f32: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: frintx v0.2s, v0.2s +; CHECK-i64-NEXT: mov s1, v0.s[1] +; CHECK-i64-NEXT: fcvtzs x8, s0 +; CHECK-i64-NEXT: fcvtzs x9, s1 +; CHECK-i64-NEXT: fmov d0, x8 +; CHECK-i64-NEXT: mov v0.d[1], x9 +; CHECK-i64-NEXT: ret + %a = call <2 x 
iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x) + ret <2 x iXLen> %a } -declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float>) +declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>) -define <4 x i64> @lrint_v4f32(<4 x float> %x) { -; CHECK-LABEL: lrint_v4f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: frintx v0.2s, v0.2s -; CHECK-NEXT: frintx v1.2s, v1.2s -; CHECK-NEXT: fcvtl v0.2d, v0.2s -; CHECK-NEXT: fcvtl v1.2d, v1.2s -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-NEXT: ret - %a = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> %x) - ret <4 x i64> %a +define <4 x iXLen> @lrint_v4f32(<4 x float> %x) { +; CHECK-i32-LABEL: lrint_v4f32: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v0.4s, v0.4s +; CHECK-i32-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v4f32: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-i64-NEXT: frintx v0.2s, v0.2s +; CHECK-i64-NEXT: frintx v1.2s, v1.2s +; CHECK-i64-NEXT: mov s2, v0.s[1] +; CHECK-i64-NEXT: fcvtzs x8, s0 +; CHECK-i64-NEXT: mov s3, v1.s[1] +; CHECK-i64-NEXT: fcvtzs x9, s1 +; CHECK-i64-NEXT: fcvtzs x10, s2 +; CHECK-i64-NEXT: fmov d0, x8 +; CHECK-i64-NEXT: fcvtzs x11, s3 +; CHECK-i64-NEXT: fmov d1, x9 +; CHECK-i64-NEXT: mov v0.d[1], x10 +; CHECK-i64-NEXT: mov v1.d[1], x11 +; CHECK-i64-NEXT: ret + %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x) + ret <4 x iXLen> %a } -declare <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float>) +declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>) -define <8 x i64> @lrint_v8f32(<8 x float> %x) { -; CHECK-LABEL: lrint_v8f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: frintx v0.2s, v0.2s -; CHECK-NEXT: frintx v1.2s, v1.2s -; CHECK-NEXT: frintx v2.2s, v2.2s -; CHECK-NEXT: frintx v3.2s, v3.2s -; CHECK-NEXT: fcvtl v0.2d, v0.2s -; CHECK-NEXT: fcvtl v1.2d, 
v1.2s -; CHECK-NEXT: fcvtl v4.2d, v2.2s -; CHECK-NEXT: fcvtl v3.2d, v3.2s -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v2.2d, v1.2d -; CHECK-NEXT: fcvtzs v1.2d, v4.2d -; CHECK-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-NEXT: ret - %a = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> %x) - ret <8 x i64> %a +define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { +; CHECK-i32-LABEL: lrint_v8f32: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v0.4s, v0.4s +; CHECK-i32-NEXT: frintx v1.4s, v1.4s +; CHECK-i32-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-i32-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v8f32: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-i64-NEXT: ext v3.16b, v1.16b, v1.16b, #8 +; CHECK-i64-NEXT: frintx v0.2s, v0.2s +; CHECK-i64-NEXT: frintx v1.2s, v1.2s +; CHECK-i64-NEXT: frintx v2.2s, v2.2s +; CHECK-i64-NEXT: frintx v3.2s, v3.2s +; CHECK-i64-NEXT: mov s4, v0.s[1] +; CHECK-i64-NEXT: mov s5, v1.s[1] +; CHECK-i64-NEXT: fcvtzs x8, s0 +; CHECK-i64-NEXT: fcvtzs x10, s1 +; CHECK-i64-NEXT: mov s6, v2.s[1] +; CHECK-i64-NEXT: mov s7, v3.s[1] +; CHECK-i64-NEXT: fcvtzs x11, s2 +; CHECK-i64-NEXT: fcvtzs x12, s3 +; CHECK-i64-NEXT: fcvtzs x9, s4 +; CHECK-i64-NEXT: fcvtzs x13, s5 +; CHECK-i64-NEXT: fmov d0, x8 +; CHECK-i64-NEXT: fmov d2, x10 +; CHECK-i64-NEXT: fcvtzs x14, s6 +; CHECK-i64-NEXT: fcvtzs x15, s7 +; CHECK-i64-NEXT: fmov d1, x11 +; CHECK-i64-NEXT: fmov d3, x12 +; CHECK-i64-NEXT: mov v0.d[1], x9 +; CHECK-i64-NEXT: mov v2.d[1], x13 +; CHECK-i64-NEXT: mov v1.d[1], x14 +; CHECK-i64-NEXT: mov v3.d[1], x15 +; CHECK-i64-NEXT: ret + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x) + ret <8 x iXLen> %a } -declare <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float>) +declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>) -define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) { -; CHECK-LABEL: lrint_v16i64_v16f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 -; 
CHECK-NEXT: ext v5.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: ext v7.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: frintx v0.2s, v0.2s -; CHECK-NEXT: frintx v1.2s, v1.2s -; CHECK-NEXT: frintx v2.2s, v2.2s -; CHECK-NEXT: frintx v3.2s, v3.2s -; CHECK-NEXT: frintx v5.2s, v5.2s -; CHECK-NEXT: frintx v4.2s, v4.2s -; CHECK-NEXT: frintx v6.2s, v6.2s -; CHECK-NEXT: frintx v7.2s, v7.2s -; CHECK-NEXT: fcvtl v0.2d, v0.2s -; CHECK-NEXT: fcvtl v1.2d, v1.2s -; CHECK-NEXT: fcvtl v16.2d, v2.2s -; CHECK-NEXT: fcvtl v18.2d, v3.2s -; CHECK-NEXT: fcvtl v5.2d, v5.2s -; CHECK-NEXT: fcvtl v17.2d, v4.2s -; CHECK-NEXT: fcvtl v19.2d, v6.2s -; CHECK-NEXT: fcvtl v7.2d, v7.2s -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v2.2d, v1.2d -; CHECK-NEXT: fcvtzs v4.2d, v16.2d -; CHECK-NEXT: fcvtzs v6.2d, v18.2d -; CHECK-NEXT: fcvtzs v1.2d, v5.2d -; CHECK-NEXT: fcvtzs v3.2d, v17.2d -; CHECK-NEXT: fcvtzs v5.2d, v19.2d -; CHECK-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-NEXT: ret - %a = call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> %x) - ret <16 x i64> %a +define <16 x iXLen> @lrint_v16i64_v16f32(<16 x float> %x) { +; CHECK-i32-LABEL: lrint_v16i64_v16f32: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v0.4s, v0.4s +; CHECK-i32-NEXT: frintx v1.4s, v1.4s +; CHECK-i32-NEXT: frintx v2.4s, v2.4s +; CHECK-i32-NEXT: frintx v3.4s, v3.4s +; CHECK-i32-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-i32-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-i32-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-i32-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v16i64_v16f32: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: frintx v4.2s, v0.2s +; CHECK-i64-NEXT: frintx v5.2s, v1.2s +; CHECK-i64-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-i64-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-i64-NEXT: ext v6.16b, v2.16b, v2.16b, #8 +; CHECK-i64-NEXT: ext v7.16b, v3.16b, v3.16b, #8 +; CHECK-i64-NEXT: frintx v2.2s, v2.2s +; CHECK-i64-NEXT: frintx v3.2s, v3.2s +; CHECK-i64-NEXT: mov s16, 
v4.s[1] +; CHECK-i64-NEXT: mov s17, v5.s[1] +; CHECK-i64-NEXT: fcvtzs x8, s4 +; CHECK-i64-NEXT: frintx v0.2s, v0.2s +; CHECK-i64-NEXT: frintx v1.2s, v1.2s +; CHECK-i64-NEXT: fcvtzs x9, s5 +; CHECK-i64-NEXT: frintx v4.2s, v6.2s +; CHECK-i64-NEXT: frintx v5.2s, v7.2s +; CHECK-i64-NEXT: fcvtzs x10, s2 +; CHECK-i64-NEXT: mov s6, v2.s[1] +; CHECK-i64-NEXT: fcvtzs x13, s3 +; CHECK-i64-NEXT: mov s3, v3.s[1] +; CHECK-i64-NEXT: fcvtzs x11, s16 +; CHECK-i64-NEXT: fcvtzs x12, s17 +; CHECK-i64-NEXT: mov s7, v0.s[1] +; CHECK-i64-NEXT: mov s16, v1.s[1] +; CHECK-i64-NEXT: fcvtzs x15, s1 +; CHECK-i64-NEXT: mov s1, v4.s[1] +; CHECK-i64-NEXT: mov s17, v5.s[1] +; CHECK-i64-NEXT: fcvtzs x14, s0 +; CHECK-i64-NEXT: fmov d0, x8 +; CHECK-i64-NEXT: fcvtzs x8, s4 +; CHECK-i64-NEXT: fmov d4, x10 +; CHECK-i64-NEXT: fcvtzs x10, s5 +; CHECK-i64-NEXT: fmov d2, x9 +; CHECK-i64-NEXT: fcvtzs x9, s6 +; CHECK-i64-NEXT: fmov d6, x13 +; CHECK-i64-NEXT: fcvtzs x13, s7 +; CHECK-i64-NEXT: fcvtzs x16, s16 +; CHECK-i64-NEXT: fcvtzs x17, s3 +; CHECK-i64-NEXT: fcvtzs x18, s1 +; CHECK-i64-NEXT: fcvtzs x0, s17 +; CHECK-i64-NEXT: fmov d1, x14 +; CHECK-i64-NEXT: fmov d3, x15 +; CHECK-i64-NEXT: fmov d5, x8 +; CHECK-i64-NEXT: fmov d7, x10 +; CHECK-i64-NEXT: mov v0.d[1], x11 +; CHECK-i64-NEXT: mov v2.d[1], x12 +; CHECK-i64-NEXT: mov v4.d[1], x9 +; CHECK-i64-NEXT: mov v1.d[1], x13 +; CHECK-i64-NEXT: mov v3.d[1], x16 +; CHECK-i64-NEXT: mov v6.d[1], x17 +; CHECK-i64-NEXT: mov v5.d[1], x18 +; CHECK-i64-NEXT: mov v7.d[1], x0 +; CHECK-i64-NEXT: ret + %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x) + ret <16 x iXLen> %a } -declare <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float>) +declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>) -define <32 x i64> @lrint_v32i64_v32f32(<32 x float> %x) { -; CHECK-LABEL: lrint_v32i64_v32f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ext v16.16b, v7.16b, v7.16b, #8 -; CHECK-NEXT: ext v17.16b, v6.16b, v6.16b, #8 -; CHECK-NEXT: frintx v7.2s, v7.2s -; CHECK-NEXT: 
frintx v6.2s, v6.2s -; CHECK-NEXT: ext v18.16b, v5.16b, v5.16b, #8 -; CHECK-NEXT: ext v21.16b, v4.16b, v4.16b, #8 -; CHECK-NEXT: ext v22.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: frintx v5.2s, v5.2s -; CHECK-NEXT: ext v23.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: frintx v4.2s, v4.2s -; CHECK-NEXT: ext v19.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v20.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: frintx v16.2s, v16.2s -; CHECK-NEXT: frintx v17.2s, v17.2s -; CHECK-NEXT: fcvtl v7.2d, v7.2s -; CHECK-NEXT: fcvtl v6.2d, v6.2s -; CHECK-NEXT: frintx v18.2s, v18.2s -; CHECK-NEXT: frintx v21.2s, v21.2s -; CHECK-NEXT: frintx v2.2s, v2.2s -; CHECK-NEXT: frintx v3.2s, v3.2s -; CHECK-NEXT: fcvtl v5.2d, v5.2s -; CHECK-NEXT: frintx v23.2s, v23.2s -; CHECK-NEXT: fcvtl v4.2d, v4.2s -; CHECK-NEXT: frintx v1.2s, v1.2s -; CHECK-NEXT: fcvtl v16.2d, v16.2s -; CHECK-NEXT: fcvtl v17.2d, v17.2s -; CHECK-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-NEXT: fcvtzs v6.2d, v6.2d -; CHECK-NEXT: fcvtl v18.2d, v18.2s -; CHECK-NEXT: fcvtl v21.2d, v21.2s -; CHECK-NEXT: frintx v20.2s, v20.2s -; CHECK-NEXT: fcvtl v3.2d, v3.2s -; CHECK-NEXT: fcvtzs v5.2d, v5.2d -; CHECK-NEXT: frintx v0.2s, v0.2s -; CHECK-NEXT: fcvtl v2.2d, v2.2s -; CHECK-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-NEXT: fcvtzs v16.2d, v16.2d -; CHECK-NEXT: fcvtzs v17.2d, v17.2d -; CHECK-NEXT: fcvtl v1.2d, v1.2s -; CHECK-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-NEXT: fcvtl v0.2d, v0.2s -; CHECK-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-NEXT: stp q6, q17, [x8, #192] -; CHECK-NEXT: fcvtl v6.2d, v23.2s -; CHECK-NEXT: frintx v17.2s, v19.2s -; CHECK-NEXT: stp q7, q16, [x8, #224] -; CHECK-NEXT: frintx v7.2s, v22.2s -; CHECK-NEXT: fcvtzs v16.2d, v18.2d -; CHECK-NEXT: fcvtzs v18.2d, v21.2d -; CHECK-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v6.2d, v6.2d -; CHECK-NEXT: stp q5, q16, [x8, #160] -; CHECK-NEXT: fcvtl v7.2d, v7.2s -; CHECK-NEXT: fcvtl v5.2d, v20.2s -; CHECK-NEXT: stp q4, q18, [x8, #128] -; CHECK-NEXT: fcvtl v4.2d, v17.2s -; CHECK-NEXT: stp 
q3, q6, [x8, #96] -; CHECK-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-NEXT: fcvtzs v3.2d, v5.2d -; CHECK-NEXT: stp q1, q3, [x8, #32] -; CHECK-NEXT: stp q2, q7, [x8, #64] -; CHECK-NEXT: fcvtzs v2.2d, v4.2d -; CHECK-NEXT: stp q0, q2, [x8] -; CHECK-NEXT: ret - %a = call <32 x i64> @llvm.lrint.v32i64.v32f32(<32 x float> %x) - ret <32 x i64> %a +define <32 x iXLen> @lrint_v32i64_v32f32(<32 x float> %x) { +; CHECK-i32-LABEL: lrint_v32i64_v32f32: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v0.4s, v0.4s +; CHECK-i32-NEXT: frintx v1.4s, v1.4s +; CHECK-i32-NEXT: frintx v2.4s, v2.4s +; CHECK-i32-NEXT: frintx v3.4s, v3.4s +; CHECK-i32-NEXT: frintx v4.4s, v4.4s +; CHECK-i32-NEXT: frintx v5.4s, v5.4s +; CHECK-i32-NEXT: frintx v6.4s, v6.4s +; CHECK-i32-NEXT: frintx v7.4s, v7.4s +; CHECK-i32-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-i32-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-i32-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-i32-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-i32-NEXT: fcvtzs v4.4s, v4.4s +; CHECK-i32-NEXT: fcvtzs v5.4s, v5.4s +; CHECK-i32-NEXT: fcvtzs v6.4s, v6.4s +; CHECK-i32-NEXT: fcvtzs v7.4s, v7.4s +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v32i64_v32f32: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: ext v17.16b, v3.16b, v3.16b, #8 +; CHECK-i64-NEXT: ext v18.16b, v4.16b, v4.16b, #8 +; CHECK-i64-NEXT: ext v19.16b, v5.16b, v5.16b, #8 +; CHECK-i64-NEXT: ext v21.16b, v7.16b, v7.16b, #8 +; CHECK-i64-NEXT: ext v16.16b, v2.16b, v2.16b, #8 +; CHECK-i64-NEXT: ext v20.16b, v6.16b, v6.16b, #8 +; CHECK-i64-NEXT: frintx v7.2s, v7.2s +; CHECK-i64-NEXT: frintx v24.2s, v6.2s +; CHECK-i64-NEXT: frintx v23.2s, v5.2s +; CHECK-i64-NEXT: frintx v4.2s, v4.2s +; CHECK-i64-NEXT: frintx v3.2s, v3.2s +; CHECK-i64-NEXT: frintx v17.2s, v17.2s +; CHECK-i64-NEXT: frintx v18.2s, v18.2s +; CHECK-i64-NEXT: frintx v22.2s, v19.2s +; CHECK-i64-NEXT: frintx v21.2s, v21.2s +; CHECK-i64-NEXT: frintx v16.2s, v16.2s +; CHECK-i64-NEXT: frintx v20.2s, v20.2s +; CHECK-i64-NEXT: mov s25, v7.s[1] +; CHECK-i64-NEXT: fcvtzs x15, s7 +; 
CHECK-i64-NEXT: frintx v19.2s, v1.2s +; CHECK-i64-NEXT: fcvtzs x16, s24 +; CHECK-i64-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECK-i64-NEXT: fcvtzs x10, s17 +; CHECK-i64-NEXT: fcvtzs x11, s18 +; CHECK-i64-NEXT: mov s26, v22.s[1] +; CHECK-i64-NEXT: fcvtzs x12, s22 +; CHECK-i64-NEXT: mov s22, v21.s[1] +; CHECK-i64-NEXT: fcvtzs x14, s21 +; CHECK-i64-NEXT: mov s21, v24.s[1] +; CHECK-i64-NEXT: fcvtzs x9, s16 +; CHECK-i64-NEXT: fcvtzs x13, s20 +; CHECK-i64-NEXT: mov s20, v20.s[1] +; CHECK-i64-NEXT: fmov d24, x15 +; CHECK-i64-NEXT: mov s18, v18.s[1] +; CHECK-i64-NEXT: fmov d6, x10 +; CHECK-i64-NEXT: fmov d7, x11 +; CHECK-i64-NEXT: fcvtzs x10, s25 +; CHECK-i64-NEXT: fcvtzs x11, s22 +; CHECK-i64-NEXT: fmov d25, x12 +; CHECK-i64-NEXT: frintx v22.2s, v2.2s +; CHECK-i64-NEXT: fcvtzs x15, s21 +; CHECK-i64-NEXT: fmov d21, x14 +; CHECK-i64-NEXT: fmov d5, x9 +; CHECK-i64-NEXT: fcvtzs x9, s26 +; CHECK-i64-NEXT: fmov d26, x13 +; CHECK-i64-NEXT: fcvtzs x12, s20 +; CHECK-i64-NEXT: fcvtzs x13, s19 +; CHECK-i64-NEXT: mov s20, v23.s[1] +; CHECK-i64-NEXT: mov v24.d[1], x10 +; CHECK-i64-NEXT: mov v21.d[1], x11 +; CHECK-i64-NEXT: fcvtzs x11, s23 +; CHECK-i64-NEXT: fcvtzs x10, s22 +; CHECK-i64-NEXT: mov s17, v17.s[1] +; CHECK-i64-NEXT: frintx v1.2s, v1.2s +; CHECK-i64-NEXT: mov s22, v22.s[1] +; CHECK-i64-NEXT: mov v26.d[1], x12 +; CHECK-i64-NEXT: fcvtzs x12, s18 +; CHECK-i64-NEXT: mov v25.d[1], x9 +; CHECK-i64-NEXT: fmov d2, x13 +; CHECK-i64-NEXT: fcvtzs x13, s20 +; CHECK-i64-NEXT: fmov d20, x16 +; CHECK-i64-NEXT: stp q24, q21, [x8, #224] +; CHECK-i64-NEXT: ext v21.16b, v0.16b, v0.16b, #8 +; CHECK-i64-NEXT: fmov d18, x11 +; CHECK-i64-NEXT: fcvtzs x11, s4 +; CHECK-i64-NEXT: mov s4, v4.s[1] +; CHECK-i64-NEXT: fmov d23, x10 +; CHECK-i64-NEXT: mov v20.d[1], x15 +; CHECK-i64-NEXT: fcvtzs x10, s3 +; CHECK-i64-NEXT: mov s3, v3.s[1] +; CHECK-i64-NEXT: mov v18.d[1], x13 +; CHECK-i64-NEXT: frintx v0.2s, v0.2s +; CHECK-i64-NEXT: mov s16, v16.s[1] +; CHECK-i64-NEXT: frintx v21.2s, v21.2s +; 
CHECK-i64-NEXT: fcvtzs x13, s17 +; CHECK-i64-NEXT: fcvtzs x14, s22 +; CHECK-i64-NEXT: fcvtzs x9, s4 +; CHECK-i64-NEXT: fmov d4, x11 +; CHECK-i64-NEXT: mov v7.d[1], x12 +; CHECK-i64-NEXT: stp q20, q26, [x8, #192] +; CHECK-i64-NEXT: fmov d20, x10 +; CHECK-i64-NEXT: fcvtzs x10, s3 +; CHECK-i64-NEXT: stp q18, q25, [x8, #160] +; CHECK-i64-NEXT: mov s18, v19.s[1] +; CHECK-i64-NEXT: mov s3, v1.s[1] +; CHECK-i64-NEXT: mov s17, v0.s[1] +; CHECK-i64-NEXT: mov s19, v21.s[1] +; CHECK-i64-NEXT: fcvtzs x11, s21 +; CHECK-i64-NEXT: mov v4.d[1], x9 +; CHECK-i64-NEXT: fcvtzs x9, s16 +; CHECK-i64-NEXT: fcvtzs x12, s1 +; CHECK-i64-NEXT: mov v6.d[1], x13 +; CHECK-i64-NEXT: fcvtzs x13, s0 +; CHECK-i64-NEXT: mov v20.d[1], x10 +; CHECK-i64-NEXT: fcvtzs x15, s18 +; CHECK-i64-NEXT: fcvtzs x10, s3 +; CHECK-i64-NEXT: mov v23.d[1], x14 +; CHECK-i64-NEXT: fcvtzs x14, s17 +; CHECK-i64-NEXT: fmov d3, x11 +; CHECK-i64-NEXT: stp q4, q7, [x8, #128] +; CHECK-i64-NEXT: mov v5.d[1], x9 +; CHECK-i64-NEXT: fcvtzs x9, s19 +; CHECK-i64-NEXT: stp q20, q6, [x8, #96] +; CHECK-i64-NEXT: fmov d0, x12 +; CHECK-i64-NEXT: fmov d1, x13 +; CHECK-i64-NEXT: mov v2.d[1], x15 +; CHECK-i64-NEXT: stp q23, q5, [x8, #64] +; CHECK-i64-NEXT: mov v0.d[1], x10 +; CHECK-i64-NEXT: mov v1.d[1], x14 +; CHECK-i64-NEXT: mov v3.d[1], x9 +; CHECK-i64-NEXT: stp q2, q0, [x8, #32] +; CHECK-i64-NEXT: stp q1, q3, [x8] +; CHECK-i64-NEXT: ret + %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float> %x) + ret <32 x iXLen> %a } -declare <32 x i64> @llvm.lrint.v32i64.v32f32(<32 x float>) +declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float>) -define <1 x i64> @lrint_v1f64(<1 x double> %x) { -; CHECK-LABEL: lrint_v1f64: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx d0, d0 -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret - %a = call <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double> %x) - ret <1 x i64> %a +define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { +; CHECK-i32-LABEL: lrint_v1f64: +; CHECK-i32: // %bb.0: 
+; CHECK-i32-NEXT: frintx d0, d0 +; CHECK-i32-NEXT: fcvtzs w8, d0 +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v1f64: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: frintx d0, d0 +; CHECK-i64-NEXT: fcvtzs x8, d0 +; CHECK-i64-NEXT: fmov d0, x8 +; CHECK-i64-NEXT: ret + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x) + ret <1 x iXLen> %a } -declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>) +declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>) -define <2 x i64> @lrint_v2f64(<2 x double> %x) { -; CHECK-LABEL: lrint_v2f64: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: ret - %a = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> %x) - ret <2 x i64> %a +define <2 x iXLen> @lrint_v2f64(<2 x double> %x) { +; CHECK-i32-LABEL: lrint_v2f64: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v0.2d, v0.2d +; CHECK-i32-NEXT: mov d1, v0.d[1] +; CHECK-i32-NEXT: fcvtzs w8, d0 +; CHECK-i32-NEXT: fcvtzs w9, d1 +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: mov v0.s[1], w9 +; CHECK-i32-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v2f64: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: frintx v0.2d, v0.2d +; CHECK-i64-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-i64-NEXT: ret + %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x) + ret <2 x iXLen> %a } -declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>) +declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>) -define <4 x i64> @lrint_v4f64(<4 x double> %x) { -; CHECK-LABEL: lrint_v4f64: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx v0.2d, v0.2d -; CHECK-NEXT: frintx v1.2d, v1.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-NEXT: ret - %a = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> %x) - ret <4 x i64> %a +define <4 x iXLen> @lrint_v4f64(<4 x double> %x) { +; CHECK-i32-LABEL: lrint_v4f64: +; CHECK-i32: // %bb.0: +; 
CHECK-i32-NEXT: frintx v0.2d, v0.2d +; CHECK-i32-NEXT: frintx v1.2d, v1.2d +; CHECK-i32-NEXT: mov d2, v0.d[1] +; CHECK-i32-NEXT: fcvtzs w8, d0 +; CHECK-i32-NEXT: fcvtzs w9, d2 +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: fcvtzs w8, d1 +; CHECK-i32-NEXT: mov d1, v1.d[1] +; CHECK-i32-NEXT: mov v0.s[1], w9 +; CHECK-i32-NEXT: mov v0.s[2], w8 +; CHECK-i32-NEXT: fcvtzs w8, d1 +; CHECK-i32-NEXT: mov v0.s[3], w8 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v4f64: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: frintx v0.2d, v0.2d +; CHECK-i64-NEXT: frintx v1.2d, v1.2d +; CHECK-i64-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-i64-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-i64-NEXT: ret + %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x) + ret <4 x iXLen> %a } -declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>) +declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>) -define <8 x i64> @lrint_v8f64(<8 x double> %x) { -; CHECK-LABEL: lrint_v8f64: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx v0.2d, v0.2d -; CHECK-NEXT: frintx v1.2d, v1.2d -; CHECK-NEXT: frintx v2.2d, v2.2d -; CHECK-NEXT: frintx v3.2d, v3.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-NEXT: ret - %a = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> %x) - ret <8 x i64> %a +define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { +; CHECK-i32-LABEL: lrint_v8f64: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v2.2d, v2.2d +; CHECK-i32-NEXT: frintx v0.2d, v0.2d +; CHECK-i32-NEXT: frintx v3.2d, v3.2d +; CHECK-i32-NEXT: mov d4, v0.d[1] +; CHECK-i32-NEXT: mov d5, v2.d[1] +; CHECK-i32-NEXT: fcvtzs w8, d0 +; CHECK-i32-NEXT: fcvtzs w9, d2 +; CHECK-i32-NEXT: frintx v2.2d, v1.2d +; CHECK-i32-NEXT: fcvtzs w10, d4 +; CHECK-i32-NEXT: fcvtzs w11, d5 +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: fmov s1, w9 +; CHECK-i32-NEXT: fcvtzs w8, d2 +; CHECK-i32-NEXT: mov d2, v2.d[1] +; CHECK-i32-NEXT: fcvtzs w9, d3 +; 
CHECK-i32-NEXT: mov d3, v3.d[1] +; CHECK-i32-NEXT: mov v0.s[1], w10 +; CHECK-i32-NEXT: mov v1.s[1], w11 +; CHECK-i32-NEXT: mov v0.s[2], w8 +; CHECK-i32-NEXT: fcvtzs w8, d2 +; CHECK-i32-NEXT: mov v1.s[2], w9 +; CHECK-i32-NEXT: fcvtzs w9, d3 +; CHECK-i32-NEXT: mov v0.s[3], w8 +; CHECK-i32-NEXT: mov v1.s[3], w9 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v8f64: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: frintx v0.2d, v0.2d +; CHECK-i64-NEXT: frintx v1.2d, v1.2d +; CHECK-i64-NEXT: frintx v2.2d, v2.2d +; CHECK-i64-NEXT: frintx v3.2d, v3.2d +; CHECK-i64-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-i64-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-i64-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-i64-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-i64-NEXT: ret + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x) + ret <8 x iXLen> %a } -declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>) +declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>) -define <16 x i64> @lrint_v16f64(<16 x double> %x) { -; CHECK-LABEL: lrint_v16f64: -; CHECK: // %bb.0: -; CHECK-NEXT: frintx v0.2d, v0.2d -; CHECK-NEXT: frintx v1.2d, v1.2d -; CHECK-NEXT: frintx v2.2d, v2.2d -; CHECK-NEXT: frintx v3.2d, v3.2d -; CHECK-NEXT: frintx v4.2d, v4.2d -; CHECK-NEXT: frintx v5.2d, v5.2d -; CHECK-NEXT: frintx v6.2d, v6.2d -; CHECK-NEXT: frintx v7.2d, v7.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-NEXT: fcvtzs v5.2d, v5.2d -; CHECK-NEXT: fcvtzs v6.2d, v6.2d -; CHECK-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-NEXT: ret - %a = call <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double> %x) - ret <16 x i64> %a +define <16 x iXLen> @lrint_v16f64(<16 x double> %x) { +; CHECK-i32-LABEL: lrint_v16f64: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v0.2d, v0.2d +; CHECK-i32-NEXT: frintx v2.2d, v2.2d +; CHECK-i32-NEXT: frintx v4.2d, v4.2d +; CHECK-i32-NEXT: frintx v6.2d, v6.2d +; CHECK-i32-NEXT: frintx 
v17.2d, v1.2d +; CHECK-i32-NEXT: frintx v5.2d, v5.2d +; CHECK-i32-NEXT: fcvtzs w8, d0 +; CHECK-i32-NEXT: mov d16, v0.d[1] +; CHECK-i32-NEXT: fcvtzs w9, d2 +; CHECK-i32-NEXT: mov d2, v2.d[1] +; CHECK-i32-NEXT: fcvtzs w10, d4 +; CHECK-i32-NEXT: mov d4, v4.d[1] +; CHECK-i32-NEXT: fcvtzs w11, d6 +; CHECK-i32-NEXT: mov d6, v6.d[1] +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: fcvtzs w8, d16 +; CHECK-i32-NEXT: frintx v16.2d, v3.2d +; CHECK-i32-NEXT: fmov s1, w9 +; CHECK-i32-NEXT: fcvtzs w9, d2 +; CHECK-i32-NEXT: fmov s2, w10 +; CHECK-i32-NEXT: fcvtzs w10, d4 +; CHECK-i32-NEXT: frintx v4.2d, v7.2d +; CHECK-i32-NEXT: fmov s3, w11 +; CHECK-i32-NEXT: fcvtzs w11, d6 +; CHECK-i32-NEXT: mov d6, v17.d[1] +; CHECK-i32-NEXT: mov v0.s[1], w8 +; CHECK-i32-NEXT: fcvtzs w8, d17 +; CHECK-i32-NEXT: mov d7, v16.d[1] +; CHECK-i32-NEXT: mov v1.s[1], w9 +; CHECK-i32-NEXT: fcvtzs w9, d16 +; CHECK-i32-NEXT: mov v2.s[1], w10 +; CHECK-i32-NEXT: fcvtzs w10, d5 +; CHECK-i32-NEXT: mov d5, v5.d[1] +; CHECK-i32-NEXT: mov v3.s[1], w11 +; CHECK-i32-NEXT: fcvtzs w11, d4 +; CHECK-i32-NEXT: mov d4, v4.d[1] +; CHECK-i32-NEXT: mov v0.s[2], w8 +; CHECK-i32-NEXT: fcvtzs w8, d6 +; CHECK-i32-NEXT: mov v1.s[2], w9 +; CHECK-i32-NEXT: fcvtzs w9, d7 +; CHECK-i32-NEXT: mov v2.s[2], w10 +; CHECK-i32-NEXT: fcvtzs w10, d5 +; CHECK-i32-NEXT: mov v3.s[2], w11 +; CHECK-i32-NEXT: fcvtzs w11, d4 +; CHECK-i32-NEXT: mov v0.s[3], w8 +; CHECK-i32-NEXT: mov v1.s[3], w9 +; CHECK-i32-NEXT: mov v2.s[3], w10 +; CHECK-i32-NEXT: mov v3.s[3], w11 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v16f64: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: frintx v0.2d, v0.2d +; CHECK-i64-NEXT: frintx v1.2d, v1.2d +; CHECK-i64-NEXT: frintx v2.2d, v2.2d +; CHECK-i64-NEXT: frintx v3.2d, v3.2d +; CHECK-i64-NEXT: frintx v4.2d, v4.2d +; CHECK-i64-NEXT: frintx v5.2d, v5.2d +; CHECK-i64-NEXT: frintx v6.2d, v6.2d +; CHECK-i64-NEXT: frintx v7.2d, v7.2d +; CHECK-i64-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-i64-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-i64-NEXT: 
fcvtzs v2.2d, v2.2d +; CHECK-i64-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-i64-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-i64-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-i64-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-i64-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-i64-NEXT: ret + %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double> %x) + ret <16 x iXLen> %a } -declare <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double>) +declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>) -define <32 x i64> @lrint_v32f64(<32 x double> %x) { -; CHECK-LABEL: lrint_v32f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp q17, q16, [sp, #96] -; CHECK-NEXT: frintx v7.2d, v7.2d -; CHECK-NEXT: ldp q19, q18, [sp, #64] -; CHECK-NEXT: frintx v6.2d, v6.2d -; CHECK-NEXT: ldp q21, q20, [sp, #32] -; CHECK-NEXT: frintx v5.2d, v5.2d -; CHECK-NEXT: frintx v16.2d, v16.2d -; CHECK-NEXT: frintx v17.2d, v17.2d -; CHECK-NEXT: frintx v4.2d, v4.2d -; CHECK-NEXT: frintx v18.2d, v18.2d -; CHECK-NEXT: frintx v19.2d, v19.2d -; CHECK-NEXT: frintx v3.2d, v3.2d -; CHECK-NEXT: ldp q23, q22, [sp] -; CHECK-NEXT: frintx v20.2d, v20.2d -; CHECK-NEXT: frintx v21.2d, v21.2d -; CHECK-NEXT: frintx v2.2d, v2.2d -; CHECK-NEXT: frintx v1.2d, v1.2d -; CHECK-NEXT: fcvtzs v16.2d, v16.2d -; CHECK-NEXT: fcvtzs v17.2d, v17.2d -; CHECK-NEXT: frintx v0.2d, v0.2d -; CHECK-NEXT: frintx v22.2d, v22.2d -; CHECK-NEXT: fcvtzs v18.2d, v18.2d -; CHECK-NEXT: frintx v23.2d, v23.2d -; CHECK-NEXT: fcvtzs v19.2d, v19.2d -; CHECK-NEXT: fcvtzs v20.2d, v20.2d -; CHECK-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-NEXT: fcvtzs v6.2d, v6.2d -; CHECK-NEXT: fcvtzs v5.2d, v5.2d -; CHECK-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-NEXT: stp q17, q16, [x8, #224] -; CHECK-NEXT: fcvtzs v16.2d, v21.2d -; CHECK-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-NEXT: fcvtzs v17.2d, v22.2d -; CHECK-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-NEXT: stp q19, q18, [x8, #192] -; CHECK-NEXT: fcvtzs v18.2d, v23.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: stp q4, q5, [x8, #64] -; CHECK-NEXT: 
stp q6, q7, [x8, #96] -; CHECK-NEXT: stp q2, q3, [x8, #32] -; CHECK-NEXT: stp q0, q1, [x8] -; CHECK-NEXT: stp q18, q17, [x8, #128] -; CHECK-NEXT: stp q16, q20, [x8, #160] -; CHECK-NEXT: ret - %a = call <32 x i64> @llvm.lrint.v32i64.v16f64(<32 x double> %x) - ret <32 x i64> %a +define <32 x iXLen> @lrint_v32f64(<32 x double> %x) { +; CHECK-i32-LABEL: lrint_v32f64: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v17.2d, v0.2d +; CHECK-i32-NEXT: frintx v19.2d, v2.2d +; CHECK-i32-NEXT: frintx v0.2d, v1.2d +; CHECK-i32-NEXT: frintx v1.2d, v4.2d +; CHECK-i32-NEXT: frintx v2.2d, v3.2d +; CHECK-i32-NEXT: frintx v3.2d, v5.2d +; CHECK-i32-NEXT: ldp q16, q5, [sp] +; CHECK-i32-NEXT: frintx v18.2d, v6.2d +; CHECK-i32-NEXT: frintx v4.2d, v7.2d +; CHECK-i32-NEXT: ldp q22, q6, [sp, #64] +; CHECK-i32-NEXT: mov d20, v17.d[1] +; CHECK-i32-NEXT: mov d21, v19.d[1] +; CHECK-i32-NEXT: fcvtzs w8, d17 +; CHECK-i32-NEXT: fcvtzs w9, d19 +; CHECK-i32-NEXT: ldp q17, q7, [sp, #32] +; CHECK-i32-NEXT: fcvtzs w12, d0 +; CHECK-i32-NEXT: mov d19, v1.d[1] +; CHECK-i32-NEXT: fcvtzs w13, d1 +; CHECK-i32-NEXT: frintx v16.2d, v16.2d +; CHECK-i32-NEXT: mov d23, v18.d[1] +; CHECK-i32-NEXT: fcvtzs w15, d18 +; CHECK-i32-NEXT: fcvtzs w10, d20 +; CHECK-i32-NEXT: fcvtzs w11, d21 +; CHECK-i32-NEXT: mov d21, v0.d[1] +; CHECK-i32-NEXT: fmov s0, w8 +; CHECK-i32-NEXT: fmov s1, w9 +; CHECK-i32-NEXT: frintx v17.2d, v17.2d +; CHECK-i32-NEXT: frintx v20.2d, v22.2d +; CHECK-i32-NEXT: mov d22, v2.d[1] +; CHECK-i32-NEXT: fcvtzs w14, d19 +; CHECK-i32-NEXT: mov d18, v16.d[1] +; CHECK-i32-NEXT: frintx v7.2d, v7.2d +; CHECK-i32-NEXT: mov v0.s[1], w10 +; CHECK-i32-NEXT: fcvtzs w10, d2 +; CHECK-i32-NEXT: mov v1.s[1], w11 +; CHECK-i32-NEXT: fcvtzs w8, d21 +; CHECK-i32-NEXT: ldp q21, q19, [sp, #96] +; CHECK-i32-NEXT: fmov s2, w13 +; CHECK-i32-NEXT: fcvtzs w11, d23 +; CHECK-i32-NEXT: mov d23, v3.d[1] +; CHECK-i32-NEXT: fcvtzs w9, d22 +; CHECK-i32-NEXT: mov d22, v17.d[1] +; CHECK-i32-NEXT: fcvtzs w13, d18 +; CHECK-i32-NEXT: mov 
v0.s[2], w12 +; CHECK-i32-NEXT: fcvtzs w12, d16 +; CHECK-i32-NEXT: mov v1.s[2], w10 +; CHECK-i32-NEXT: fcvtzs w10, d3 +; CHECK-i32-NEXT: fmov s3, w15 +; CHECK-i32-NEXT: frintx v21.2d, v21.2d +; CHECK-i32-NEXT: mov v2.s[1], w14 +; CHECK-i32-NEXT: mov d16, v20.d[1] +; CHECK-i32-NEXT: fcvtzs w14, d17 +; CHECK-i32-NEXT: mov d17, v4.d[1] +; CHECK-i32-NEXT: fcvtzs w15, d22 +; CHECK-i32-NEXT: frintx v22.2d, v5.2d +; CHECK-i32-NEXT: mov v3.s[1], w11 +; CHECK-i32-NEXT: fcvtzs w11, d4 +; CHECK-i32-NEXT: fmov s4, w12 +; CHECK-i32-NEXT: fcvtzs w12, d20 +; CHECK-i32-NEXT: mov d18, v21.d[1] +; CHECK-i32-NEXT: mov d20, v7.d[1] +; CHECK-i32-NEXT: fmov s5, w14 +; CHECK-i32-NEXT: fcvtzs w14, d21 +; CHECK-i32-NEXT: mov v2.s[2], w10 +; CHECK-i32-NEXT: mov v4.s[1], w13 +; CHECK-i32-NEXT: fcvtzs w13, d16 +; CHECK-i32-NEXT: frintx v16.2d, v6.2d +; CHECK-i32-NEXT: fcvtzs w10, d23 +; CHECK-i32-NEXT: mov v3.s[2], w11 +; CHECK-i32-NEXT: fcvtzs w11, d17 +; CHECK-i32-NEXT: fmov s6, w12 +; CHECK-i32-NEXT: mov v5.s[1], w15 +; CHECK-i32-NEXT: fcvtzs w15, d18 +; CHECK-i32-NEXT: frintx v18.2d, v19.2d +; CHECK-i32-NEXT: fcvtzs w12, d22 +; CHECK-i32-NEXT: mov d19, v22.d[1] +; CHECK-i32-NEXT: mov v0.s[3], w8 +; CHECK-i32-NEXT: mov v1.s[3], w9 +; CHECK-i32-NEXT: mov v6.s[1], w13 +; CHECK-i32-NEXT: fcvtzs w13, d7 +; CHECK-i32-NEXT: fmov s7, w14 +; CHECK-i32-NEXT: fcvtzs w14, d16 +; CHECK-i32-NEXT: mov d16, v16.d[1] +; CHECK-i32-NEXT: mov v2.s[3], w10 +; CHECK-i32-NEXT: mov v4.s[2], w12 +; CHECK-i32-NEXT: fcvtzs w12, d19 +; CHECK-i32-NEXT: mov v3.s[3], w11 +; CHECK-i32-NEXT: mov v7.s[1], w15 +; CHECK-i32-NEXT: fcvtzs w15, d18 +; CHECK-i32-NEXT: mov d18, v18.d[1] +; CHECK-i32-NEXT: mov v5.s[2], w13 +; CHECK-i32-NEXT: fcvtzs w13, d20 +; CHECK-i32-NEXT: mov v6.s[2], w14 +; CHECK-i32-NEXT: fcvtzs w14, d16 +; CHECK-i32-NEXT: mov v4.s[3], w12 +; CHECK-i32-NEXT: mov v7.s[2], w15 +; CHECK-i32-NEXT: fcvtzs w15, d18 +; CHECK-i32-NEXT: mov v5.s[3], w13 +; CHECK-i32-NEXT: mov v6.s[3], w14 +; CHECK-i32-NEXT: mov 
 v7.s[3], w15 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v32f64: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: ldp q17, q16, [sp, #96] +; CHECK-i64-NEXT: frintx v7.2d, v7.2d +; CHECK-i64-NEXT: ldp q19, q18, [sp, #64] +; CHECK-i64-NEXT: frintx v6.2d, v6.2d +; CHECK-i64-NEXT: ldp q21, q20, [sp, #32] +; CHECK-i64-NEXT: frintx v5.2d, v5.2d +; CHECK-i64-NEXT: frintx v16.2d, v16.2d +; CHECK-i64-NEXT: frintx v17.2d, v17.2d +; CHECK-i64-NEXT: frintx v4.2d, v4.2d +; CHECK-i64-NEXT: frintx v18.2d, v18.2d +; CHECK-i64-NEXT: frintx v19.2d, v19.2d +; CHECK-i64-NEXT: frintx v3.2d, v3.2d +; CHECK-i64-NEXT: ldp q23, q22, [sp] +; CHECK-i64-NEXT: frintx v20.2d, v20.2d +; CHECK-i64-NEXT: frintx v21.2d, v21.2d +; CHECK-i64-NEXT: frintx v2.2d, v2.2d +; CHECK-i64-NEXT: frintx v1.2d, v1.2d +; CHECK-i64-NEXT: fcvtzs v16.2d, v16.2d +; CHECK-i64-NEXT: fcvtzs v17.2d, v17.2d +; CHECK-i64-NEXT: frintx v0.2d, v0.2d +; CHECK-i64-NEXT: frintx v22.2d, v22.2d +; CHECK-i64-NEXT: fcvtzs v18.2d, v18.2d +; CHECK-i64-NEXT: frintx v23.2d, v23.2d +; CHECK-i64-NEXT: fcvtzs v19.2d, v19.2d +; CHECK-i64-NEXT: fcvtzs v20.2d, v20.2d +; CHECK-i64-NEXT: fcvtzs v7.2d, v7.2d +; CHECK-i64-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-i64-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-i64-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-i64-NEXT: stp q17, q16, [x8, #224] +; CHECK-i64-NEXT: fcvtzs v16.2d, v21.2d +; CHECK-i64-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-i64-NEXT: fcvtzs v17.2d, v22.2d +; CHECK-i64-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-i64-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-i64-NEXT: stp q19, q18, [x8, #192] +; CHECK-i64-NEXT: fcvtzs v18.2d, v23.2d +; CHECK-i64-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-i64-NEXT: stp q4, q5, [x8, #64] +; CHECK-i64-NEXT: stp q6, q7, [x8, #96] +; CHECK-i64-NEXT: stp q2, q3, [x8, #32] +; CHECK-i64-NEXT: stp q0, q1, [x8] +; CHECK-i64-NEXT: stp q18, q17, [x8, #128] +; CHECK-i64-NEXT: stp q16, q20, [x8, #160] +; CHECK-i64-NEXT: ret + %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double> %x) + ret <32 x iXLen> %a }
-declare <32 x i64> @llvm.lrint.v32i64.v32f64(<32 x double>) +declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>) From 357defa00167c07ea06b13eb8cc8bed0d11cc4d4 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Fri, 10 May 2024 12:45:53 +0100 Subject: [PATCH 6/6] AArch64/test: fix major find-replace issues --- .../CodeGen/AArch64/sve-fixed-vector-lrint.ll | 730 ++++++++++++++- llvm/test/CodeGen/AArch64/sve-lrint.ll | 834 +++++++++++++++++- llvm/test/CodeGen/AArch64/vector-lrint.ll | 40 +- 3 files changed, 1576 insertions(+), 28 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll index af1a84c56c448..9bdbe9b8ac62d 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll @@ -143,13 +143,363 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) { } declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>) -define <16 x iXLen> @lrint_v16iXLen_v16f16(<16 x half> %x) { +define <16 x iXLen> @lrint_v16f16(<16 x half> %x) { +; CHECK-i32-LABEL: lrint_v16f16: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: frintx v1.8h, v1.8h +; CHECK-i32-NEXT: frintx v0.8h, v0.8h +; CHECK-i32-NEXT: mov h3, v1.h[4] +; CHECK-i32-NEXT: mov h2, v1.h[5] +; CHECK-i32-NEXT: mov h5, v0.h[4] +; CHECK-i32-NEXT: mov h4, v1.h[1] +; CHECK-i32-NEXT: mov h6, v0.h[1] +; CHECK-i32-NEXT: fcvtzs w11, h0 +; CHECK-i32-NEXT: fcvtzs w14, h1 +; CHECK-i32-NEXT: mov h7, v1.h[6] +; CHECK-i32-NEXT: mov h16, v1.h[3] +; CHECK-i32-NEXT: mov h17, v0.h[7] +; CHECK-i32-NEXT: mov h18, v0.h[3] +; CHECK-i32-NEXT: fcvtzs w9, h3 +; CHECK-i32-NEXT: mov h3, v0.h[5] +; CHECK-i32-NEXT: fcvtzs w8, h2 +; CHECK-i32-NEXT: mov h2, v1.h[2] +; CHECK-i32-NEXT: fcvtzs w12, h5 +; CHECK-i32-NEXT: fcvtzs w10, h4 +; CHECK-i32-NEXT: mov h4, v0.h[6] +; CHECK-i32-NEXT: mov h5, v0.h[2] +; CHECK-i32-NEXT: fcvtzs w13, h6 +; CHECK-i32-NEXT: mov h6, v1.h[7] +; CHECK-i32-NEXT: fmov s0, w11 +; 
CHECK-i32-NEXT: fcvtzs w16, h7 +; CHECK-i32-NEXT: fcvtzs w15, h3 +; CHECK-i32-NEXT: fmov s3, w9 +; CHECK-i32-NEXT: fcvtzs w9, h16 +; CHECK-i32-NEXT: fcvtzs w17, h2 +; CHECK-i32-NEXT: fmov s1, w12 +; CHECK-i32-NEXT: fmov s2, w14 +; CHECK-i32-NEXT: fcvtzs w11, h4 +; CHECK-i32-NEXT: fcvtzs w18, h5 +; CHECK-i32-NEXT: mov v0.s[1], w13 +; CHECK-i32-NEXT: mov v3.s[1], w8 +; CHECK-i32-NEXT: fcvtzs w8, h6 +; CHECK-i32-NEXT: fcvtzs w12, h18 +; CHECK-i32-NEXT: mov v1.s[1], w15 +; CHECK-i32-NEXT: mov v2.s[1], w10 +; CHECK-i32-NEXT: fcvtzs w10, h17 +; CHECK-i32-NEXT: mov v0.s[2], w18 +; CHECK-i32-NEXT: mov v3.s[2], w16 +; CHECK-i32-NEXT: mov v1.s[2], w11 +; CHECK-i32-NEXT: mov v2.s[2], w17 +; CHECK-i32-NEXT: mov v0.s[3], w12 +; CHECK-i32-NEXT: mov v3.s[3], w8 +; CHECK-i32-NEXT: mov v1.s[3], w10 +; CHECK-i32-NEXT: mov v2.s[3], w9 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v16f16: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: ext v2.16b, v1.16b, v1.16b, #8 +; CHECK-i64-NEXT: frintx v1.4h, v1.4h +; CHECK-i64-NEXT: frintx v3.4h, v0.4h +; CHECK-i64-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-i64-NEXT: frintx v2.4h, v2.4h +; CHECK-i64-NEXT: mov h4, v1.h[2] +; CHECK-i64-NEXT: mov h5, v3.h[2] +; CHECK-i64-NEXT: frintx v0.4h, v0.4h +; CHECK-i64-NEXT: mov h6, v3.h[1] +; CHECK-i64-NEXT: fcvtzs x9, h3 +; CHECK-i64-NEXT: mov h16, v1.h[1] +; CHECK-i64-NEXT: fcvtzs x12, h1 +; CHECK-i64-NEXT: mov h3, v3.h[3] +; CHECK-i64-NEXT: mov h17, v1.h[3] +; CHECK-i64-NEXT: mov h7, v2.h[3] +; CHECK-i64-NEXT: fcvtzs x8, h4 +; CHECK-i64-NEXT: fcvtzs x10, h5 +; CHECK-i64-NEXT: mov h4, v2.h[2] +; CHECK-i64-NEXT: mov h5, v0.h[2] +; CHECK-i64-NEXT: fcvtzs x11, h6 +; CHECK-i64-NEXT: mov h6, v0.h[3] +; CHECK-i64-NEXT: fcvtzs x15, h2 +; CHECK-i64-NEXT: mov h2, v2.h[1] +; CHECK-i64-NEXT: fcvtzs x14, h0 +; CHECK-i64-NEXT: fcvtzs x17, h3 +; CHECK-i64-NEXT: fcvtzs x0, h17 +; CHECK-i64-NEXT: fcvtzs x13, h7 +; CHECK-i64-NEXT: mov h7, v0.h[1] +; CHECK-i64-NEXT: fmov d0, x9 +; CHECK-i64-NEXT: fcvtzs x16, h4 +; 
CHECK-i64-NEXT: fcvtzs x9, h5 +; CHECK-i64-NEXT: fmov d4, x12 +; CHECK-i64-NEXT: fcvtzs x12, h16 +; CHECK-i64-NEXT: fmov d1, x10 +; CHECK-i64-NEXT: fcvtzs x10, h6 +; CHECK-i64-NEXT: fmov d5, x8 +; CHECK-i64-NEXT: fcvtzs x8, h2 +; CHECK-i64-NEXT: fmov d2, x14 +; CHECK-i64-NEXT: fcvtzs x18, h7 +; CHECK-i64-NEXT: fmov d6, x15 +; CHECK-i64-NEXT: mov v0.d[1], x11 +; CHECK-i64-NEXT: fmov d3, x9 +; CHECK-i64-NEXT: fmov d7, x16 +; CHECK-i64-NEXT: mov v1.d[1], x17 +; CHECK-i64-NEXT: mov v4.d[1], x12 +; CHECK-i64-NEXT: mov v5.d[1], x0 +; CHECK-i64-NEXT: mov v6.d[1], x8 +; CHECK-i64-NEXT: mov v2.d[1], x18 +; CHECK-i64-NEXT: mov v3.d[1], x10 +; CHECK-i64-NEXT: mov v7.d[1], x13 +; CHECK-i64-NEXT: ret %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x) ret <16 x iXLen> %a } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>) -define <32 x iXLen> @lrint_v32iXLen_v32f16(<32 x half> %x) { +define <32 x iXLen> @lrint_v32f16(<32 x half> %x) { +; CHECK-i32-LABEL: lrint_v32f16: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: stp x26, x25, [sp, #-64]! 
// 16-byte Folded Spill +; CHECK-i32-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-i32-NEXT: .cfi_def_cfa_offset 64 +; CHECK-i32-NEXT: .cfi_offset w19, -8 +; CHECK-i32-NEXT: .cfi_offset w20, -16 +; CHECK-i32-NEXT: .cfi_offset w21, -24 +; CHECK-i32-NEXT: .cfi_offset w22, -32 +; CHECK-i32-NEXT: .cfi_offset w23, -40 +; CHECK-i32-NEXT: .cfi_offset w24, -48 +; CHECK-i32-NEXT: .cfi_offset w25, -56 +; CHECK-i32-NEXT: .cfi_offset w26, -64 +; CHECK-i32-NEXT: frintx v3.8h, v3.8h +; CHECK-i32-NEXT: frintx v2.8h, v2.8h +; CHECK-i32-NEXT: frintx v1.8h, v1.8h +; CHECK-i32-NEXT: frintx v0.8h, v0.8h +; CHECK-i32-NEXT: mov h4, v3.h[7] +; CHECK-i32-NEXT: mov h5, v3.h[6] +; CHECK-i32-NEXT: mov h6, v3.h[5] +; CHECK-i32-NEXT: mov h7, v3.h[4] +; CHECK-i32-NEXT: mov h16, v3.h[3] +; CHECK-i32-NEXT: mov h17, v3.h[2] +; CHECK-i32-NEXT: mov h18, v3.h[1] +; CHECK-i32-NEXT: mov h19, v2.h[7] +; CHECK-i32-NEXT: fcvtzs w1, h3 +; CHECK-i32-NEXT: mov h3, v1.h[6] +; CHECK-i32-NEXT: fcvtzs w7, h2 +; CHECK-i32-NEXT: fcvtzs w22, h0 +; CHECK-i32-NEXT: fcvtzs w8, h4 +; CHECK-i32-NEXT: mov h4, v2.h[6] +; CHECK-i32-NEXT: fcvtzs w10, h5 +; CHECK-i32-NEXT: mov h5, v2.h[5] +; CHECK-i32-NEXT: fcvtzs w12, h6 +; CHECK-i32-NEXT: mov h6, v2.h[4] +; CHECK-i32-NEXT: fcvtzs w13, h7 +; CHECK-i32-NEXT: mov h7, v2.h[3] +; CHECK-i32-NEXT: fcvtzs w9, h16 +; CHECK-i32-NEXT: fcvtzs w11, h17 +; CHECK-i32-NEXT: mov h16, v2.h[2] +; CHECK-i32-NEXT: mov h17, v2.h[1] +; CHECK-i32-NEXT: fcvtzs w17, h4 +; CHECK-i32-NEXT: mov h4, v1.h[5] +; CHECK-i32-NEXT: mov h2, v0.h[5] +; CHECK-i32-NEXT: fcvtzs w0, h5 +; CHECK-i32-NEXT: fcvtzs w3, h6 +; CHECK-i32-NEXT: mov h5, v1.h[4] +; CHECK-i32-NEXT: mov h6, v0.h[4] +; CHECK-i32-NEXT: fcvtzs w16, h7 +; CHECK-i32-NEXT: mov h7, v0.h[1] +; CHECK-i32-NEXT: fcvtzs w15, h18 +; CHECK-i32-NEXT: fcvtzs w2, h3 +; CHECK-i32-NEXT: mov h3, v1.h[2] +; 
CHECK-i32-NEXT: fcvtzs w19, h4 +; CHECK-i32-NEXT: mov h4, v1.h[1] +; CHECK-i32-NEXT: mov h18, v0.h[6] +; CHECK-i32-NEXT: fcvtzs w20, h5 +; CHECK-i32-NEXT: fcvtzs w23, h2 +; CHECK-i32-NEXT: mov h2, v0.h[2] +; CHECK-i32-NEXT: fcvtzs w21, h6 +; CHECK-i32-NEXT: fcvtzs w25, h1 +; CHECK-i32-NEXT: fcvtzs w4, h17 +; CHECK-i32-NEXT: fcvtzs w24, h7 +; CHECK-i32-NEXT: fcvtzs w14, h19 +; CHECK-i32-NEXT: fcvtzs w18, h16 +; CHECK-i32-NEXT: fcvtzs w26, h4 +; CHECK-i32-NEXT: mov h16, v1.h[7] +; CHECK-i32-NEXT: mov h17, v1.h[3] +; CHECK-i32-NEXT: fcvtzs w5, h3 +; CHECK-i32-NEXT: mov h19, v0.h[7] +; CHECK-i32-NEXT: fcvtzs w6, h18 +; CHECK-i32-NEXT: mov h18, v0.h[3] +; CHECK-i32-NEXT: fmov s0, w22 +; CHECK-i32-NEXT: fmov s1, w21 +; CHECK-i32-NEXT: fcvtzs w21, h2 +; CHECK-i32-NEXT: fmov s2, w25 +; CHECK-i32-NEXT: fmov s3, w20 +; CHECK-i32-NEXT: fmov s4, w7 +; CHECK-i32-NEXT: fmov s5, w3 +; CHECK-i32-NEXT: fmov s6, w1 +; CHECK-i32-NEXT: fmov s7, w13 +; CHECK-i32-NEXT: mov v0.s[1], w24 +; CHECK-i32-NEXT: mov v1.s[1], w23 +; CHECK-i32-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v2.s[1], w26 +; CHECK-i32-NEXT: mov v3.s[1], w19 +; CHECK-i32-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v4.s[1], w4 +; CHECK-i32-NEXT: mov v5.s[1], w0 +; CHECK-i32-NEXT: mov v6.s[1], w15 +; CHECK-i32-NEXT: mov v7.s[1], w12 +; CHECK-i32-NEXT: fcvtzs w12, h16 +; CHECK-i32-NEXT: fcvtzs w13, h17 +; CHECK-i32-NEXT: fcvtzs w15, h19 +; CHECK-i32-NEXT: fcvtzs w0, h18 +; CHECK-i32-NEXT: mov v0.s[2], w21 +; CHECK-i32-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v1.s[2], w6 +; CHECK-i32-NEXT: mov v2.s[2], w5 +; CHECK-i32-NEXT: mov v3.s[2], w2 +; CHECK-i32-NEXT: mov v4.s[2], w18 +; CHECK-i32-NEXT: mov v5.s[2], w17 +; CHECK-i32-NEXT: mov v6.s[2], w11 +; CHECK-i32-NEXT: mov v7.s[2], w10 +; CHECK-i32-NEXT: mov v0.s[3], w0 +; CHECK-i32-NEXT: mov v1.s[3], w15 +; CHECK-i32-NEXT: mov v2.s[3], w13 +; CHECK-i32-NEXT: mov v3.s[3], w12 
+; CHECK-i32-NEXT: mov v4.s[3], w16 +; CHECK-i32-NEXT: mov v5.s[3], w14 +; CHECK-i32-NEXT: mov v6.s[3], w9 +; CHECK-i32-NEXT: mov v7.s[3], w8 +; CHECK-i32-NEXT: ldp x26, x25, [sp], #64 // 16-byte Folded Reload +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v32f16: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-i64-NEXT: sub x9, sp, #272 +; CHECK-i64-NEXT: mov x29, sp +; CHECK-i64-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-i64-NEXT: .cfi_def_cfa w29, 16 +; CHECK-i64-NEXT: .cfi_offset w30, -8 +; CHECK-i64-NEXT: .cfi_offset w29, -16 +; CHECK-i64-NEXT: frintx v5.4h, v0.4h +; CHECK-i64-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-i64-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-i64-NEXT: ext v17.16b, v2.16b, v2.16b, #8 +; CHECK-i64-NEXT: frintx v1.4h, v1.4h +; CHECK-i64-NEXT: frintx v2.4h, v2.4h +; CHECK-i64-NEXT: ptrue p0.d, vl4 +; CHECK-i64-NEXT: mov h6, v5.h[3] +; CHECK-i64-NEXT: frintx v0.4h, v0.4h +; CHECK-i64-NEXT: mov h7, v5.h[2] +; CHECK-i64-NEXT: mov h16, v5.h[1] +; CHECK-i64-NEXT: frintx v4.4h, v4.4h +; CHECK-i64-NEXT: fcvtzs x12, h5 +; CHECK-i64-NEXT: ext v5.16b, v3.16b, v3.16b, #8 +; CHECK-i64-NEXT: frintx v17.4h, v17.4h +; CHECK-i64-NEXT: frintx v3.4h, v3.4h +; CHECK-i64-NEXT: fcvtzs x9, h6 +; CHECK-i64-NEXT: mov h6, v0.h[3] +; CHECK-i64-NEXT: fcvtzs x10, h7 +; CHECK-i64-NEXT: mov h7, v0.h[2] +; CHECK-i64-NEXT: fcvtzs x11, h16 +; CHECK-i64-NEXT: mov h16, v0.h[1] +; CHECK-i64-NEXT: fcvtzs x13, h6 +; CHECK-i64-NEXT: mov h6, v4.h[3] +; CHECK-i64-NEXT: stp x10, x9, [sp, #48] +; CHECK-i64-NEXT: fcvtzs x9, h7 +; CHECK-i64-NEXT: mov h7, v4.h[2] +; CHECK-i64-NEXT: fcvtzs x10, h16 +; CHECK-i64-NEXT: mov h16, v4.h[1] +; CHECK-i64-NEXT: stp x12, x11, [sp, #32] +; CHECK-i64-NEXT: fcvtzs x11, h0 +; CHECK-i64-NEXT: frintx v0.4h, v5.4h +; CHECK-i64-NEXT: mov h5, v17.h[3] +; CHECK-i64-NEXT: fcvtzs x12, h6 +; CHECK-i64-NEXT: mov h6, v17.h[2] +; CHECK-i64-NEXT: stp x9, x13, [sp, #16] +; CHECK-i64-NEXT: fcvtzs 
x13, h7 +; CHECK-i64-NEXT: mov h7, v17.h[1] +; CHECK-i64-NEXT: fcvtzs x9, h16 +; CHECK-i64-NEXT: stp x11, x10, [sp] +; CHECK-i64-NEXT: fcvtzs x10, h4 +; CHECK-i64-NEXT: fcvtzs x11, h5 +; CHECK-i64-NEXT: mov h4, v0.h[3] +; CHECK-i64-NEXT: mov h5, v0.h[2] +; CHECK-i64-NEXT: stp x13, x12, [sp, #80] +; CHECK-i64-NEXT: fcvtzs x12, h6 +; CHECK-i64-NEXT: fcvtzs x13, h7 +; CHECK-i64-NEXT: mov h6, v0.h[1] +; CHECK-i64-NEXT: stp x10, x9, [sp, #64] +; CHECK-i64-NEXT: fcvtzs x9, h17 +; CHECK-i64-NEXT: mov h7, v1.h[3] +; CHECK-i64-NEXT: fcvtzs x10, h4 +; CHECK-i64-NEXT: mov h4, v1.h[2] +; CHECK-i64-NEXT: stp x12, x11, [sp, #144] +; CHECK-i64-NEXT: fcvtzs x11, h5 +; CHECK-i64-NEXT: mov h5, v1.h[1] +; CHECK-i64-NEXT: fcvtzs x12, h6 +; CHECK-i64-NEXT: stp x9, x13, [sp, #128] +; CHECK-i64-NEXT: fcvtzs x9, h0 +; CHECK-i64-NEXT: fcvtzs x13, h7 +; CHECK-i64-NEXT: mov h0, v2.h[3] +; CHECK-i64-NEXT: stp x11, x10, [sp, #208] +; CHECK-i64-NEXT: fcvtzs x10, h4 +; CHECK-i64-NEXT: mov h4, v2.h[2] +; CHECK-i64-NEXT: fcvtzs x11, h5 +; CHECK-i64-NEXT: mov h5, v2.h[1] +; CHECK-i64-NEXT: stp x9, x12, [sp, #192] +; CHECK-i64-NEXT: fcvtzs x9, h1 +; CHECK-i64-NEXT: fcvtzs x12, h0 +; CHECK-i64-NEXT: mov h0, v3.h[3] +; CHECK-i64-NEXT: mov h1, v3.h[2] +; CHECK-i64-NEXT: stp x10, x13, [sp, #112] +; CHECK-i64-NEXT: fcvtzs x10, h4 +; CHECK-i64-NEXT: mov h4, v3.h[1] +; CHECK-i64-NEXT: fcvtzs x13, h5 +; CHECK-i64-NEXT: stp x9, x11, [sp, #96] +; CHECK-i64-NEXT: fcvtzs x9, h2 +; CHECK-i64-NEXT: fcvtzs x11, h0 +; CHECK-i64-NEXT: stp x10, x12, [sp, #176] +; CHECK-i64-NEXT: fcvtzs x10, h1 +; CHECK-i64-NEXT: fcvtzs x12, h4 +; CHECK-i64-NEXT: stp x9, x13, [sp, #160] +; CHECK-i64-NEXT: fcvtzs x9, h3 +; CHECK-i64-NEXT: stp x10, x11, [sp, #240] +; CHECK-i64-NEXT: add x10, sp, #64 +; CHECK-i64-NEXT: stp x9, x12, [sp, #224] +; CHECK-i64-NEXT: add x9, sp, #32 +; CHECK-i64-NEXT: ld1d { z0.d }, p0/z, [x9] +; CHECK-i64-NEXT: mov x9, sp +; CHECK-i64-NEXT: ld1d { z2.d }, p0/z, [x10] +; CHECK-i64-NEXT: ld1d { z1.d }, p0/z, 
[x9] +; CHECK-i64-NEXT: add x9, sp, #224 +; CHECK-i64-NEXT: add x10, sp, #128 +; CHECK-i64-NEXT: ld1d { z3.d }, p0/z, [x9] +; CHECK-i64-NEXT: add x9, sp, #160 +; CHECK-i64-NEXT: ld1d { z4.d }, p0/z, [x10] +; CHECK-i64-NEXT: add x10, sp, #96 +; CHECK-i64-NEXT: ld1d { z5.d }, p0/z, [x9] +; CHECK-i64-NEXT: add x9, sp, #192 +; CHECK-i64-NEXT: ld1d { z6.d }, p0/z, [x10] +; CHECK-i64-NEXT: mov x10, #24 // =0x18 +; CHECK-i64-NEXT: ld1d { z7.d }, p0/z, [x9] +; CHECK-i64-NEXT: mov x9, #16 // =0x10 +; CHECK-i64-NEXT: st1d { z3.d }, p0, [x8, x10, lsl #3] +; CHECK-i64-NEXT: st1d { z5.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: mov x9, #8 // =0x8 +; CHECK-i64-NEXT: st1d { z6.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: mov x9, #28 // =0x1c +; CHECK-i64-NEXT: st1d { z7.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: mov x9, #20 // =0x14 +; CHECK-i64-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: mov x9, #12 // =0xc +; CHECK-i64-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: mov x9, #4 // =0x4 +; CHECK-i64-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: st1d { z0.d }, p0, [x8] +; CHECK-i64-NEXT: mov sp, x29 +; CHECK-i64-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-i64-NEXT: ret %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half> %x) ret <32 x iXLen> %a } @@ -289,13 +639,385 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { } declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>) -define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) { +define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { +; CHECK-i32-LABEL: lrint_v16f32: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: ptrue p0.d, vl2 +; CHECK-i32-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-i32-NEXT: // kill: def $q3 killed $q3 def $z3 +; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-i32-NEXT: splice z2.d, p0, z2.d, z3.d +; CHECK-i32-NEXT: splice z0.d, p0, z0.d, z1.d +; 
CHECK-i32-NEXT: ptrue p0.s, vl8 +; CHECK-i32-NEXT: movprfx z1, z2 +; CHECK-i32-NEXT: frintx z1.s, p0/m, z2.s +; CHECK-i32-NEXT: frintx z0.s, p0/m, z0.s +; CHECK-i32-NEXT: mov z2.s, z1.s[5] +; CHECK-i32-NEXT: mov z3.s, z1.s[4] +; CHECK-i32-NEXT: mov z5.s, z0.s[5] +; CHECK-i32-NEXT: mov z7.s, z0.s[1] +; CHECK-i32-NEXT: fcvtzs w11, s0 +; CHECK-i32-NEXT: fcvtzs w13, s1 +; CHECK-i32-NEXT: mov z4.s, z1.s[7] +; CHECK-i32-NEXT: mov z6.s, z1.s[6] +; CHECK-i32-NEXT: mov z16.s, z0.s[7] +; CHECK-i32-NEXT: fcvtzs w8, s2 +; CHECK-i32-NEXT: mov z2.s, z0.s[4] +; CHECK-i32-NEXT: fcvtzs w9, s3 +; CHECK-i32-NEXT: mov z3.s, z1.s[1] +; CHECK-i32-NEXT: fcvtzs w10, s5 +; CHECK-i32-NEXT: fcvtzs w12, s7 +; CHECK-i32-NEXT: mov z5.s, z0.s[6] +; CHECK-i32-NEXT: mov z7.s, z1.s[2] +; CHECK-i32-NEXT: mov z17.s, z1.s[3] +; CHECK-i32-NEXT: fcvtzs w14, s2 +; CHECK-i32-NEXT: mov z2.s, z0.s[2] +; CHECK-i32-NEXT: mov z18.s, z0.s[3] +; CHECK-i32-NEXT: fcvtzs w15, s3 +; CHECK-i32-NEXT: fmov s0, w11 +; CHECK-i32-NEXT: fmov s3, w9 +; CHECK-i32-NEXT: fcvtzs w16, s6 +; CHECK-i32-NEXT: fcvtzs w17, s5 +; CHECK-i32-NEXT: fcvtzs w11, s7 +; CHECK-i32-NEXT: fcvtzs w18, s2 +; CHECK-i32-NEXT: fmov s2, w13 +; CHECK-i32-NEXT: fcvtzs w9, s16 +; CHECK-i32-NEXT: fmov s1, w14 +; CHECK-i32-NEXT: mov v0.s[1], w12 +; CHECK-i32-NEXT: mov v3.s[1], w8 +; CHECK-i32-NEXT: fcvtzs w8, s4 +; CHECK-i32-NEXT: fcvtzs w12, s18 +; CHECK-i32-NEXT: mov v2.s[1], w15 +; CHECK-i32-NEXT: mov v1.s[1], w10 +; CHECK-i32-NEXT: fcvtzs w10, s17 +; CHECK-i32-NEXT: mov v0.s[2], w18 +; CHECK-i32-NEXT: mov v3.s[2], w16 +; CHECK-i32-NEXT: mov v2.s[2], w11 +; CHECK-i32-NEXT: mov v1.s[2], w17 +; CHECK-i32-NEXT: mov v0.s[3], w12 +; CHECK-i32-NEXT: mov v3.s[3], w8 +; CHECK-i32-NEXT: mov v2.s[3], w10 +; CHECK-i32-NEXT: mov v1.s[3], w9 +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v16f32: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: frintx v3.4s, v3.4s +; CHECK-i64-NEXT: frintx v2.4s, v2.4s +; CHECK-i64-NEXT: frintx v1.4s, v1.4s +; CHECK-i64-NEXT: frintx 
v0.4s, v0.4s +; CHECK-i64-NEXT: mov s4, v3.s[2] +; CHECK-i64-NEXT: mov s5, v2.s[2] +; CHECK-i64-NEXT: mov s6, v1.s[2] +; CHECK-i64-NEXT: mov s7, v0.s[2] +; CHECK-i64-NEXT: fcvtzs x10, s1 +; CHECK-i64-NEXT: fcvtzs x11, s0 +; CHECK-i64-NEXT: mov s16, v0.s[1] +; CHECK-i64-NEXT: mov s17, v1.s[1] +; CHECK-i64-NEXT: mov s18, v3.s[1] +; CHECK-i64-NEXT: fcvtzs x14, s3 +; CHECK-i64-NEXT: fcvtzs x16, s2 +; CHECK-i64-NEXT: fcvtzs x8, s4 +; CHECK-i64-NEXT: mov s4, v2.s[1] +; CHECK-i64-NEXT: fcvtzs x9, s5 +; CHECK-i64-NEXT: mov s5, v1.s[3] +; CHECK-i64-NEXT: fcvtzs x12, s6 +; CHECK-i64-NEXT: mov s6, v0.s[3] +; CHECK-i64-NEXT: fcvtzs x13, s7 +; CHECK-i64-NEXT: mov s7, v3.s[3] +; CHECK-i64-NEXT: fmov d0, x11 +; CHECK-i64-NEXT: fcvtzs x17, s16 +; CHECK-i64-NEXT: fcvtzs x18, s18 +; CHECK-i64-NEXT: fcvtzs x15, s4 +; CHECK-i64-NEXT: mov s4, v2.s[3] +; CHECK-i64-NEXT: fmov d2, x10 +; CHECK-i64-NEXT: fcvtzs x11, s5 +; CHECK-i64-NEXT: fcvtzs x10, s6 +; CHECK-i64-NEXT: fmov d3, x12 +; CHECK-i64-NEXT: fmov d1, x13 +; CHECK-i64-NEXT: fcvtzs x12, s17 +; CHECK-i64-NEXT: fcvtzs x13, s7 +; CHECK-i64-NEXT: fmov d5, x9 +; CHECK-i64-NEXT: fmov d6, x14 +; CHECK-i64-NEXT: fmov d7, x8 +; CHECK-i64-NEXT: fcvtzs x0, s4 +; CHECK-i64-NEXT: fmov d4, x16 +; CHECK-i64-NEXT: mov v0.d[1], x17 +; CHECK-i64-NEXT: mov v1.d[1], x10 +; CHECK-i64-NEXT: mov v3.d[1], x11 +; CHECK-i64-NEXT: mov v2.d[1], x12 +; CHECK-i64-NEXT: mov v6.d[1], x18 +; CHECK-i64-NEXT: mov v7.d[1], x13 +; CHECK-i64-NEXT: mov v4.d[1], x15 +; CHECK-i64-NEXT: mov v5.d[1], x0 +; CHECK-i64-NEXT: ret %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x) ret <16 x iXLen> %a } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>) -define <32 x iXLen> @lrint_v32iXLen_v32f32(<32 x float> %x) { +define <32 x iXLen> @lrint_v32f32(<32 x float> %x) { +; CHECK-i32-LABEL: lrint_v32f32: +; CHECK-i32: // %bb.0: +; CHECK-i32-NEXT: str x27, [sp, #-80]! 
// 8-byte Folded Spill +; CHECK-i32-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-i32-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-i32-NEXT: .cfi_def_cfa_offset 80 +; CHECK-i32-NEXT: .cfi_offset w19, -8 +; CHECK-i32-NEXT: .cfi_offset w20, -16 +; CHECK-i32-NEXT: .cfi_offset w21, -24 +; CHECK-i32-NEXT: .cfi_offset w22, -32 +; CHECK-i32-NEXT: .cfi_offset w23, -40 +; CHECK-i32-NEXT: .cfi_offset w24, -48 +; CHECK-i32-NEXT: .cfi_offset w25, -56 +; CHECK-i32-NEXT: .cfi_offset w26, -64 +; CHECK-i32-NEXT: .cfi_offset w27, -80 +; CHECK-i32-NEXT: ptrue p1.d, vl2 +; CHECK-i32-NEXT: // kill: def $q6 killed $q6 def $z6 +; CHECK-i32-NEXT: // kill: def $q7 killed $q7 def $z7 +; CHECK-i32-NEXT: // kill: def $q2 killed $q2 def $z2 +; CHECK-i32-NEXT: // kill: def $q4 killed $q4 def $z4 +; CHECK-i32-NEXT: // kill: def $q3 killed $q3 def $z3 +; CHECK-i32-NEXT: // kill: def $q5 killed $q5 def $z5 +; CHECK-i32-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-i32-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-i32-NEXT: ptrue p0.s, vl8 +; CHECK-i32-NEXT: splice z6.d, p1, z6.d, z7.d +; CHECK-i32-NEXT: splice z2.d, p1, z2.d, z3.d +; CHECK-i32-NEXT: splice z4.d, p1, z4.d, z5.d +; CHECK-i32-NEXT: splice z0.d, p1, z0.d, z1.d +; CHECK-i32-NEXT: movprfx z3, z6 +; CHECK-i32-NEXT: frintx z3.s, p0/m, z6.s +; CHECK-i32-NEXT: frintx z2.s, p0/m, z2.s +; CHECK-i32-NEXT: movprfx z1, z4 +; CHECK-i32-NEXT: frintx z1.s, p0/m, z4.s +; CHECK-i32-NEXT: frintx z0.s, p0/m, z0.s +; CHECK-i32-NEXT: mov z4.s, z3.s[7] +; CHECK-i32-NEXT: mov z5.s, z3.s[6] +; CHECK-i32-NEXT: mov z6.s, z3.s[5] +; CHECK-i32-NEXT: mov z16.s, z1.s[7] +; CHECK-i32-NEXT: mov z7.s, z3.s[4] +; CHECK-i32-NEXT: mov z17.s, z1.s[6] +; CHECK-i32-NEXT: mov z18.s, z1.s[5] +; CHECK-i32-NEXT: mov z19.s, z1.s[4] +; CHECK-i32-NEXT: fcvtzs w7, s3 +; CHECK-i32-NEXT: fcvtzs w8, s4 +; 
CHECK-i32-NEXT: mov z4.s, z2.s[7] +; CHECK-i32-NEXT: fcvtzs w10, s5 +; CHECK-i32-NEXT: mov z5.s, z2.s[6] +; CHECK-i32-NEXT: fcvtzs w13, s6 +; CHECK-i32-NEXT: fcvtzs w9, s16 +; CHECK-i32-NEXT: mov z6.s, z2.s[4] +; CHECK-i32-NEXT: mov z16.s, z0.s[6] +; CHECK-i32-NEXT: fcvtzs w14, s7 +; CHECK-i32-NEXT: fcvtzs w11, s4 +; CHECK-i32-NEXT: mov z4.s, z2.s[5] +; CHECK-i32-NEXT: mov z7.s, z0.s[7] +; CHECK-i32-NEXT: fcvtzs w16, s5 +; CHECK-i32-NEXT: mov z5.s, z0.s[4] +; CHECK-i32-NEXT: fcvtzs w12, s17 +; CHECK-i32-NEXT: fcvtzs w15, s18 +; CHECK-i32-NEXT: fcvtzs w17, s19 +; CHECK-i32-NEXT: mov z17.s, z0.s[5] +; CHECK-i32-NEXT: fcvtzs w3, s4 +; CHECK-i32-NEXT: mov z4.s, z3.s[1] +; CHECK-i32-NEXT: mov z18.s, z3.s[2] +; CHECK-i32-NEXT: fcvtzs w4, s6 +; CHECK-i32-NEXT: fcvtzs w0, s16 +; CHECK-i32-NEXT: fcvtzs w6, s5 +; CHECK-i32-NEXT: mov z16.s, z3.s[3] +; CHECK-i32-NEXT: mov z3.s, z0.s[1] +; CHECK-i32-NEXT: mov z5.s, z1.s[1] +; CHECK-i32-NEXT: mov z6.s, z2.s[1] +; CHECK-i32-NEXT: fcvtzs w21, s1 +; CHECK-i32-NEXT: fcvtzs w22, s0 +; CHECK-i32-NEXT: fcvtzs w23, s2 +; CHECK-i32-NEXT: fcvtzs w18, s7 +; CHECK-i32-NEXT: fcvtzs w2, s4 +; CHECK-i32-NEXT: mov z4.s, z1.s[2] +; CHECK-i32-NEXT: mov z7.s, z2.s[2] +; CHECK-i32-NEXT: fcvtzs w5, s17 +; CHECK-i32-NEXT: fcvtzs w24, s3 +; CHECK-i32-NEXT: fcvtzs w25, s5 +; CHECK-i32-NEXT: fcvtzs w26, s6 +; CHECK-i32-NEXT: fcvtzs w1, s18 +; CHECK-i32-NEXT: mov z18.s, z0.s[2] +; CHECK-i32-NEXT: mov z17.s, z1.s[3] +; CHECK-i32-NEXT: fcvtzs w19, s4 +; CHECK-i32-NEXT: mov z19.s, z2.s[3] +; CHECK-i32-NEXT: fcvtzs w20, s7 +; CHECK-i32-NEXT: mov z20.s, z0.s[3] +; CHECK-i32-NEXT: fmov s0, w22 +; CHECK-i32-NEXT: fmov s2, w23 +; CHECK-i32-NEXT: fmov s4, w21 +; CHECK-i32-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-i32-NEXT: fmov s1, w6 +; CHECK-i32-NEXT: fmov s6, w7 +; CHECK-i32-NEXT: fmov s3, w4 +; CHECK-i32-NEXT: fmov s5, w17 +; CHECK-i32-NEXT: fmov s7, w14 +; CHECK-i32-NEXT: fcvtzs w27, s18 +; CHECK-i32-NEXT: mov v0.s[1], w24 +; 
CHECK-i32-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v2.s[1], w26 +; CHECK-i32-NEXT: mov v4.s[1], w25 +; CHECK-i32-NEXT: mov v1.s[1], w5 +; CHECK-i32-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v3.s[1], w3 +; CHECK-i32-NEXT: mov v6.s[1], w2 +; CHECK-i32-NEXT: mov v5.s[1], w15 +; CHECK-i32-NEXT: mov v7.s[1], w13 +; CHECK-i32-NEXT: fcvtzs w13, s16 +; CHECK-i32-NEXT: fcvtzs w14, s17 +; CHECK-i32-NEXT: fcvtzs w15, s19 +; CHECK-i32-NEXT: fcvtzs w17, s20 +; CHECK-i32-NEXT: mov v0.s[2], w27 +; CHECK-i32-NEXT: mov v1.s[2], w0 +; CHECK-i32-NEXT: mov v2.s[2], w20 +; CHECK-i32-NEXT: mov v4.s[2], w19 +; CHECK-i32-NEXT: mov v3.s[2], w16 +; CHECK-i32-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-i32-NEXT: mov v6.s[2], w1 +; CHECK-i32-NEXT: mov v5.s[2], w12 +; CHECK-i32-NEXT: mov v7.s[2], w10 +; CHECK-i32-NEXT: mov v0.s[3], w17 +; CHECK-i32-NEXT: mov v1.s[3], w18 +; CHECK-i32-NEXT: mov v2.s[3], w15 +; CHECK-i32-NEXT: mov v4.s[3], w14 +; CHECK-i32-NEXT: mov v3.s[3], w11 +; CHECK-i32-NEXT: mov v6.s[3], w13 +; CHECK-i32-NEXT: mov v5.s[3], w9 +; CHECK-i32-NEXT: mov v7.s[3], w8 +; CHECK-i32-NEXT: ldr x27, [sp], #80 // 8-byte Folded Reload +; CHECK-i32-NEXT: ret +; +; CHECK-i64-LABEL: lrint_v32f32: +; CHECK-i64: // %bb.0: +; CHECK-i64-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-i64-NEXT: sub x9, sp, #272 +; CHECK-i64-NEXT: mov x29, sp +; CHECK-i64-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-i64-NEXT: .cfi_def_cfa w29, 16 +; CHECK-i64-NEXT: .cfi_offset w30, -8 +; CHECK-i64-NEXT: .cfi_offset w29, -16 +; CHECK-i64-NEXT: frintx v0.4s, v0.4s +; CHECK-i64-NEXT: frintx v1.4s, v1.4s +; CHECK-i64-NEXT: frintx v2.4s, v2.4s +; CHECK-i64-NEXT: ptrue p0.d, vl4 +; CHECK-i64-NEXT: mov s16, v0.s[3] +; CHECK-i64-NEXT: mov s17, v0.s[2] +; CHECK-i64-NEXT: mov s18, v0.s[1] +; CHECK-i64-NEXT: fcvtzs x12, s0 +; CHECK-i64-NEXT: frintx v0.4s, v3.4s +; CHECK-i64-NEXT: mov s3, v2.s[3] +; CHECK-i64-NEXT: fcvtzs x9, s16 +; CHECK-i64-NEXT: mov s16, v1.s[3] +; CHECK-i64-NEXT: fcvtzs x10, s17 +; CHECK-i64-NEXT: mov s17, v1.s[2] +; CHECK-i64-NEXT: fcvtzs x11, s18 +; CHECK-i64-NEXT: mov s18, v1.s[1] +; CHECK-i64-NEXT: fcvtzs x13, s16 +; CHECK-i64-NEXT: stp x10, x9, [sp, #16] +; CHECK-i64-NEXT: mov s16, v2.s[2] +; CHECK-i64-NEXT: fcvtzs x9, s17 +; CHECK-i64-NEXT: fcvtzs x10, s18 +; CHECK-i64-NEXT: mov s17, v2.s[1] +; CHECK-i64-NEXT: stp x12, x11, [sp] +; CHECK-i64-NEXT: fcvtzs x11, s1 +; CHECK-i64-NEXT: frintx v1.4s, v4.4s +; CHECK-i64-NEXT: fcvtzs x12, s3 +; CHECK-i64-NEXT: mov s3, v0.s[3] +; CHECK-i64-NEXT: mov s4, v0.s[2] +; CHECK-i64-NEXT: stp x9, x13, [sp, #48] +; CHECK-i64-NEXT: fcvtzs x13, s16 +; CHECK-i64-NEXT: fcvtzs x9, s17 +; CHECK-i64-NEXT: mov s16, v0.s[1] +; CHECK-i64-NEXT: stp x11, x10, [sp, #32] +; CHECK-i64-NEXT: fcvtzs x10, s2 +; CHECK-i64-NEXT: frintx v2.4s, v5.4s +; CHECK-i64-NEXT: fcvtzs x11, s3 +; CHECK-i64-NEXT: mov s3, v1.s[3] +; CHECK-i64-NEXT: mov s5, v1.s[1] +; CHECK-i64-NEXT: stp x13, x12, [sp, #80] +; CHECK-i64-NEXT: fcvtzs x12, s4 +; CHECK-i64-NEXT: mov s4, v1.s[2] +; CHECK-i64-NEXT: fcvtzs x13, s16 +; CHECK-i64-NEXT: stp x10, x9, [sp, #64] +; CHECK-i64-NEXT: fcvtzs x9, s0 +; CHECK-i64-NEXT: mov s0, v2.s[3] +; CHECK-i64-NEXT: fcvtzs x10, s3 +; CHECK-i64-NEXT: frintx v3.4s, v6.4s +; CHECK-i64-NEXT: stp x12, 
x11, [sp, #112] +; CHECK-i64-NEXT: fcvtzs x11, s4 +; CHECK-i64-NEXT: mov s4, v2.s[2] +; CHECK-i64-NEXT: fcvtzs x12, s5 +; CHECK-i64-NEXT: mov s5, v2.s[1] +; CHECK-i64-NEXT: stp x9, x13, [sp, #96] +; CHECK-i64-NEXT: fcvtzs x9, s1 +; CHECK-i64-NEXT: fcvtzs x13, s0 +; CHECK-i64-NEXT: mov s0, v3.s[3] +; CHECK-i64-NEXT: frintx v1.4s, v7.4s +; CHECK-i64-NEXT: stp x11, x10, [sp, #144] +; CHECK-i64-NEXT: fcvtzs x10, s4 +; CHECK-i64-NEXT: mov s4, v3.s[2] +; CHECK-i64-NEXT: fcvtzs x11, s5 +; CHECK-i64-NEXT: mov s5, v3.s[1] +; CHECK-i64-NEXT: stp x9, x12, [sp, #128] +; CHECK-i64-NEXT: fcvtzs x9, s2 +; CHECK-i64-NEXT: fcvtzs x12, s0 +; CHECK-i64-NEXT: mov s0, v1.s[3] +; CHECK-i64-NEXT: mov s2, v1.s[2] +; CHECK-i64-NEXT: stp x10, x13, [sp, #176] +; CHECK-i64-NEXT: fcvtzs x10, s4 +; CHECK-i64-NEXT: mov s4, v1.s[1] +; CHECK-i64-NEXT: fcvtzs x13, s5 +; CHECK-i64-NEXT: stp x9, x11, [sp, #160] +; CHECK-i64-NEXT: fcvtzs x9, s3 +; CHECK-i64-NEXT: fcvtzs x11, s0 +; CHECK-i64-NEXT: stp x10, x12, [sp, #208] +; CHECK-i64-NEXT: fcvtzs x10, s2 +; CHECK-i64-NEXT: fcvtzs x12, s4 +; CHECK-i64-NEXT: stp x9, x13, [sp, #192] +; CHECK-i64-NEXT: fcvtzs x9, s1 +; CHECK-i64-NEXT: stp x10, x11, [sp, #240] +; CHECK-i64-NEXT: add x10, sp, #64 +; CHECK-i64-NEXT: stp x9, x12, [sp, #224] +; CHECK-i64-NEXT: mov x9, sp +; CHECK-i64-NEXT: ld1d { z0.d }, p0/z, [x9] +; CHECK-i64-NEXT: add x9, sp, #32 +; CHECK-i64-NEXT: ld1d { z2.d }, p0/z, [x10] +; CHECK-i64-NEXT: ld1d { z1.d }, p0/z, [x9] +; CHECK-i64-NEXT: add x9, sp, #224 +; CHECK-i64-NEXT: add x10, sp, #96 +; CHECK-i64-NEXT: ld1d { z3.d }, p0/z, [x9] +; CHECK-i64-NEXT: add x9, sp, #192 +; CHECK-i64-NEXT: ld1d { z4.d }, p0/z, [x10] +; CHECK-i64-NEXT: add x10, sp, #160 +; CHECK-i64-NEXT: ld1d { z5.d }, p0/z, [x9] +; CHECK-i64-NEXT: add x9, sp, #128 +; CHECK-i64-NEXT: ld1d { z6.d }, p0/z, [x10] +; CHECK-i64-NEXT: mov x10, #28 // =0x1c +; CHECK-i64-NEXT: ld1d { z7.d }, p0/z, [x9] +; CHECK-i64-NEXT: mov x9, #24 // =0x18 +; CHECK-i64-NEXT: st1d { z3.d }, p0, [x8, 
x10, lsl #3] +; CHECK-i64-NEXT: st1d { z5.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: mov x9, #20 // =0x14 +; CHECK-i64-NEXT: st1d { z6.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: mov x9, #16 // =0x10 +; CHECK-i64-NEXT: st1d { z7.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: mov x9, #12 // =0xc +; CHECK-i64-NEXT: st1d { z4.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: mov x9, #8 // =0x8 +; CHECK-i64-NEXT: st1d { z2.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: mov x9, #4 // =0x4 +; CHECK-i64-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3] +; CHECK-i64-NEXT: st1d { z0.d }, p0, [x8] +; CHECK-i64-NEXT: mov sp, x29 +; CHECK-i64-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-i64-NEXT: ret %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float> %x) ret <32 x iXLen> %a } diff --git a/llvm/test/CodeGen/AArch64/sve-lrint.ll b/llvm/test/CodeGen/AArch64/sve-lrint.ll index d8415be01f463..2a1432d881e57 100644 --- a/llvm/test/CodeGen/AArch64/sve-lrint.ll +++ b/llvm/test/CodeGen/AArch64/sve-lrint.ll @@ -177,13 +177,433 @@ define @lrint_v8f16( %x) { } declare @llvm.lrint.nxv8iXLen.nxv8f16() -define @lrint_v16iXLen_v16f16( %x) { +define @lrint_v16f16( %x) { +; CHECK-LABEL: lrint_v16f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-3 +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG +; CHECK-NEXT: uunpklo z2.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: uunpklo z4.s, z1.h +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z1.s, z1.h +; CHECK-NEXT: mov z5.h, w8 +; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z25.d, #0x8000000000000000 +; CHECK-NEXT: mov z27.h, w8 +; CHECK-NEXT: mov z7.d, #0x7fffffffffffffff +; CHECK-NEXT: uunpklo z3.d, z2.s +; CHECK-NEXT: uunpkhi z2.d, z2.s +; CHECK-NEXT: uunpklo z6.d, z0.s +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: uunpklo z24.d, z4.s +; CHECK-NEXT: uunpkhi z4.d, z4.s +; CHECK-NEXT: uunpklo z26.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: frintx z2.h, p0/m, z2.h +; CHECK-NEXT: frintx z3.h, p0/m, z3.h +; CHECK-NEXT: frintx z6.h, p0/m, z6.h +; CHECK-NEXT: movprfx z28, z0 +; CHECK-NEXT: frintx z28.h, p0/m, z0.h +; CHECK-NEXT: movprfx z29, z4 +; CHECK-NEXT: frintx z29.h, p0/m, z4.h +; CHECK-NEXT: 
frintx z24.h, p0/m, z24.h +; CHECK-NEXT: movprfx z30, z1 +; CHECK-NEXT: frintx z30.h, p0/m, z1.h +; CHECK-NEXT: frintx z26.h, p0/m, z26.h +; CHECK-NEXT: fcmge p5.h, p0/z, z2.h, z5.h +; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, z5.h +; CHECK-NEXT: movprfx z1, z2 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.h +; CHECK-NEXT: movprfx z0, z3 +; CHECK-NEXT: fcvtzs z0.d, p0/m, z3.h +; CHECK-NEXT: fcmge p6.h, p0/z, z6.h, z5.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z3.h, z27.h +; CHECK-NEXT: fcmuo p1.h, p0/z, z3.h, z3.h +; CHECK-NEXT: fcmge p7.h, p0/z, z28.h, z5.h +; CHECK-NEXT: movprfx z3, z6 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z6.h +; CHECK-NEXT: fcmge p8.h, p0/z, z24.h, z5.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z2.h, z27.h +; CHECK-NEXT: fcmge p9.h, p0/z, z26.h, z5.h +; CHECK-NEXT: not p5.b, p0/z, p5.b +; CHECK-NEXT: movprfx z4, z24 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z24.h +; CHECK-NEXT: fcmge p10.h, p0/z, z30.h, z5.h +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: movprfx z31, z26 +; CHECK-NEXT: fcvtzs z31.d, p0/m, z26.h +; CHECK-NEXT: movprfx z8, z30 +; CHECK-NEXT: fcvtzs z8.d, p0/m, z30.h +; CHECK-NEXT: mov z1.d, p5/m, z25.d +; CHECK-NEXT: fcmge p5.h, p0/z, z29.h, z5.h +; CHECK-NEXT: not p6.b, p0/z, p6.b +; CHECK-NEXT: mov z0.d, p2/m, z25.d +; CHECK-NEXT: fcmuo p2.h, p0/z, z2.h, z2.h +; CHECK-NEXT: movprfx z2, z28 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z28.h +; CHECK-NEXT: movprfx z5, z29 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z29.h +; CHECK-NEXT: not p7.b, p0/z, p7.b +; CHECK-NEXT: mov z3.d, p6/m, z25.d +; CHECK-NEXT: not p6.b, p0/z, p8.b +; CHECK-NEXT: fcmgt p8.h, p0/z, z6.h, z27.h +; CHECK-NEXT: mov z1.d, p4/m, z7.d +; CHECK-NEXT: not p5.b, p0/z, p5.b +; CHECK-NEXT: mov z0.d, p3/m, z7.d +; CHECK-NEXT: fcmgt p3.h, p0/z, z29.h, z27.h +; CHECK-NEXT: sel z9.d, p7, z25.d, z2.d +; CHECK-NEXT: not p7.b, p0/z, p9.b +; CHECK-NEXT: mov z4.d, p6/m, z25.d +; CHECK-NEXT: not p6.b, p0/z, p10.b +; CHECK-NEXT: fcmgt p10.h, p0/z, z28.h, z27.h +; CHECK-NEXT: mov z5.d, p5/m, z25.d +; CHECK-NEXT: fcmgt p5.h, p0/z, 
z24.h, z27.h +; CHECK-NEXT: fcmuo p9.h, p0/z, z6.h, z6.h +; CHECK-NEXT: sel z6.d, p7, z25.d, z31.d +; CHECK-NEXT: sel z25.d, p6, z25.d, z8.d +; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: fcmgt p6.h, p0/z, z26.h, z27.h +; CHECK-NEXT: fcmgt p7.h, p0/z, z30.h, z27.h +; CHECK-NEXT: fcmuo p4.h, p0/z, z28.h, z28.h +; CHECK-NEXT: sel z2.d, p8, z7.d, z3.d +; CHECK-NEXT: sel z3.d, p10, z7.d, z9.d +; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: fcmuo p8.h, p0/z, z29.h, z29.h +; CHECK-NEXT: mov z4.d, p5/m, z7.d +; CHECK-NEXT: fcmuo p5.h, p0/z, z24.h, z24.h +; CHECK-NEXT: fcmuo p10.h, p0/z, z26.h, z26.h +; CHECK-NEXT: mov z5.d, p3/m, z7.d +; CHECK-NEXT: mov z6.d, p6/m, z7.d +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: fcmuo p0.h, p0/z, z30.h, z30.h +; CHECK-NEXT: sel z7.d, p7, z7.d, z25.d +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z2.d, p9/m, #0 // =0x0 +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z3.d, p4/m, #0 // =0x0 +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z4.d, p5/m, #0 // =0x0 +; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z6.d, p10/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #3 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %a = call @llvm.lrint.nxv16iXLen.nxv16f16( %x) ret %a } declare @llvm.lrint.nxv16iXLen.nxv16f16() -define @lrint_v32iXLen_v32f16( %x) { +define @lrint_v32f16( %x) { +; CHECK-LABEL: lrint_v32f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-17 +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 136 * VG +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 
0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG +; CHECK-NEXT: uunpkhi z5.s, z0.h +; CHECK-NEXT: uunpklo z4.s, z0.h +; CHECK-NEXT: mov w9, #64511 // =0xfbff +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z6.s, z1.h +; CHECK-NEXT: mov z30.h, w9 +; CHECK-NEXT: uunpkhi z10.s, z1.h +; CHECK-NEXT: mov w9, #31743 // =0x7bff +; CHECK-NEXT: mov z29.d, #0x8000000000000000 +; CHECK-NEXT: uunpklo z8.s, z2.h +; CHECK-NEXT: uunpkhi z13.s, z3.h +; CHECK-NEXT: uunpklo z18.s, z3.h +; CHECK-NEXT: uunpklo z7.d, z5.s +; CHECK-NEXT: uunpklo z0.d, z4.s +; CHECK-NEXT: uunpkhi z4.d, z4.s +; CHECK-NEXT: uunpkhi z24.d, z5.s +; CHECK-NEXT: uunpklo z25.d, z6.s +; CHECK-NEXT: uunpkhi z26.d, z6.s +; CHECK-NEXT: uunpklo z27.d, z10.s +; CHECK-NEXT: uunpkhi z10.d, z10.s +; CHECK-NEXT: uunpklo z12.d, z8.s +; CHECK-NEXT: uunpkhi z16.d, z8.s +; CHECK-NEXT: movprfx z5, z7 +; CHECK-NEXT: frintx z5.h, p0/m, z7.h +; CHECK-NEXT: movprfx z1, z4 +; CHECK-NEXT: frintx z1.h, p0/m, z4.h +; CHECK-NEXT: frintx z0.h, p0/m, z0.h +; CHECK-NEXT: movprfx z6, z24 +; CHECK-NEXT: frintx z6.h, p0/m, z24.h +; CHECK-NEXT: movprfx z24, z25 +; CHECK-NEXT: frintx z24.h, p0/m, z25.h +; CHECK-NEXT: movprfx z25, z26 +; CHECK-NEXT: frintx z25.h, p0/m, z26.h +; CHECK-NEXT: movprfx z28, z27 +; CHECK-NEXT: frintx z28.h, p0/m, z27.h +; CHECK-NEXT: movprfx z8, z10 +; 
CHECK-NEXT: frintx z8.h, p0/m, z10.h +; CHECK-NEXT: mov z7.h, w9 +; CHECK-NEXT: mov z4.d, #0x7fffffffffffffff +; CHECK-NEXT: rdvl x9, #15 +; CHECK-NEXT: fcmge p3.h, p0/z, z5.h, z30.h +; CHECK-NEXT: movprfx z11, z5 +; CHECK-NEXT: fcvtzs z11.d, p0/m, z5.h +; CHECK-NEXT: fcmge p2.h, p0/z, z1.h, z30.h +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z30.h +; CHECK-NEXT: fcmge p4.h, p0/z, z6.h, z30.h +; CHECK-NEXT: movprfx z9, z6 +; CHECK-NEXT: fcvtzs z9.d, p0/m, z6.h +; CHECK-NEXT: movprfx z15, z25 +; CHECK-NEXT: fcvtzs z15.d, p0/m, z25.h +; CHECK-NEXT: movprfx z14, z24 +; CHECK-NEXT: fcvtzs z14.d, p0/m, z24.h +; CHECK-NEXT: movprfx z26, z0 +; CHECK-NEXT: fcvtzs z26.d, p0/m, z0.h +; CHECK-NEXT: movprfx z19, z28 +; CHECK-NEXT: fcvtzs z19.d, p0/m, z28.h +; CHECK-NEXT: movprfx z31, z1 +; CHECK-NEXT: fcvtzs z31.d, p0/m, z1.h +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: not p6.b, p0/z, p2.b +; CHECK-NEXT: fcmge p2.h, p0/z, z25.h, z30.h +; CHECK-NEXT: sel z27.d, p3, z29.d, z11.d +; CHECK-NEXT: uunpkhi z11.s, z2.h +; CHECK-NEXT: not p5.b, p0/z, p1.b +; CHECK-NEXT: fcmge p1.h, p0/z, z24.h, z30.h +; CHECK-NEXT: not p3.b, p0/z, p4.b +; CHECK-NEXT: fcmge p4.h, p0/z, z28.h, z30.h +; CHECK-NEXT: mov z26.d, p5/m, z29.d +; CHECK-NEXT: mov z31.d, p6/m, z29.d +; CHECK-NEXT: sel z2.d, p3, z29.d, z9.d +; CHECK-NEXT: movprfx z9, z12 +; CHECK-NEXT: frintx z9.h, p0/m, z12.h +; CHECK-NEXT: uunpkhi z12.d, z13.s +; CHECK-NEXT: uunpklo z17.d, z11.s +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: sel z3.d, p2, z29.d, z15.d +; CHECK-NEXT: uunpklo z15.d, z13.s +; CHECK-NEXT: fcmge p2.h, p0/z, z8.h, z30.h +; CHECK-NEXT: sel z10.d, p1, z29.d, z14.d +; CHECK-NEXT: movprfx z14, z16 +; CHECK-NEXT: frintx z14.h, p0/m, z16.h +; CHECK-NEXT: uunpkhi z16.d, z18.s +; CHECK-NEXT: movprfx z13, z17 +; CHECK-NEXT: frintx z13.h, p0/m, z17.h +; CHECK-NEXT: movprfx z20, z12 +; CHECK-NEXT: frintx z20.h, p0/m, z12.h +; CHECK-NEXT: fcmge p3.h, p0/z, z9.h, z30.h +; CHECK-NEXT: uunpkhi 
z17.d, z11.s +; CHECK-NEXT: uunpklo z18.d, z18.s +; CHECK-NEXT: movprfx z12, z8 +; CHECK-NEXT: fcvtzs z12.d, p0/m, z8.h +; CHECK-NEXT: movprfx z21, z15 +; CHECK-NEXT: frintx z21.h, p0/m, z15.h +; CHECK-NEXT: not p1.b, p0/z, p4.b +; CHECK-NEXT: movprfx z15, z9 +; CHECK-NEXT: fcvtzs z15.d, p0/m, z9.h +; CHECK-NEXT: frintx z16.h, p0/m, z16.h +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: movprfx z22, z14 +; CHECK-NEXT: fcvtzs z22.d, p0/m, z14.h +; CHECK-NEXT: fcmge p4.h, p0/z, z13.h, z30.h +; CHECK-NEXT: fcmge p5.h, p0/z, z20.h, z30.h +; CHECK-NEXT: sel z11.d, p1, z29.d, z19.d +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: frintx z17.h, p0/m, z17.h +; CHECK-NEXT: frintx z18.h, p0/m, z18.h +; CHECK-NEXT: movprfx z19, z20 +; CHECK-NEXT: fcvtzs z19.d, p0/m, z20.h +; CHECK-NEXT: mov z12.d, p2/m, z29.d +; CHECK-NEXT: fcmge p2.h, p0/z, z21.h, z30.h +; CHECK-NEXT: fcmge p1.h, p0/z, z14.h, z30.h +; CHECK-NEXT: mov z15.d, p3/m, z29.d +; CHECK-NEXT: movprfx z23, z21 +; CHECK-NEXT: fcvtzs z23.d, p0/m, z21.h +; CHECK-NEXT: not p3.b, p0/z, p4.b +; CHECK-NEXT: fcmge p4.h, p0/z, z16.h, z30.h +; CHECK-NEXT: fcmgt p8.h, p0/z, z21.h, z7.h +; CHECK-NEXT: not p5.b, p0/z, p5.b +; CHECK-NEXT: fcmge p6.h, p0/z, z17.h, z30.h +; CHECK-NEXT: fcmge p7.h, p0/z, z18.h, z30.h +; CHECK-NEXT: movprfx z30, z16 +; CHECK-NEXT: fcvtzs z30.d, p0/m, z16.h +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: fcmuo p9.h, p0/z, z21.h, z21.h +; CHECK-NEXT: mov z19.d, p5/m, z29.d +; CHECK-NEXT: fcmgt p5.h, p0/z, z20.h, z7.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: mov z23.d, p2/m, z29.d +; CHECK-NEXT: fcmuo p2.h, p0/z, z20.h, z20.h +; CHECK-NEXT: movprfx z20, z18 +; CHECK-NEXT: fcvtzs z20.d, p0/m, z18.h +; CHECK-NEXT: movprfx z21, z13 +; CHECK-NEXT: fcvtzs z21.d, p0/m, z13.h +; CHECK-NEXT: mov z22.d, p1/m, z29.d +; CHECK-NEXT: not p1.b, p0/z, p7.b +; CHECK-NEXT: mov z30.d, p4/m, z29.d +; CHECK-NEXT: fcmgt p4.h, p0/z, z18.h, z7.h +; CHECK-NEXT: mov z19.d, 
p5/m, z4.d +; CHECK-NEXT: fcmuo p7.h, p0/z, z18.h, z18.h +; CHECK-NEXT: movprfx z18, z17 +; CHECK-NEXT: fcvtzs z18.d, p0/m, z17.h +; CHECK-NEXT: fcmgt p5.h, p0/z, z16.h, z7.h +; CHECK-NEXT: not p6.b, p0/z, p6.b +; CHECK-NEXT: mov z23.d, p8/m, z4.d +; CHECK-NEXT: mov z20.d, p1/m, z29.d +; CHECK-NEXT: mov z21.d, p3/m, z29.d +; CHECK-NEXT: fcmuo p3.h, p0/z, z16.h, z16.h +; CHECK-NEXT: mov z19.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p2.h, p0/z, z17.h, z7.h +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: sel z29.d, p6, z29.d, z18.d +; CHECK-NEXT: mov z23.d, p9/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p6.h, p0/z, z14.h, z7.h +; CHECK-NEXT: mov z30.d, p5/m, z4.d +; CHECK-NEXT: sel z16.d, p4, z4.d, z20.d +; CHECK-NEXT: fcmuo p4.h, p0/z, z17.h, z17.h +; CHECK-NEXT: st1b { z19.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #14 +; CHECK-NEXT: fcmgt p5.h, p0/z, z1.h, z7.h +; CHECK-NEXT: st1b { z23.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #13 +; CHECK-NEXT: mov z29.d, p2/m, z4.d +; CHECK-NEXT: mov z30.d, p3/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p3.h, p0/z, z13.h, z7.h +; CHECK-NEXT: mov z16.d, p7/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p2.h, p0/z, z9.h, z7.h +; CHECK-NEXT: fcmuo p7.h, p0/z, z14.h, z14.h +; CHECK-NEXT: mov z29.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.h, p0/z, z13.h, z13.h +; CHECK-NEXT: st1b { z30.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #12 +; CHECK-NEXT: sel z30.d, p5, z4.d, z31.d +; CHECK-NEXT: st1b { z16.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #11 +; CHECK-NEXT: sel z31.d, p3, z4.d, z21.d +; CHECK-NEXT: st1b { z29.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #10 +; CHECK-NEXT: fcmgt p5.h, p0/z, z24.h, z7.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z28.h, z7.h +; CHECK-NEXT: sel z13.d, p2, z4.d, z15.d +; CHECK-NEXT: fcmuo p2.h, p0/z, z9.h, z9.h +; CHECK-NEXT: sel z29.d, p6, z4.d, z22.d +; CHECK-NEXT: mov z31.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p4.h, p0/z, z8.h, z7.h +; CHECK-NEXT: fcmgt p6.h, p0/z, z5.h, z7.h +; CHECK-NEXT: sel z9.d, p5, z4.d, z10.d +; CHECK-NEXT: fcmgt p5.h, p0/z, 
z6.h, z7.h +; CHECK-NEXT: st1b { z31.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #9 +; CHECK-NEXT: mov z29.d, p7/m, #0 // =0x0 +; CHECK-NEXT: sel z10.d, p3, z4.d, z11.d +; CHECK-NEXT: fcmgt p3.h, p0/z, z25.h, z7.h +; CHECK-NEXT: mov z13.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p7.h, p0/z, z8.h, z8.h +; CHECK-NEXT: fcmuo p2.h, p0/z, z28.h, z28.h +; CHECK-NEXT: sel z28.d, p4, z4.d, z12.d +; CHECK-NEXT: st1b { z29.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #8 +; CHECK-NEXT: fcmuo p4.h, p0/z, z25.h, z25.h +; CHECK-NEXT: st1b { z13.b }, p1, [x8, x9] +; CHECK-NEXT: fcmuo p1.h, p0/z, z24.h, z24.h +; CHECK-NEXT: mov z2.d, p5/m, z4.d +; CHECK-NEXT: mov z3.d, p3/m, z4.d +; CHECK-NEXT: fcmgt p3.h, p0/z, z0.h, z7.h +; CHECK-NEXT: mov z28.d, p7/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p7.h, p0/z, z6.h, z6.h +; CHECK-NEXT: mov z10.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p2.h, p0/z, z5.h, z5.h +; CHECK-NEXT: sel z5.d, p6, z4.d, z27.d +; CHECK-NEXT: mov z3.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.h, p0/z, z1.h, z1.h +; CHECK-NEXT: mov z9.d, p1/m, #0 // =0x0 +; CHECK-NEXT: st1d { z28.d }, p0, [x8, #7, mul vl] +; CHECK-NEXT: fcmuo p1.h, p0/z, z0.h, z0.h +; CHECK-NEXT: sel z0.d, p3, z4.d, z26.d +; CHECK-NEXT: st1d { z10.d }, p0, [x8, #6, mul vl] +; CHECK-NEXT: mov z2.d, p7/m, #0 // =0x0 +; CHECK-NEXT: st1d { z3.d }, p0, [x8, #5, mul vl] +; CHECK-NEXT: mov z5.d, p2/m, #0 // =0x0 +; CHECK-NEXT: st1d { z9.d }, p0, [x8, #4, mul vl] +; CHECK-NEXT: mov z30.d, p4/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: st1d { z2.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: st1d { z5.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z30.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [x8] +; CHECK-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z19, [sp, 
#5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #17 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %a = call @llvm.lrint.nxv32iXLen.nxv32f16( %x) ret %a } @@ -361,13 +781,419 @@ define @lrint_v8f32( %x) { } declare @llvm.lrint.nxv8iXLen.nxv8f32() -define @lrint_v16iXLen_v16f32( %x) { +define @lrint_v16f32( %x) { +; CHECK-LABEL: lrint_v16f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +; CHECK-NEXT: uunpklo z4.d, z0.s +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z7.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: uunpklo z24.d, z2.s +; CHECK-NEXT: uunpkhi z2.d, z2.s +; CHECK-NEXT: uunpklo z25.d, z3.s +; CHECK-NEXT: uunpkhi z3.d, z3.s +; CHECK-NEXT: mov z26.d, #0x7fffffffffffffff +; CHECK-NEXT: movprfx z5, z4 +; CHECK-NEXT: frintx z5.s, p0/m, z4.s +; CHECK-NEXT: movprfx z6, z0 +; CHECK-NEXT: frintx z6.s, p0/m, z0.s +; CHECK-NEXT: mov z4.s, w8 +; CHECK-NEXT: frintx z7.s, p0/m, z7.s +; CHECK-NEXT: movprfx z28, z1 +; CHECK-NEXT: frintx z28.s, p0/m, z1.s +; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff +; CHECK-NEXT: mov z0.d, #0x8000000000000000 +; CHECK-NEXT: frintx z24.s, p0/m, z24.s +; CHECK-NEXT: movprfx z29, z2 +; CHECK-NEXT: frintx z29.s, p0/m, z2.s +; CHECK-NEXT: frintx z25.s, p0/m, z25.s +; CHECK-NEXT: movprfx z30, z3 +; CHECK-NEXT: frintx z30.s, p0/m, z3.s +; CHECK-NEXT: mov z27.s, w8 +; CHECK-NEXT: fcmge p1.s, p0/z, z5.s, z4.s +; CHECK-NEXT: fcmge p2.s, p0/z, z6.s, z4.s +; CHECK-NEXT: movprfx z1, z5 
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.s +; CHECK-NEXT: movprfx z2, z6 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z6.s +; CHECK-NEXT: fcmge p5.s, p0/z, z7.s, z4.s +; CHECK-NEXT: fcmge p6.s, p0/z, z28.s, z4.s +; CHECK-NEXT: movprfx z3, z7 +; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.s +; CHECK-NEXT: fcmge p8.s, p0/z, z29.s, z4.s +; CHECK-NEXT: fcmgt p3.s, p0/z, z5.s, z27.s +; CHECK-NEXT: fcmgt p7.s, p0/z, z6.s, z27.s +; CHECK-NEXT: fcmge p9.s, p0/z, z25.s, z4.s +; CHECK-NEXT: movprfx z31, z25 +; CHECK-NEXT: fcvtzs z31.d, p0/m, z25.s +; CHECK-NEXT: not p4.b, p0/z, p1.b +; CHECK-NEXT: fcmuo p1.s, p0/z, z5.s, z5.s +; CHECK-NEXT: movprfx z5, z28 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z28.s +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: fcmge p10.s, p0/z, z30.s, z4.s +; CHECK-NEXT: movprfx z8, z30 +; CHECK-NEXT: fcvtzs z8.d, p0/m, z30.s +; CHECK-NEXT: mov z1.d, p4/m, z0.d +; CHECK-NEXT: fcmge p4.s, p0/z, z24.s, z4.s +; CHECK-NEXT: movprfx z4, z29 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z29.s +; CHECK-NEXT: mov z2.d, p2/m, z0.d +; CHECK-NEXT: fcmuo p2.s, p0/z, z6.s, z6.s +; CHECK-NEXT: movprfx z6, z24 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z24.s +; CHECK-NEXT: not p5.b, p0/z, p5.b +; CHECK-NEXT: not p6.b, p0/z, p6.b +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: mov z3.d, p5/m, z0.d +; CHECK-NEXT: not p5.b, p0/z, p8.b +; CHECK-NEXT: mov z5.d, p6/m, z0.d +; CHECK-NEXT: fcmgt p8.s, p0/z, z7.s, z27.s +; CHECK-NEXT: not p6.b, p0/z, p9.b +; CHECK-NEXT: mov z6.d, p4/m, z0.d +; CHECK-NEXT: fcmuo p9.s, p0/z, z7.s, z7.s +; CHECK-NEXT: not p4.b, p0/z, p10.b +; CHECK-NEXT: fcmgt p10.s, p0/z, z28.s, z27.s +; CHECK-NEXT: sel z7.d, p5, z0.d, z4.d +; CHECK-NEXT: fcmgt p5.s, p0/z, z24.s, z27.s +; CHECK-NEXT: mov z31.d, p6/m, z0.d +; CHECK-NEXT: fcmgt p6.s, p0/z, z30.s, z27.s +; CHECK-NEXT: mov z8.d, p4/m, z0.d +; CHECK-NEXT: sel z0.d, p3, z26.d, z1.d +; CHECK-NEXT: fcmgt p3.s, p0/z, z29.s, z27.s +; CHECK-NEXT: fcmgt p4.s, p0/z, z25.s, z27.s +; CHECK-NEXT: sel z1.d, p7, z26.d, z2.d +; CHECK-NEXT: fcmuo p7.s, p0/z, 
z28.s, z28.s +; CHECK-NEXT: sel z2.d, p8, z26.d, z3.d +; CHECK-NEXT: sel z3.d, p10, z26.d, z5.d +; CHECK-NEXT: fcmuo p8.s, p0/z, z29.s, z29.s +; CHECK-NEXT: sel z4.d, p5, z26.d, z6.d +; CHECK-NEXT: fcmuo p5.s, p0/z, z24.s, z24.s +; CHECK-NEXT: fcmuo p10.s, p0/z, z25.s, z25.s +; CHECK-NEXT: sel z5.d, p3, z26.d, z7.d +; CHECK-NEXT: fcmuo p0.s, p0/z, z30.s, z30.s +; CHECK-NEXT: sel z7.d, p6, z26.d, z8.d +; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z6.d, p4, z26.d, z31.d +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z2.d, p9/m, #0 // =0x0 +; CHECK-NEXT: mov z3.d, p7/m, #0 // =0x0 +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z4.d, p5/m, #0 // =0x0 +; CHECK-NEXT: mov z5.d, p8/m, #0 // =0x0 +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z6.d, p10/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z7.d, p0/m, #0 // =0x0 +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %a = call @llvm.lrint.nxv16iXLen.nxv16f32( %x) ret %a } declare @llvm.lrint.nxv16iXLen.nxv16f32() -define @lrint_v32iXLen_v32f32( %x) { +define @lrint_v32f32( %x) { +; CHECK-LABEL: lrint_v32f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-17 +; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str z23, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z22, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z21, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z20, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z19, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z18, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z17, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z16, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z15, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z14, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z13, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z12, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z11, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z10, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 136 * VG +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 
0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG +; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG +; CHECK-NEXT: uunpklo z24.d, z0.s +; CHECK-NEXT: uunpkhi z25.d, z0.s +; CHECK-NEXT: mov w9, #-553648128 // =0xdf000000 +; CHECK-NEXT: uunpklo z26.d, z1.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z27.d, z1.s +; CHECK-NEXT: mov z31.s, w9 +; CHECK-NEXT: mov w9, #1593835519 // =0x5effffff +; CHECK-NEXT: uunpklo z28.d, z2.s +; CHECK-NEXT: mov z8.d, #0x8000000000000000 +; CHECK-NEXT: uunpklo z30.d, z3.s +; CHECK-NEXT: uunpklo z13.d, z4.s +; CHECK-NEXT: movprfx z0, z24 +; CHECK-NEXT: frintx z0.s, p0/m, z24.s +; CHECK-NEXT: movprfx z1, z25 +; CHECK-NEXT: frintx z1.s, p0/m, z25.s +; CHECK-NEXT: uunpkhi z15.d, z4.s +; CHECK-NEXT: movprfx z24, z26 +; CHECK-NEXT: frintx z24.s, p0/m, z26.s +; CHECK-NEXT: uunpkhi z26.d, z2.s +; CHECK-NEXT: movprfx z25, z27 +; CHECK-NEXT: frintx z25.s, p0/m, z27.s +; CHECK-NEXT: movprfx z27, z28 +; CHECK-NEXT: frintx z27.s, p0/m, z28.s +; CHECK-NEXT: uunpklo z16.d, z5.s +; CHECK-NEXT: uunpkhi z17.d, z7.s +; CHECK-NEXT: frintx z30.s, p0/m, z30.s +; CHECK-NEXT: uunpklo z18.d, z7.s +; CHECK-NEXT: uunpklo z21.d, z6.s +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z31.s +; CHECK-NEXT: movprfx z9, z0 +; CHECK-NEXT: fcvtzs z9.d, p0/m, z0.s +; CHECK-NEXT: movprfx z10, z1 +; CHECK-NEXT: fcvtzs z10.d, p0/m, z1.s +; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, 
z31.s +; CHECK-NEXT: fcmge p3.s, p0/z, z24.s, z31.s +; CHECK-NEXT: movprfx z11, z24 +; CHECK-NEXT: fcvtzs z11.d, p0/m, z24.s +; CHECK-NEXT: movprfx z29, z26 +; CHECK-NEXT: frintx z29.s, p0/m, z26.s +; CHECK-NEXT: fcmge p4.s, p0/z, z25.s, z31.s +; CHECK-NEXT: fcmge p5.s, p0/z, z27.s, z31.s +; CHECK-NEXT: movprfx z12, z27 +; CHECK-NEXT: fcvtzs z12.d, p0/m, z27.s +; CHECK-NEXT: movprfx z19, z30 +; CHECK-NEXT: fcvtzs z19.d, p0/m, z30.s +; CHECK-NEXT: movprfx z7, z16 +; CHECK-NEXT: frintx z7.s, p0/m, z16.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: frintx z17.s, p0/m, z17.s +; CHECK-NEXT: uunpkhi z16.d, z5.s +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: frintx z18.s, p0/m, z18.s +; CHECK-NEXT: mov z28.s, w9 +; CHECK-NEXT: not p6.b, p0/z, p3.b +; CHECK-NEXT: sel z26.d, p1, z8.d, z9.d +; CHECK-NEXT: movprfx z14, z29 +; CHECK-NEXT: fcvtzs z14.d, p0/m, z29.s +; CHECK-NEXT: sel z9.d, p2, z8.d, z10.d +; CHECK-NEXT: uunpkhi z10.d, z3.s +; CHECK-NEXT: rdvl x9, #15 +; CHECK-NEXT: sel z3.d, p6, z8.d, z11.d +; CHECK-NEXT: movprfx z11, z25 +; CHECK-NEXT: fcvtzs z11.d, p0/m, z25.s +; CHECK-NEXT: fcmge p3.s, p0/z, z29.s, z31.s +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: fcmge p1.s, p0/z, z30.s, z31.s +; CHECK-NEXT: movprfx z23, z18 +; CHECK-NEXT: fcvtzs z23.d, p0/m, z18.s +; CHECK-NEXT: not p2.b, p0/z, p5.b +; CHECK-NEXT: fcmge p5.s, p0/z, z17.s, z31.s +; CHECK-NEXT: frintx z16.s, p0/m, z16.s +; CHECK-NEXT: frintx z10.s, p0/m, z10.s +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: fcmgt p8.s, p0/z, z18.s, z28.s +; CHECK-NEXT: sel z4.d, p4, z8.d, z11.d +; CHECK-NEXT: movprfx z11, z13 +; CHECK-NEXT: frintx z11.s, p0/m, z13.s +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: sel z13.d, p2, z8.d, z12.d +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmge p4.s, p0/z, z7.s, z31.s +; CHECK-NEXT: sel z12.d, p3, z8.d, z14.d +; CHECK-NEXT: movprfx z14, z15 +; CHECK-NEXT: frintx z14.s, p0/m, z15.s +; CHECK-NEXT: uunpkhi z15.d, z6.s +; CHECK-NEXT: movprfx z20, 
z10 +; CHECK-NEXT: fcvtzs z20.d, p0/m, z10.s +; CHECK-NEXT: fcmge p2.s, p0/z, z10.s, z31.s +; CHECK-NEXT: sel z5.d, p1, z8.d, z19.d +; CHECK-NEXT: movprfx z19, z11 +; CHECK-NEXT: fcvtzs z19.d, p0/m, z11.s +; CHECK-NEXT: fcmge p3.s, p0/z, z11.s, z31.s +; CHECK-NEXT: not p5.b, p0/z, p5.b +; CHECK-NEXT: fcmge p6.s, p0/z, z16.s, z31.s +; CHECK-NEXT: fcmuo p9.s, p0/z, z18.s, z18.s +; CHECK-NEXT: movprfx z22, z15 +; CHECK-NEXT: frintx z22.s, p0/m, z15.s +; CHECK-NEXT: fcmge p1.s, p0/z, z14.s, z31.s +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: sel z6.d, p2, z8.d, z20.d +; CHECK-NEXT: movprfx z20, z21 +; CHECK-NEXT: frintx z20.s, p0/m, z21.s +; CHECK-NEXT: fcmge p2.s, p0/z, z18.s, z31.s +; CHECK-NEXT: sel z15.d, p3, z8.d, z19.d +; CHECK-NEXT: movprfx z19, z17 +; CHECK-NEXT: fcvtzs z19.d, p0/m, z17.s +; CHECK-NEXT: not p3.b, p0/z, p4.b +; CHECK-NEXT: fcmge p4.s, p0/z, z22.s, z31.s +; CHECK-NEXT: movprfx z21, z14 +; CHECK-NEXT: fcvtzs z21.d, p0/m, z14.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: movprfx z18, z7 +; CHECK-NEXT: fcvtzs z18.d, p0/m, z7.s +; CHECK-NEXT: not p6.b, p0/z, p6.b +; CHECK-NEXT: fcmge p7.s, p0/z, z20.s, z31.s +; CHECK-NEXT: movprfx z31, z22 +; CHECK-NEXT: fcvtzs z31.d, p0/m, z22.s +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z19.d, p5/m, z8.d +; CHECK-NEXT: fcmgt p5.s, p0/z, z17.s, z28.s +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: mov z23.d, p2/m, z8.d +; CHECK-NEXT: fcmuo p2.s, p0/z, z17.s, z17.s +; CHECK-NEXT: movprfx z17, z20 +; CHECK-NEXT: fcvtzs z17.d, p0/m, z20.s +; CHECK-NEXT: mov z21.d, p1/m, z8.d +; CHECK-NEXT: mov z18.d, p3/m, z8.d +; CHECK-NEXT: not p1.b, p0/z, p7.b +; CHECK-NEXT: mov z31.d, p4/m, z8.d +; CHECK-NEXT: fcmgt p4.s, p0/z, z20.s, z28.s +; CHECK-NEXT: mov z19.d, p5/m, z2.d +; CHECK-NEXT: fcmuo p7.s, p0/z, z20.s, z20.s +; CHECK-NEXT: movprfx z20, z16 +; CHECK-NEXT: fcvtzs z20.d, p0/m, z16.s +; CHECK-NEXT: fcmgt p5.s, p0/z, z22.s, z28.s +; CHECK-NEXT: mov z23.d, p8/m, 
z2.d +; CHECK-NEXT: fcmuo p3.s, p0/z, z22.s, z22.s +; CHECK-NEXT: mov z17.d, p1/m, z8.d +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: mov z19.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p2.s, p0/z, z16.s, z28.s +; CHECK-NEXT: sel z8.d, p6, z8.d, z20.d +; CHECK-NEXT: mov z23.d, p9/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p6.s, p0/z, z14.s, z28.s +; CHECK-NEXT: mov z31.d, p5/m, z2.d +; CHECK-NEXT: mov z17.d, p4/m, z2.d +; CHECK-NEXT: fcmuo p4.s, p0/z, z16.s, z16.s +; CHECK-NEXT: st1b { z19.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #14 +; CHECK-NEXT: fcmgt p5.s, p0/z, z1.s, z28.s +; CHECK-NEXT: st1b { z23.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #13 +; CHECK-NEXT: mov z8.d, p2/m, z2.d +; CHECK-NEXT: mov z31.d, p3/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p3.s, p0/z, z7.s, z28.s +; CHECK-NEXT: mov z17.d, p7/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p2.s, p0/z, z11.s, z28.s +; CHECK-NEXT: fcmuo p7.s, p0/z, z14.s, z14.s +; CHECK-NEXT: mov z8.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.s, p0/z, z7.s, z7.s +; CHECK-NEXT: sel z7.d, p5, z2.d, z9.d +; CHECK-NEXT: st1b { z31.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #12 +; CHECK-NEXT: fcmgt p5.s, p0/z, z27.s, z28.s +; CHECK-NEXT: st1b { z17.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #11 +; CHECK-NEXT: sel z31.d, p3, z2.d, z18.d +; CHECK-NEXT: st1b { z8.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #10 +; CHECK-NEXT: fcmgt p3.s, p0/z, z30.s, z28.s +; CHECK-NEXT: sel z9.d, p2, z2.d, z15.d +; CHECK-NEXT: fcmuo p2.s, p0/z, z11.s, z11.s +; CHECK-NEXT: sel z8.d, p6, z2.d, z21.d +; CHECK-NEXT: mov z31.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p4.s, p0/z, z10.s, z28.s +; CHECK-NEXT: fcmgt p6.s, p0/z, z24.s, z28.s +; CHECK-NEXT: sel z11.d, p5, z2.d, z13.d +; CHECK-NEXT: fcmgt p5.s, p0/z, z25.s, z28.s +; CHECK-NEXT: mov z8.d, p7/m, #0 // =0x0 +; CHECK-NEXT: mov z5.d, p3/m, z2.d +; CHECK-NEXT: fcmgt p3.s, p0/z, z29.s, z28.s +; CHECK-NEXT: st1b { z31.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #9 +; CHECK-NEXT: mov z9.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p7.s, p0/z, 
z10.s, z10.s +; CHECK-NEXT: fcmuo p2.s, p0/z, z30.s, z30.s +; CHECK-NEXT: mov z6.d, p4/m, z2.d +; CHECK-NEXT: st1b { z8.b }, p1, [x8, x9] +; CHECK-NEXT: rdvl x9, #8 +; CHECK-NEXT: fcmuo p4.s, p0/z, z29.s, z29.s +; CHECK-NEXT: st1b { z9.b }, p1, [x8, x9] +; CHECK-NEXT: fcmuo p1.s, p0/z, z27.s, z27.s +; CHECK-NEXT: sel z27.d, p3, z2.d, z12.d +; CHECK-NEXT: fcmgt p3.s, p0/z, z0.s, z28.s +; CHECK-NEXT: mov z4.d, p5/m, z2.d +; CHECK-NEXT: mov z3.d, p6/m, z2.d +; CHECK-NEXT: mov z6.d, p7/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p7.s, p0/z, z25.s, z25.s +; CHECK-NEXT: mov z5.d, p2/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p2.s, p0/z, z24.s, z24.s +; CHECK-NEXT: mov z27.d, p4/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p4.s, p0/z, z1.s, z1.s +; CHECK-NEXT: mov z11.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z0.s +; CHECK-NEXT: st1d { z6.d }, p0, [x8, #7, mul vl] +; CHECK-NEXT: sel z0.d, p3, z2.d, z26.d +; CHECK-NEXT: st1d { z5.d }, p0, [x8, #6, mul vl] +; CHECK-NEXT: mov z4.d, p7/m, #0 // =0x0 +; CHECK-NEXT: st1d { z27.d }, p0, [x8, #5, mul vl] +; CHECK-NEXT: mov z3.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z7.d, p4/m, #0 // =0x0 +; CHECK-NEXT: st1d { z11.d }, p0, [x8, #4, mul vl] +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: st1d { z4.d }, p0, [x8, #3, mul vl] +; CHECK-NEXT: st1d { z3.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: st1d { z7.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z0.d }, p0, [x8] +; CHECK-NEXT: ldr z23, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z22, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z21, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z20, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z19, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z18, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z17, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z16, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z15, [sp, #9, mul vl] // 16-byte 
Folded Reload +; CHECK-NEXT: ldr z14, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z13, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z12, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z11, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z10, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z9, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z8, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #17 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %a = call @llvm.lrint.nxv32iXLen.nxv32f32( %x) ret %a } diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll index 62fb8b1679fc7..44f29f1420fe2 100644 --- a/llvm/test/CodeGen/AArch64/vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll @@ -14,14 +14,14 @@ ; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f16 ; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f16 ; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f16 -; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f16 -; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32i64_v32f16 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16f16 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32f16 ; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for 
lrint_v1f32 ; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f32 ; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f32 ; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f32 -; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f32 -; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32i64_v32f32 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16f32 +; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32f32 ; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v1f64 ; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f64 ; CHECK-i32-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f64 @@ -32,13 +32,13 @@ ; CHECK-i64-GI: warning: Instruction selection used fallback path for lrint_v2f16 ; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f16 ; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f16 -; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f16 -; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32i64_v32f16 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16f16 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32f16 ; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f32 ; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f32 ; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f32 -; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f32 -; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for 
lrint_v32i64_v32f32 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16f32 +; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v32f32 ; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f64 ; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f64 ; CHECK-i64-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f64 @@ -244,8 +244,8 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) { } declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>) -define <16 x iXLen> @lrint_v16i64_v16f16(<16 x half> %x) { -; CHECK-i32-LABEL: lrint_v16i64_v16f16: +define <16 x iXLen> @lrint_v16f16(<16 x half> %x) { +; CHECK-i32-LABEL: lrint_v16f16: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: ext v2.16b, v0.16b, v0.16b, #8 ; CHECK-i32-NEXT: ext v3.16b, v1.16b, v1.16b, #8 @@ -327,7 +327,7 @@ define <16 x iXLen> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-i32-NEXT: mov v2.s[3], w10 ; CHECK-i32-NEXT: ret ; -; CHECK-i64-LABEL: lrint_v16i64_v16f16: +; CHECK-i64-LABEL: lrint_v16f16: ; CHECK-i64: // %bb.0: ; CHECK-i64-NEXT: ext v2.16b, v0.16b, v0.16b, #8 ; CHECK-i64-NEXT: ext v3.16b, v1.16b, v1.16b, #8 @@ -413,8 +413,8 @@ define <16 x iXLen> @lrint_v16i64_v16f16(<16 x half> %x) { } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>) -define <32 x iXLen> @lrint_v32i64_v32f16(<32 x half> %x) { -; CHECK-i32-LABEL: lrint_v32i64_v32f16: +define <32 x iXLen> @lrint_v32f16(<32 x half> %x) { +; CHECK-i32-LABEL: lrint_v32f16: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: ext v5.16b, v0.16b, v0.16b, #8 ; CHECK-i32-NEXT: ext v4.16b, v1.16b, v1.16b, #8 @@ -576,7 +576,7 @@ define <32 x iXLen> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-i32-NEXT: mov v3.16b, v17.16b ; CHECK-i32-NEXT: ret ; -; CHECK-i64-LABEL: lrint_v32i64_v32f16: +; CHECK-i64-LABEL: lrint_v32f16: ; CHECK-i64: // %bb.0: ; CHECK-i64-NEXT: ext v4.16b, v1.16b, v1.16b, #8 ; CHECK-i64-NEXT: ext 
v5.16b, v2.16b, v2.16b, #8 @@ -866,8 +866,8 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) { } declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>) -define <16 x iXLen> @lrint_v16i64_v16f32(<16 x float> %x) { -; CHECK-i32-LABEL: lrint_v16i64_v16f32: +define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { +; CHECK-i32-LABEL: lrint_v16f32: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v0.4s, v0.4s ; CHECK-i32-NEXT: frintx v1.4s, v1.4s @@ -879,7 +879,7 @@ define <16 x iXLen> @lrint_v16i64_v16f32(<16 x float> %x) { ; CHECK-i32-NEXT: fcvtzs v3.4s, v3.4s ; CHECK-i32-NEXT: ret ; -; CHECK-i64-LABEL: lrint_v16i64_v16f32: +; CHECK-i64-LABEL: lrint_v16f32: ; CHECK-i64: // %bb.0: ; CHECK-i64-NEXT: frintx v4.2s, v0.2s ; CHECK-i64-NEXT: frintx v5.2s, v1.2s @@ -939,8 +939,8 @@ define <16 x iXLen> @lrint_v16i64_v16f32(<16 x float> %x) { } declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>) -define <32 x iXLen> @lrint_v32i64_v32f32(<32 x float> %x) { -; CHECK-i32-LABEL: lrint_v32i64_v32f32: +define <32 x iXLen> @lrint_v32f32(<32 x float> %x) { +; CHECK-i32-LABEL: lrint_v32f32: ; CHECK-i32: // %bb.0: ; CHECK-i32-NEXT: frintx v0.4s, v0.4s ; CHECK-i32-NEXT: frintx v1.4s, v1.4s @@ -960,7 +960,7 @@ define <32 x iXLen> @lrint_v32i64_v32f32(<32 x float> %x) { ; CHECK-i32-NEXT: fcvtzs v7.4s, v7.4s ; CHECK-i32-NEXT: ret ; -; CHECK-i64-LABEL: lrint_v32i64_v32f32: +; CHECK-i64-LABEL: lrint_v32f32: ; CHECK-i64: // %bb.0: ; CHECK-i64-NEXT: ext v17.16b, v3.16b, v3.16b, #8 ; CHECK-i64-NEXT: ext v18.16b, v4.16b, v4.16b, #8