From 7e58c4fb5168d7db68f573ea41852469ff8bac4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Tue, 15 Oct 2024 07:56:23 +0200 Subject: [PATCH 1/9] [GlobalISel] Combine G_UNMERGE_VALUES with anyext and build vector G_UNMERGE_VALUES (G_ANYEXT (G_BUILD_VECTOR)) ag G_UNMERGE_VALUES llvm/test/CodeGen/AArch64/GlobalISel | grep ANYEXT [ANYEXT] is build vector or shuffle vector Prior art: https://reviews.llvm.org/D87117 https://reviews.llvm.org/D87166 https://reviews.llvm.org/D87174 https://reviews.llvm.org/D87427 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<4 x s16>), [[UV11:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>) Test: llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 4 + .../CodeGen/GlobalISel/GenericMachineInstrs.h | 8 + .../include/llvm/Target/GlobalISel/Combine.td | 29 +- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 63 +++ llvm/lib/Target/AArch64/AArch64Combine.td | 4 +- .../combine-shift-immed-mismatch-crash.mir | 43 +- .../GlobalISel/combine-shifts-undef.mir | 15 +- .../AArch64/GlobalISel/combine-trunc.mir | 25 +- .../AArch64/GlobalISel/combine-unmerge.mir | 117 ++++- .../legalize-shuffle-vector-widen-crash.ll | 19 +- llvm/test/CodeGen/AArch64/add.ll | 46 +- llvm/test/CodeGen/AArch64/andorxor.ll | 138 +++--- .../AArch64/arm64-extract-insert-varidx.ll | 34 +- llvm/test/CodeGen/AArch64/bitcast.ll | 41 +- llvm/test/CodeGen/AArch64/concat-vector.ll | 7 +- llvm/test/CodeGen/AArch64/fptoi.ll | 10 +- .../test/CodeGen/AArch64/fptosi-sat-scalar.ll | 51 +- .../test/CodeGen/AArch64/fptosi-sat-vector.ll | 438 ++++++++---------- .../test/CodeGen/AArch64/fptoui-sat-scalar.ll | 49 +- .../test/CodeGen/AArch64/fptoui-sat-vector.ll 
| 363 +++++++-------- llvm/test/CodeGen/AArch64/load.ll | 7 +- llvm/test/CodeGen/AArch64/mul.ll | 46 +- .../AArch64/neon-bitwise-instructions.ll | 36 +- .../AArch64/neon-compare-instructions.ll | 23 +- llvm/test/CodeGen/AArch64/setcc_knownbits.ll | 18 +- llvm/test/CodeGen/AArch64/sext.ll | 90 ++-- llvm/test/CodeGen/AArch64/shift-logic.ll | 3 +- llvm/test/CodeGen/AArch64/sub.ll | 46 +- llvm/test/CodeGen/AArch64/xtn.ll | 5 +- llvm/test/CodeGen/AArch64/zext.ll | 80 ++-- .../AMDGPU/GlobalISel/combine-trunc-shift.mir | 21 +- .../CodeGen/AMDGPU/GlobalISel/sext_inreg.ll | 19 + llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll | 42 +- 33 files changed, 1013 insertions(+), 927 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 76d51ab819f44..ecca7396b9019 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -918,6 +918,10 @@ class CombinerHelper { bool matchCanonicalizeICmp(const MachineInstr &MI, BuildFnTy &MatchInfo); bool matchCanonicalizeFCmp(const MachineInstr &MI, BuildFnTy &MatchInfo); + // unmerge_values anyext build vector + bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, + BuildFnTy &MatchInfo); + private: /// Checks for legality of an indexed variant of \p LdSt. bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index d9f3f4ab3935d..92d37753791c6 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -868,6 +868,14 @@ class GZext : public GCastOp { }; }; +/// Represents an any ext. +class GAnyExt : public GCastOp { +public: + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_ANYEXT; + }; +}; + /// Represents a trunc. 
class GTrunc : public GCastOp { public: diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index d0373a7dadfcf..8a98303f4437e 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -420,7 +420,7 @@ def unary_undef_to_zero: GICombineRule< // replaced with undef. def propagate_undef_any_op: GICombineRule< (defs root:$root), - (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST):$root, + (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST, G_ANYEXT):$root, [{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]), (apply [{ Helper.replaceInstWithUndef(*${root}); }])>; @@ -428,7 +428,7 @@ def propagate_undef_any_op: GICombineRule< // replaced with undef. def propagate_undef_all_ops: GICombineRule< (defs root:$root), - (match (wip_match_opcode G_SHUFFLE_VECTOR):$root, + (match (wip_match_opcode G_SHUFFLE_VECTOR, G_BUILD_VECTOR):$root, [{ return Helper.matchAllExplicitUsesAreUndef(*${root}); }]), (apply [{ Helper.replaceInstWithUndef(*${root}); }])>; @@ -832,6 +832,14 @@ def unmerge_dead_to_trunc : GICombineRule< (apply [{ Helper.applyCombineUnmergeWithDeadLanesToTrunc(*${d}); }]) >; +// Transform unmerge any build vector -> build vector anyext +def unmerge_anyext_build_vector : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_UNMERGE_VALUES): $root, + [{ return Helper.matchUnmergeValuesAnyExtBuildVector(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }]) +>; + // Transform x,y = unmerge(zext(z)) -> x = zext z; y = 0. 
def unmerge_zext_to_zext : GICombineRule< (defs root:$d), @@ -840,6 +848,16 @@ def unmerge_zext_to_zext : GICombineRule< (apply [{ Helper.applyCombineUnmergeZExtToZExt(*${d}); }]) >; +def merge_combines: GICombineGroup<[ + unmerge_anyext_build_vector, + unmerge_merge, + merge_unmerge, + unmerge_cst, + unmerge_undef, + unmerge_dead_to_trunc, + unmerge_zext_to_zext +]>; + // Under certain conditions, transform: // trunc (shl x, K) -> shl (trunc x), K// // trunc ([al]shr x, K) -> (trunc ([al]shr (trunc x), K)) @@ -1851,7 +1869,6 @@ def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, propagate_undef_all_ops, propagate_undef_shuffle_mask, erase_undef_store, - unmerge_undef, insert_extract_vec_elt_out_of_bounds]>; def identity_combines : GICombineGroup<[select_same_val, right_identity_zero, @@ -1911,8 +1928,6 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines, width_reduction_combines, select_combines, known_bits_simplifications, not_cmp_fold, opt_brcond_by_inverting_cond, - unmerge_merge, unmerge_cst, unmerge_dead_to_trunc, - unmerge_zext_to_zext, merge_unmerge, trunc_shift, const_combines, xor_of_and_with_same_reg, ptr_add_with_zero, shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine, div_rem_to_divrem, funnel_shift_combines, bitreverse_shift, commute_shift, @@ -1920,11 +1935,11 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines, constant_fold_cast_op, fabs_fneg_fold, intdiv_combines, mulh_combines, redundant_neg_operands, and_or_disjoint_mask, fma_combines, fold_binop_into_select, - sub_add_reg, select_to_minmax, + sub_add_reg, select_to_minmax, fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors, combine_concat_vector, match_addos, sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat, - combine_use_vector_truncate]>; + combine_use_vector_truncate, merge_combines]>; // A combine group used to for prelegalizer combiners at -O0. 
The combines in // this group have been selected based on experiments to balance code size and diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index f9b1621955c21..9b63de2713ade 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -7611,3 +7611,66 @@ bool CombinerHelper::matchFoldAMinusC1PlusC2(const MachineInstr &MI, return true; } + +bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, + BuildFnTy &MatchInfo) { + const GUnmerge *Unmerge = cast<GUnmerge>(&MI); + + if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg())) + return false; + + const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg()); + + LLT DstTy = MRI.getType(Unmerge->getReg(0)); + + // We want to unmerge into vectors. + if (!DstTy.isFixedVector()) + return false; + + if (const GAnyExt *Any = dyn_cast<GAnyExt>(Source)) { + const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg()); + + if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) { + // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR + + if (!MRI.hasOneNonDBGUse(BV->getReg(0))) + return false; + + // FIXME: check element types?
+ if (BV->getNumSources() % Unmerge->getNumDefs() != 0) + return false; + + LLT BigBvTy = MRI.getType(BV->getReg(0)); + LLT SmallBvTy = DstTy; + LLT SmallBvElemenTy = SmallBvTy.getElementType(); + + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}})) + return false; + + // check scalar anyext + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_ANYEXT, + {SmallBvElemenTy, BigBvTy.getElementType()}})) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + // build into each G_UNMERGE_VALUES def + // a small build vector with anyext from the source build vector + for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) { + SmallVector<Register, 8> Ops; + for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) { + auto AnyExt = B.buildAnyExt( + SmallBvElemenTy, + BV->getSourceReg(I * SmallBvTy.getNumElements() + J)); + Ops.push_back(AnyExt.getReg(0)); + } + B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops); + }; + }; + return true; + }; + }; + + return false; +} diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 321190c83b79f..8af8cdfeba6ac 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -322,13 +322,13 @@ def AArch64PostLegalizerCombiner extractvecelt_pairwise_add, redundant_or, mul_const, redundant_sext_inreg, form_bitfield_extract, rotate_out_of_range, - icmp_to_true_false_known_bits, merge_unmerge, + icmp_to_true_false_known_bits, select_combines, fold_merge_to_zext, constant_fold_binops, identity_combines, ptr_add_immed_chain, overlapping_and, split_store_zero_128, undef_combines, select_to_minmax, or_to_bsp, combine_concat_vector, - commute_constant_to_rhs, + commute_constant_to_rhs, merge_combines, push_freeze_to_prevent_poison_from_propagating, combine_mul_cmlt, combine_use_vector_truncate]> { } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir
b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir index 96a6f18b1d410..0f6dd23b5bb5e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir @@ -9,24 +9,31 @@ liveins: body: | ; CHECK-LABEL: name: shift_immed_chain_mismatch_size_crash ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $x0 - ; CHECK: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; CHECK: G_BRCOND [[DEF]](s1), %bb.2 - ; CHECK: G_BR %bb.1 - ; CHECK: bb.1: - ; CHECK: successors: - ; CHECK: bb.2: - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p0) :: (load (s32) from `ptr undef`, align 8) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = nsw G_SHL [[LOAD]], [[C1]](s32) - ; CHECK: [[MUL:%[0-9]+]]:_(s32) = nsw G_MUL [[SHL]], [[C]] - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[MUL]], [[C2]](s64) - ; CHECK: $w0 = COPY [[SHL1]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; CHECK-NEXT: G_BRCOND [[DEF]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p0) :: (load (s32) from `ptr undef`, align 8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = nsw G_SHL [[LOAD]], [[C1]](s32) + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = nsw G_MUL [[SHL]], [[C]] + ; 
CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = nsw G_SHL [[MUL]], [[C2]](s32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SHL1]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C3]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[SHL2]](s64) + ; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.1: liveins: $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shifts-undef.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shifts-undef.mir index d4dc24741527b..236d49fc99c62 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shifts-undef.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shifts-undef.mir @@ -13,9 +13,8 @@ body: | ; CHECK-LABEL: name: shl_by_ge_bw ; CHECK: liveins: $w0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s16) - ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $w0 = COPY [[DEF]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %1:_(s32) = COPY $w0 %0:_(s16) = G_TRUNC %1(s32) @@ -39,9 +38,8 @@ body: | ; CHECK-LABEL: name: lshr_by_ge_bw ; CHECK: liveins: $w0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s16) - ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $w0 = COPY [[DEF]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %1:_(s32) = COPY $w0 %0:_(s16) = G_TRUNC %1(s32) @@ -65,9 +63,8 @@ body: | ; CHECK-LABEL: name: ashr_by_ge_bw ; CHECK: liveins: $w0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[DEF]](s16) - ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: 
[[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: $w0 = COPY [[DEF]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 %1:_(s32) = COPY $w0 %0:_(s16) = G_TRUNC %1(s32) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir index 9a2b9dd4b2b60..82d0dd7b37cc4 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir @@ -141,23 +141,14 @@ legalized: true body: | bb.1: liveins: $w0 - ; CHECK-PRE-LABEL: name: test_combine_trunc_shl_s32_by_2 - ; CHECK-PRE: liveins: $w0 - ; CHECK-PRE-NEXT: {{ $}} - ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK-PRE-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-PRE-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-PRE-NEXT: $h0 = COPY [[SHL]](s16) - ; - ; CHECK-POST-LABEL: name: test_combine_trunc_shl_s32_by_2 - ; CHECK-POST: liveins: $w0 - ; CHECK-POST-NEXT: {{ $}} - ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK-POST-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-POST-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-POST-NEXT: $h0 = COPY [[TRUNC]](s16) + ; CHECK-LABEL: name: test_combine_trunc_shl_s32_by_2 + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CHECK-NEXT: $h0 = COPY [[TRUNC]](s16) %0:_(s32) = COPY $w0 %1:_(s32) = G_CONSTANT i32 2 %2:_(s32) = G_SHL %0(s32), %1(s32) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir index c2c6e04d2d0ce..7566d38e6c6cf 
100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir @@ -54,9 +54,8 @@ body: | bb.1: ; CHECK-LABEL: name: test_combine_unmerge_build_vector ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: $w0 = COPY [[DEF]](s32) - ; CHECK-NEXT: $w1 = COPY [[DEF1]](s32) + ; CHECK-NEXT: $w1 = COPY [[DEF]](s32) %0:_(s32) = G_IMPLICIT_DEF %1:_(s32) = G_IMPLICIT_DEF %2:_(<2 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32) @@ -74,11 +73,9 @@ body: | bb.1: ; CHECK-LABEL: name: test_combine_unmerge_buildvector_3ops ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: $w0 = COPY [[DEF]](s32) - ; CHECK-NEXT: $w1 = COPY [[DEF1]](s32) - ; CHECK-NEXT: $w2 = COPY [[DEF2]](s32) + ; CHECK-NEXT: $w1 = COPY [[DEF]](s32) + ; CHECK-NEXT: $w2 = COPY [[DEF]](s32) %0:_(s32) = G_IMPLICIT_DEF %1:_(s32) = G_IMPLICIT_DEF %5:_(s32) = G_IMPLICIT_DEF @@ -434,3 +431,111 @@ body: | $w0 = COPY %1(s32) $w1 = COPY %2(s32) ... 
+ +# Check that we unmerge the build vector on the anyext +--- +name: test_anyext_buildvector +body: | + bb.1: + ; CHECK-LABEL: name: test_anyext_buildvector + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) + ; CHECK-NEXT: %un1:_(<2 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY2]](s32) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY3]](s32) + ; CHECK-NEXT: %un2:_(<2 x s64>) = G_BUILD_VECTOR [[ANYEXT2]](s64), [[ANYEXT3]](s64) + ; CHECK-NEXT: $q0 = COPY %un1(<2 x s64>) + ; CHECK-NEXT: $q1 = COPY %un2(<2 x s64>) + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w0 + %2:_(s32) = COPY $w0 + %3:_(s32) = COPY $w0 + %bv:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32) + %any:_(<4 x s64>) = G_ANYEXT %bv(<4 x s32>) + %un1:_(<2 x s64>), %un2:_(<2 x s64>) = G_UNMERGE_VALUES %any(<4 x s64>) + $q0 = COPY %un1(<2 x s64>) + $q1 = COPY %un2(<2 x s64>) +... 
+ +# Check that we unmerge the build vector on the anyext and undef +--- +name: test_anyext_buildvector_undef +body: | + bb.1: + ; CHECK-LABEL: name: test_anyext_buildvector_undef + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) + ; CHECK-NEXT: %un1:_(<2 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64) + ; CHECK-NEXT: %un2:_(<2 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: $q0 = COPY %un1(<2 x s64>) + ; CHECK-NEXT: $q1 = COPY %un2(<2 x s64>) + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w0 + %2:_(s32) = G_IMPLICIT_DEF + %3:_(s32) = G_IMPLICIT_DEF + %bv:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32) + %any:_(<4 x s64>) = G_ANYEXT %bv(<4 x s32>) + %un1:_(<2 x s64>), %un2:_(<2 x s64>) = G_UNMERGE_VALUES %any(<4 x s64>) + $q0 = COPY %un1(<2 x s64>) + $q1 = COPY %un2(<2 x s64>) +... + +# Check that we don't unmerge the build vector on the anyext, multi-use +--- +name: test_anyext_buildvector_multi +body: | + bb.1: + ; CHECK-LABEL: name: test_anyext_buildvector_multi + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: %bv:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[DEF]](s32), [[DEF1]](s32) + ; CHECK-NEXT: %any:_(<4 x s64>) = G_ANYEXT %bv(<4 x s32>) + ; CHECK-NEXT: %un1:_(<2 x s64>), %un2:_(<2 x s64>) = G_UNMERGE_VALUES %any(<4 x s64>) + ; CHECK-NEXT: $q0 = COPY %un1(<2 x s64>) + ; CHECK-NEXT: $q1 = COPY %un2(<2 x s64>) + ; CHECK-NEXT: $q2 = COPY %bv(<4 x s32>) + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w0 + %2:_(s32) = G_IMPLICIT_DEF + %3:_(s32) = G_IMPLICIT_DEF + %bv:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32) + %any:_(<4 x s64>) = G_ANYEXT %bv(<4 x s32>) + 
%un1:_(<2 x s64>), %un2:_(<2 x s64>) = G_UNMERGE_VALUES %any(<4 x s64>) + $q0 = COPY %un1(<2 x s64>) + $q1 = COPY %un2(<2 x s64>) + $q2 = COPY %bv(<4 x s32>) +... + +# Check that we don't unmerge the build vector on the anyext into scalar +--- +name: test_anyext_buildvector_scalar +body: | + bb.1: + ; CHECK-LABEL: name: test_anyext_buildvector_scalar + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: %bv:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: %any:_(<4 x s64>) = G_ANYEXT %bv(<4 x s32>) + ; CHECK-NEXT: %un1:_(s128), %un2:_(s128) = G_UNMERGE_VALUES %any(<4 x s64>) + ; CHECK-NEXT: $q0 = COPY %un1(s128) + ; CHECK-NEXT: $q1 = COPY %un2(s128) + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w0 + %2:_(s32) = COPY $w0 + %3:_(s32) = COPY $w0 + %bv:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32) + %any:_(<4 x s64>) = G_ANYEXT %bv(<4 x s32>) + %un1:_(s128), %un2:_(s128) = G_UNMERGE_VALUES %any(<4 x s64>) + $q0 = COPY %un1(s128) + $q1 = COPY %un2(s128) +... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll index 87c1307ad2955..a8377c04dbec8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll @@ -10,17 +10,18 @@ define i32 @bar() { ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: mov b1, v0[1] -; CHECK-NEXT: mov b2, v0[3] +; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: mov b3, v0[2] +; CHECK-NEXT: mov b0, v0[3] +; CHECK-NEXT: mov.s v2[0], w8 ; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: mov.h v0[1], w8 -; CHECK-NEXT: mov.h v3[1], w9 -; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushll.4s v1, v3, #0 -; CHECK-NEXT: mov.d v0[1], v1[0] -; CHECK-NEXT: movi.4s v1, #1 -; CHECK-NEXT: and.16b v0, v0, v1 +; CHECK-NEXT: mov.s v2[1], w8 +; CHECK-NEXT: fmov w8, s3 +; CHECK-NEXT: mov.s v2[2], w8 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: movi.4s v0, #1 +; CHECK-NEXT: mov.s v2[3], w8 +; CHECK-NEXT: and.16b v0, v2, v0 ; CHECK-NEXT: addv.4s s0, v0 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/add.ll b/llvm/test/CodeGen/AArch64/add.ll index ce7e3101a7a54..e3072dc41d933 100644 --- a/llvm/test/CodeGen/AArch64/add.ll +++ b/llvm/test/CodeGen/AArch64/add.ll @@ -155,21 +155,23 @@ define void @v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: fmov s1, w9 ; CHECK-GI-NEXT: mov b2, v0.b[1] -; CHECK-GI-NEXT: mov v3.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b4, v1.b[1] -; CHECK-GI-NEXT: mov v5.b[0], v1.b[0] -; CHECK-GI-NEXT: mov v3.b[1], v2.b[0] -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v5.b[1], v4.b[0] -; CHECK-GI-NEXT: mov b4, v1.b[2] -; CHECK-GI-NEXT: mov b1, v1.b[3] -; CHECK-GI-NEXT: mov v3.b[2], v2.b[0] -; CHECK-GI-NEXT: mov v5.b[2], v4.b[0] -; CHECK-GI-NEXT: mov 
v3.b[3], v0.b[0] -; CHECK-GI-NEXT: mov v5.b[3], v1.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 -; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: fmov w8, s2 +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: fmov w9, s3 +; CHECK-GI-NEXT: mov b3, v1.b[3] +; CHECK-GI-NEXT: mov v0.h[1], w8 +; CHECK-GI-NEXT: mov v1.h[1], w9 +; CHECK-GI-NEXT: fmov w8, s4 +; CHECK-GI-NEXT: fmov w9, s2 +; CHECK-GI-NEXT: mov v0.h[2], w8 +; CHECK-GI-NEXT: mov v1.h[2], w9 +; CHECK-GI-NEXT: fmov w8, s5 +; CHECK-GI-NEXT: fmov w9, s3 +; CHECK-GI-NEXT: mov v0.h[3], w8 +; CHECK-GI-NEXT: mov v1.h[3], w9 ; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 @@ -238,14 +240,12 @@ define void @v2i16(ptr %p1, ptr %p2) { ; ; CHECK-GI-LABEL: v2i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ldr h0, [x0] -; CHECK-GI-NEXT: ldr h1, [x1] -; CHECK-GI-NEXT: add x8, x0, #2 -; CHECK-GI-NEXT: add x9, x1, #2 -; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] -; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: ld1 { v0.h }[0], [x0] +; CHECK-GI-NEXT: ld1 { v1.h }[0], [x1] +; CHECK-GI-NEXT: ldr h2, [x0, #2] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] +; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] ; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s ; CHECK-GI-NEXT: mov s1, v0.s[1] ; CHECK-GI-NEXT: str h0, [x0] diff --git a/llvm/test/CodeGen/AArch64/andorxor.ll b/llvm/test/CodeGen/AArch64/andorxor.ll index 459daece90dee..5c7429aebb31e 100644 --- a/llvm/test/CodeGen/AArch64/andorxor.ll +++ b/llvm/test/CodeGen/AArch64/andorxor.ll @@ -447,21 +447,23 @@ define void @and_v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: fmov s1, w9 ; CHECK-GI-NEXT: mov b2, v0.b[1] -; CHECK-GI-NEXT: mov v3.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b4, v1.b[1] -; 
CHECK-GI-NEXT: mov v5.b[0], v1.b[0] -; CHECK-GI-NEXT: mov v3.b[1], v2.b[0] -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v5.b[1], v4.b[0] -; CHECK-GI-NEXT: mov b4, v1.b[2] -; CHECK-GI-NEXT: mov b1, v1.b[3] -; CHECK-GI-NEXT: mov v3.b[2], v2.b[0] -; CHECK-GI-NEXT: mov v5.b[2], v4.b[0] -; CHECK-GI-NEXT: mov v3.b[3], v0.b[0] -; CHECK-GI-NEXT: mov v5.b[3], v1.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 -; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: fmov w8, s2 +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: fmov w9, s3 +; CHECK-GI-NEXT: mov b3, v1.b[3] +; CHECK-GI-NEXT: mov v0.h[1], w8 +; CHECK-GI-NEXT: mov v1.h[1], w9 +; CHECK-GI-NEXT: fmov w8, s4 +; CHECK-GI-NEXT: fmov w9, s2 +; CHECK-GI-NEXT: mov v0.h[2], w8 +; CHECK-GI-NEXT: mov v1.h[2], w9 +; CHECK-GI-NEXT: fmov w8, s5 +; CHECK-GI-NEXT: fmov w9, s3 +; CHECK-GI-NEXT: mov v0.h[3], w8 +; CHECK-GI-NEXT: mov v1.h[3], w9 ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 @@ -494,21 +496,23 @@ define void @or_v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: fmov s1, w9 ; CHECK-GI-NEXT: mov b2, v0.b[1] -; CHECK-GI-NEXT: mov v3.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b4, v1.b[1] -; CHECK-GI-NEXT: mov v5.b[0], v1.b[0] -; CHECK-GI-NEXT: mov v3.b[1], v2.b[0] -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v5.b[1], v4.b[0] -; CHECK-GI-NEXT: mov b4, v1.b[2] -; CHECK-GI-NEXT: mov b1, v1.b[3] -; CHECK-GI-NEXT: mov v3.b[2], v2.b[0] -; CHECK-GI-NEXT: mov v5.b[2], v4.b[0] -; CHECK-GI-NEXT: mov v3.b[3], v0.b[0] -; CHECK-GI-NEXT: mov v5.b[3], v1.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 -; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: fmov 
w8, s2 +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: fmov w9, s3 +; CHECK-GI-NEXT: mov b3, v1.b[3] +; CHECK-GI-NEXT: mov v0.h[1], w8 +; CHECK-GI-NEXT: mov v1.h[1], w9 +; CHECK-GI-NEXT: fmov w8, s4 +; CHECK-GI-NEXT: fmov w9, s2 +; CHECK-GI-NEXT: mov v0.h[2], w8 +; CHECK-GI-NEXT: mov v1.h[2], w9 +; CHECK-GI-NEXT: fmov w8, s5 +; CHECK-GI-NEXT: fmov w9, s3 +; CHECK-GI-NEXT: mov v0.h[3], w8 +; CHECK-GI-NEXT: mov v1.h[3], w9 ; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 @@ -541,21 +545,23 @@ define void @xor_v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: fmov s1, w9 ; CHECK-GI-NEXT: mov b2, v0.b[1] -; CHECK-GI-NEXT: mov v3.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b4, v1.b[1] -; CHECK-GI-NEXT: mov v5.b[0], v1.b[0] -; CHECK-GI-NEXT: mov v3.b[1], v2.b[0] -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v5.b[1], v4.b[0] -; CHECK-GI-NEXT: mov b4, v1.b[2] -; CHECK-GI-NEXT: mov b1, v1.b[3] -; CHECK-GI-NEXT: mov v3.b[2], v2.b[0] -; CHECK-GI-NEXT: mov v5.b[2], v4.b[0] -; CHECK-GI-NEXT: mov v3.b[3], v0.b[0] -; CHECK-GI-NEXT: mov v5.b[3], v1.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 -; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: fmov w8, s2 +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: fmov w9, s3 +; CHECK-GI-NEXT: mov b3, v1.b[3] +; CHECK-GI-NEXT: mov v0.h[1], w8 +; CHECK-GI-NEXT: mov v1.h[1], w9 +; CHECK-GI-NEXT: fmov w8, s4 +; CHECK-GI-NEXT: fmov w9, s2 +; CHECK-GI-NEXT: mov v0.h[2], w8 +; CHECK-GI-NEXT: mov v1.h[2], w9 +; CHECK-GI-NEXT: fmov w8, s5 +; CHECK-GI-NEXT: fmov w9, s3 +; CHECK-GI-NEXT: mov v0.h[3], w8 +; CHECK-GI-NEXT: mov v1.h[3], w9 ; CHECK-GI-NEXT: eor v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 @@ -698,14 +704,12 @@ define void @and_v2i16(ptr %p1, ptr %p2) { ; ; 
CHECK-GI-LABEL: and_v2i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ldr h0, [x0] -; CHECK-GI-NEXT: ldr h1, [x1] -; CHECK-GI-NEXT: add x8, x0, #2 -; CHECK-GI-NEXT: add x9, x1, #2 -; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] -; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: ld1 { v0.h }[0], [x0] +; CHECK-GI-NEXT: ld1 { v1.h }[0], [x1] +; CHECK-GI-NEXT: ldr h2, [x0, #2] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] +; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: mov s1, v0.s[1] ; CHECK-GI-NEXT: str h0, [x0] @@ -737,14 +741,12 @@ define void @or_v2i16(ptr %p1, ptr %p2) { ; ; CHECK-GI-LABEL: or_v2i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ldr h0, [x0] -; CHECK-GI-NEXT: ldr h1, [x1] -; CHECK-GI-NEXT: add x8, x0, #2 -; CHECK-GI-NEXT: add x9, x1, #2 -; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] -; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: ld1 { v0.h }[0], [x0] +; CHECK-GI-NEXT: ld1 { v1.h }[0], [x1] +; CHECK-GI-NEXT: ldr h2, [x0, #2] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] +; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] ; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: mov s1, v0.s[1] ; CHECK-GI-NEXT: str h0, [x0] @@ -776,14 +778,12 @@ define void @xor_v2i16(ptr %p1, ptr %p2) { ; ; CHECK-GI-LABEL: xor_v2i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ldr h0, [x0] -; CHECK-GI-NEXT: ldr h1, [x1] -; CHECK-GI-NEXT: add x8, x0, #2 -; CHECK-GI-NEXT: add x9, x1, #2 -; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] -; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: ld1 { v0.h }[0], [x0] +; CHECK-GI-NEXT: ld1 { v1.h }[0], [x1] +; CHECK-GI-NEXT: ldr h2, [x0, #2] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; 
CHECK-GI-NEXT: mov v0.s[1], v2.s[0] +; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] ; CHECK-GI-NEXT: eor v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: mov s1, v0.s[1] ; CHECK-GI-NEXT: str h0, [x0] diff --git a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll index 8611532d6ea92..7a4cdd52db904 100644 --- a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll +++ b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll @@ -29,21 +29,23 @@ define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) { ; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16 ; CHECK-GISEL-NEXT: mov w9, w0 ; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GISEL-NEXT: mov b1, v0.b[1] ; CHECK-GISEL-NEXT: add x8, sp, #8 -; CHECK-GISEL-NEXT: str d0, [sp, #8] ; CHECK-GISEL-NEXT: and x9, x9, #0x7 -; CHECK-GISEL-NEXT: mov b2, v0.b[1] -; CHECK-GISEL-NEXT: mov b3, v0.b[2] +; CHECK-GISEL-NEXT: str d0, [sp, #8] +; CHECK-GISEL-NEXT: mov b2, v0.b[2] ; CHECK-GISEL-NEXT: lsl x10, x9, #1 ; CHECK-GISEL-NEXT: mov b0, v0.b[3] ; CHECK-GISEL-NEXT: sub x9, x10, x9 -; CHECK-GISEL-NEXT: ldr b1, [x8, x9] -; CHECK-GISEL-NEXT: mov v1.b[0], v1.b[0] -; CHECK-GISEL-NEXT: mov v1.b[1], v2.b[0] -; CHECK-GISEL-NEXT: mov v1.b[2], v3.b[0] -; CHECK-GISEL-NEXT: mov v1.b[3], v0.b[0] -; CHECK-GISEL-NEXT: ushll v0.8h, v1.8b, #0 -; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GISEL-NEXT: ldrb w8, [x8, x9] +; CHECK-GISEL-NEXT: fmov w9, s1 +; CHECK-GISEL-NEXT: fmov s1, w8 +; CHECK-GISEL-NEXT: fmov w8, s2 +; CHECK-GISEL-NEXT: mov v1.h[1], w9 +; CHECK-GISEL-NEXT: mov v1.h[2], w8 +; CHECK-GISEL-NEXT: fmov w8, s0 +; CHECK-GISEL-NEXT: mov v1.h[3], w8 +; CHECK-GISEL-NEXT: fmov d0, d1 ; CHECK-GISEL-NEXT: add sp, sp, #16 ; CHECK-GISEL-NEXT: ret %tmp = extractelement <8 x i8> %x, i32 %idx @@ -179,13 +181,15 @@ define <2 x i16> @test_varidx_extract_v4s16(<4 x i16> %x, i32 %idx) { ; CHECK-GISEL-NEXT: sub sp, sp, #16 ; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16 ; 
CHECK-GISEL-NEXT: mov w9, w0 +; CHECK-GISEL-NEXT: mov w8, #2 // =0x2 +; CHECK-GISEL-NEXT: add x10, sp, #8 +; CHECK-GISEL-NEXT: and x9, x9, #0x3 ; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GISEL-NEXT: add x8, sp, #8 ; CHECK-GISEL-NEXT: str d0, [sp, #8] -; CHECK-GISEL-NEXT: and x9, x9, #0x3 -; CHECK-GISEL-NEXT: ldr h1, [x8, x9, lsl #1] -; CHECK-GISEL-NEXT: mov v1.h[1], v0.h[1] -; CHECK-GISEL-NEXT: ushll v0.4s, v1.4h, #0 +; CHECK-GISEL-NEXT: madd x8, x9, x8, x10 +; CHECK-GISEL-NEXT: umov w9, v0.h[1] +; CHECK-GISEL-NEXT: ld1 { v0.h }[0], [x8] +; CHECK-GISEL-NEXT: mov v0.s[1], w9 ; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GISEL-NEXT: add sp, sp, #16 ; CHECK-GISEL-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll index bbdf8b0a13d35..39f2572d9fd35 100644 --- a/llvm/test/CodeGen/AArch64/bitcast.ll +++ b/llvm/test/CodeGen/AArch64/bitcast.ll @@ -81,13 +81,14 @@ define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){ ; CHECK-GI-NEXT: add w8, w0, w1 ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0 +; CHECK-GI-NEXT: mov b2, v0.b[2] +; CHECK-GI-NEXT: fmov w8, s1 +; CHECK-GI-NEXT: mov b1, v0.b[3] +; CHECK-GI-NEXT: mov v0.h[1], w8 +; CHECK-GI-NEXT: fmov w8, s2 +; CHECK-GI-NEXT: mov v0.h[2], w8 +; CHECK-GI-NEXT: fmov w8, s1 +; CHECK-GI-NEXT: mov v0.h[3], w8 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %c = add i32 %a, %b @@ -134,8 +135,9 @@ define <2 x i16> @bitcast_i32_v2i16(i32 %a, i32 %b){ ; CHECK-GI-NEXT: add w8, w0, w1 ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: mov 
v0.s[0], w8 +; CHECK-GI-NEXT: fmov w8, s1 +; CHECK-GI-NEXT: mov v0.s[1], w8 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %c = add i32 %a, %b @@ -414,13 +416,14 @@ define <4 x i8> @bitcast_v2i16_v4i8(<2 x i16> %a, <2 x i16> %b){ ; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0 +; CHECK-GI-NEXT: mov b2, v0.b[2] +; CHECK-GI-NEXT: fmov w8, s1 +; CHECK-GI-NEXT: mov b1, v0.b[3] +; CHECK-GI-NEXT: mov v0.h[1], w8 +; CHECK-GI-NEXT: fmov w8, s2 +; CHECK-GI-NEXT: mov v0.h[2], w8 +; CHECK-GI-NEXT: fmov w8, s1 +; CHECK-GI-NEXT: mov v0.h[3], w8 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %c = add <2 x i16> %a, %b @@ -449,8 +452,10 @@ define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){ ; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: mov v0.s[0], w8 +; CHECK-GI-NEXT: fmov w8, s1 +; CHECK-GI-NEXT: mov v0.s[1], w8 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %c = add <4 x i8> %a, %b diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll index 0033999b9bd51..41b336bc3e8c0 100644 --- a/llvm/test/CodeGen/AArch64/concat-vector.ll +++ b/llvm/test/CodeGen/AArch64/concat-vector.ll @@ -14,11 +14,10 @@ define <4 x i8> @concat1(<2 x i8> %A, <2 x i8> %B) { ; CHECK-GI-NEXT: mov w8, v0.s[1] ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: mov w9, v1.s[1] -; CHECK-GI-NEXT: mov v0.b[1], w8 +; CHECK-GI-NEXT: 
mov v0.h[1], w8 ; CHECK-GI-NEXT: fmov w8, s1 -; CHECK-GI-NEXT: mov v0.b[2], w8 -; CHECK-GI-NEXT: mov v0.b[3], w9 -; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: mov v0.h[2], w8 +; CHECK-GI-NEXT: mov v0.h[3], w9 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %v4i8 = shufflevector <2 x i8> %A, <2 x i8> %B, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index c436c410a4e39..9c4f0207b84ce 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -7616,10 +7616,9 @@ define <2 x i16> @fptos_v2f128_v2i16(<2 x fp128> %a) { ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: fmov s0, w19 +; CHECK-GI-NEXT: mov v0.s[0], w19 ; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov v0.h[1], w0 -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: mov v0.s[1], w0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: add sp, sp, #32 ; CHECK-GI-NEXT: ret @@ -7660,10 +7659,9 @@ define <2 x i16> @fptou_v2f128_v2i16(<2 x fp128> %a) { ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: bl __fixunstfsi -; CHECK-GI-NEXT: fmov s0, w19 +; CHECK-GI-NEXT: mov v0.s[0], w19 ; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov v0.h[1], w0 -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: mov v0.s[1], w0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: add sp, sp, #32 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll index 9c52b024d3e25..17c87a5dae419 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll @@ -977,51 +977,46 @@ define i32 @test_signed_f128_i32(fp128 %f) { ; ; CHECK-GI-LABEL: 
test_signed_f128_i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 +; CHECK-GI-NEXT: sub sp, sp, #48 +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w19, -8 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: .cfi_offset b8, -24 -; CHECK-GI-NEXT: .cfi_offset b9, -32 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 ; CHECK-GI-NEXT: adrp x8, .LCPI30_1 +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_1] -; CHECK-GI-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d0, v2.d[1] -; CHECK-GI-NEXT: mov d1, v3.d[1] -; CHECK-GI-NEXT: fcsel d8, d2, d3, lt -; CHECK-GI-NEXT: fmov x8, d8 -; CHECK-GI-NEXT: fcsel d9, d0, d1, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d9 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: mov x9, #-4603241769126068224 // =0xc01e000000000000 +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x19 +; CHECK-GI-NEXT: csel x20, x8, x9, lt ; CHECK-GI-NEXT: adrp x8, .LCPI30_0 +; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_0] -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d0, v1.d[1] -; CHECK-GI-NEXT: fcsel d1, d8, d1, gt -; CHECK-GI-NEXT: fmov x8, d1 -; CHECK-GI-NEXT: fcsel d2, d9, d0, gt 
+; CHECK-GI-NEXT: csel x8, x19, xzr, gt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d2 +; CHECK-GI-NEXT: mov x8, #281474976448512 // =0xfffffffc0000 +; CHECK-GI-NEXT: movk x8, #16413, lsl #48 +; CHECK-GI-NEXT: csel x8, x20, x8, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-GI-NEXT: csel w0, wzr, w19, ne -; CHECK-GI-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-GI-NEXT: add sp, sp, #64 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: add sp, sp, #48 ; CHECK-GI-NEXT: ret %x = call i32 @llvm.fptosi.sat.i32.f128(fp128 %f) ret i32 %x diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index 29a9082173ea5..9ef6d61c350ec 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -525,53 +525,48 @@ define <1 x i32> @test_signed_v1f128_v1i32(<1 x fp128> %f) { ; ; CHECK-GI-LABEL: test_signed_v1f128_v1i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 +; CHECK-GI-NEXT: sub sp, sp, #48 +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 ; CHECK-GI-NEXT: .cfi_offset w19, -8 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: .cfi_offset b8, -24 -; CHECK-GI-NEXT: .cfi_offset b9, -32 +; CHECK-GI-NEXT: .cfi_offset 
w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 ; CHECK-GI-NEXT: adrp x8, .LCPI14_1 +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_1] -; CHECK-GI-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d0, v2.d[1] -; CHECK-GI-NEXT: mov d1, v3.d[1] -; CHECK-GI-NEXT: fcsel d8, d2, d3, lt -; CHECK-GI-NEXT: fmov x8, d8 -; CHECK-GI-NEXT: fcsel d9, d0, d1, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d9 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: mov x9, #-4603241769126068224 // =0xc01e000000000000 +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x19 +; CHECK-GI-NEXT: csel x20, x8, x9, lt ; CHECK-GI-NEXT: adrp x8, .LCPI14_0 +; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d0, v1.d[1] -; CHECK-GI-NEXT: fcsel d1, d8, d1, gt -; CHECK-GI-NEXT: fmov x8, d1 -; CHECK-GI-NEXT: fcsel d2, d9, d0, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, gt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d2 +; CHECK-GI-NEXT: mov x8, #281474976448512 // =0xfffffffc0000 +; CHECK-GI-NEXT: movk x8, #16413, lsl #48 +; CHECK-GI-NEXT: csel x8, x20, x8, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; 
CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-GI-NEXT: csel w8, wzr, w19, ne -; CHECK-GI-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v0.s[0], w8 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: add sp, sp, #64 +; CHECK-GI-NEXT: add sp, sp, #48 ; CHECK-GI-NEXT: ret %x = call <1 x i32> @llvm.fptosi.sat.v1f128.v1i32(<1 x fp128> %f) ret <1 x i32> %x @@ -645,92 +640,82 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) { ; ; CHECK-GI-LABEL: test_signed_v2f128_v2i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sub sp, sp, #128 -; CHECK-GI-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill -; CHECK-GI-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Folded Spill -; CHECK-GI-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 128 +; CHECK-GI-NEXT: sub sp, sp, #112 +; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 112 ; CHECK-GI-NEXT: .cfi_offset w19, -8 ; CHECK-GI-NEXT: .cfi_offset w20, -16 -; CHECK-GI-NEXT: .cfi_offset w30, -32 -; CHECK-GI-NEXT: .cfi_offset b8, -40 -; CHECK-GI-NEXT: .cfi_offset b9, -48 -; CHECK-GI-NEXT: .cfi_offset b10, -56 -; CHECK-GI-NEXT: .cfi_offset b11, -64 +; CHECK-GI-NEXT: .cfi_offset w21, -24 +; CHECK-GI-NEXT: .cfi_offset w22, -32 +; CHECK-GI-NEXT: .cfi_offset w30, -48 ; CHECK-GI-NEXT: adrp x8, .LCPI15_1 -; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI15_1] -; CHECK-GI-NEXT: stp q2, q1, [sp, #32] // 32-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b +; 
CHECK-GI-NEXT: str q2, [sp, #16] // 16-byte Folded Spill ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d0, v2.d[1] -; CHECK-GI-NEXT: mov d8, v1.d[1] -; CHECK-GI-NEXT: fcsel d9, d2, d1, lt -; CHECK-GI-NEXT: fmov x8, d9 -; CHECK-GI-NEXT: fcsel d10, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d10 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: mov x20, #-4603241769126068224 // =0xc01e000000000000 +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x19 +; CHECK-GI-NEXT: csel x21, x8, x20, lt ; CHECK-GI-NEXT: adrp x8, .LCPI15_0 +; CHECK-GI-NEXT: mov v0.d[1], x21 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] -; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d11, v0.d[1] -; CHECK-GI-NEXT: fcsel d0, d9, d0, gt -; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: fcsel d1, d10, d11, gt +; CHECK-GI-NEXT: mov x22, #281474976448512 // =0xfffffffc0000 +; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: movk x22, #16413, lsl #48 ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x21, x22, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 -; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, 
#16] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel w20, wzr, w19, ne +; CHECK-GI-NEXT: csel w21, wzr, w19, ne ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp q2, q1, [sp, #32] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d0, v1.d[1] -; CHECK-GI-NEXT: fcsel d9, d1, d2, lt -; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: fmov x8, d9 -; CHECK-GI-NEXT: fcsel d8, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d8 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x19 +; CHECK-GI-NEXT: csel x20, x8, x20, lt +; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: fcsel d1, d8, d11, gt -; CHECK-GI-NEXT: fcsel d0, d9, d0, gt -; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x8, x19, xzr, gt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x20, x22, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 -; CHECK-GI-NEXT: mov v0.s[0], w20 +; CHECK-GI-NEXT: mov v0.s[0], w21 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-GI-NEXT: csel w8, wzr, w19, ne -; CHECK-GI-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded 
Reload +; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v0.s[1], w8 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: add sp, sp, #128 +; CHECK-GI-NEXT: add sp, sp, #112 ; CHECK-GI-NEXT: ret %x = call <2 x i32> @llvm.fptosi.sat.v2f128.v2i32(<2 x fp128> %f) ret <2 x i32> %x @@ -825,124 +810,107 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) { ; ; CHECK-GI-LABEL: test_signed_v3f128_v3i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sub sp, sp, #144 -; CHECK-GI-NEXT: stp d11, d10, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: stp d9, d8, [sp, #96] // 16-byte Folded Spill -; CHECK-GI-NEXT: stp x30, x21, [sp, #112] // 16-byte Folded Spill -; CHECK-GI-NEXT: stp x20, x19, [sp, #128] // 16-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 144 +; CHECK-GI-NEXT: sub sp, sp, #128 +; CHECK-GI-NEXT: stp x30, x23, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 128 ; CHECK-GI-NEXT: .cfi_offset w19, -8 ; CHECK-GI-NEXT: .cfi_offset w20, -16 ; CHECK-GI-NEXT: .cfi_offset w21, -24 -; CHECK-GI-NEXT: .cfi_offset w30, -32 -; CHECK-GI-NEXT: .cfi_offset b8, -40 -; CHECK-GI-NEXT: .cfi_offset b9, -48 -; CHECK-GI-NEXT: .cfi_offset b10, -56 -; CHECK-GI-NEXT: .cfi_offset b11, -64 +; CHECK-GI-NEXT: .cfi_offset w22, -32 +; CHECK-GI-NEXT: .cfi_offset w23, -40 +; CHECK-GI-NEXT: .cfi_offset w30, -48 ; CHECK-GI-NEXT: adrp x8, .LCPI16_1 -; CHECK-GI-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_1] -; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q2, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, 
#32] // 16-byte Folded Spill ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d0, v2.d[1] -; CHECK-GI-NEXT: mov d8, v1.d[1] -; CHECK-GI-NEXT: fcsel d10, d2, d1, lt -; CHECK-GI-NEXT: fmov x8, d10 -; CHECK-GI-NEXT: fcsel d11, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d11 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: mov x20, #-4603241769126068224 // =0xc01e000000000000 +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x19 +; CHECK-GI-NEXT: csel x21, x8, x20, lt ; CHECK-GI-NEXT: adrp x8, .LCPI16_0 +; CHECK-GI-NEXT: mov v0.d[1], x21 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] -; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d9, v0.d[1] -; CHECK-GI-NEXT: fcsel d0, d10, d0, gt -; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: fcsel d1, d11, d9, gt +; CHECK-GI-NEXT: mov x22, #281474976448512 // =0xfffffffc0000 +; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: movk x22, #16413, lsl #48 ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x21, x22, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel w20, wzr, 
w19, ne +; CHECK-GI-NEXT: csel w21, wzr, w19, ne ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload -; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d0, v1.d[1] -; CHECK-GI-NEXT: fcsel d10, d1, d2, lt -; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload -; CHECK-GI-NEXT: fmov x8, d10 -; CHECK-GI-NEXT: fcsel d11, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d11 -; CHECK-GI-NEXT: mov v0.d[1], x8 -; CHECK-GI-NEXT: bl __gttf2 ; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: fcsel d1, d11, d9, gt -; CHECK-GI-NEXT: fcsel d0, d10, d0, gt +; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x19 +; CHECK-GI-NEXT: csel x23, x8, x20, lt +; CHECK-GI-NEXT: mov v0.d[1], x23 +; CHECK-GI-NEXT: bl __gttf2 +; CHECK-GI-NEXT: cmp w0, #0 +; CHECK-GI-NEXT: csel x8, x19, xzr, gt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x23, x22, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel w21, wzr, w19, ne +; CHECK-GI-NEXT: csel w23, wzr, w19, ne ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp q3, q1, [sp, #32] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; 
CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov d0, v3.d[1] -; CHECK-GI-NEXT: fcsel d10, d3, d2, lt -; CHECK-GI-NEXT: fmov x8, d10 -; CHECK-GI-NEXT: fcsel d8, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d8 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x19 +; CHECK-GI-NEXT: csel x20, x8, x20, lt +; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: fcsel d1, d8, d9, gt -; CHECK-GI-NEXT: fcsel d0, d10, d0, gt -; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x8, x19, xzr, gt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x20, x22, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 -; CHECK-GI-NEXT: mov v0.s[0], w20 +; CHECK-GI-NEXT: mov v0.s[0], w21 ; CHECK-GI-NEXT: cmp w0, #0 ; CHECK-GI-NEXT: csel w8, wzr, w19, ne -; CHECK-GI-NEXT: ldp x20, x19, [sp, #128] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldp d9, d8, [sp, #96] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldp d11, d10, [sp, #80] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov v0.s[1], w21 -; CHECK-GI-NEXT: ldp x30, x21, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov v0.s[1], w23 +; CHECK-GI-NEXT: ldp x30, x23, [sp, #80] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v0.s[2], w8 -; CHECK-GI-NEXT: add sp, sp, #144 
+; CHECK-GI-NEXT: add sp, sp, #128 ; CHECK-GI-NEXT: ret %x = call <3 x i32> @llvm.fptosi.sat.v3f128.v3i32(<3 x fp128> %f) ret <3 x i32> %x @@ -1057,52 +1025,44 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; ; CHECK-GI-LABEL: test_signed_v4f128_v4i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sub sp, sp, #176 -; CHECK-GI-NEXT: stp d11, d10, [sp, #96] // 16-byte Folded Spill -; CHECK-GI-NEXT: stp d9, d8, [sp, #112] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #128] // 8-byte Folded Spill -; CHECK-GI-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill -; CHECK-GI-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 +; CHECK-GI-NEXT: sub sp, sp, #160 +; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x24, x23, [sp, #112] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp x22, x21, [sp, #128] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #144] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 160 ; CHECK-GI-NEXT: .cfi_offset w19, -8 ; CHECK-GI-NEXT: .cfi_offset w20, -16 ; CHECK-GI-NEXT: .cfi_offset w21, -24 ; CHECK-GI-NEXT: .cfi_offset w22, -32 -; CHECK-GI-NEXT: .cfi_offset w30, -48 -; CHECK-GI-NEXT: .cfi_offset b8, -56 -; CHECK-GI-NEXT: .cfi_offset b9, -64 -; CHECK-GI-NEXT: .cfi_offset b10, -72 -; CHECK-GI-NEXT: .cfi_offset b11, -80 +; CHECK-GI-NEXT: .cfi_offset w23, -40 +; CHECK-GI-NEXT: .cfi_offset w24, -48 +; CHECK-GI-NEXT: .cfi_offset w30, -64 ; CHECK-GI-NEXT: adrp x8, .LCPI17_1 ; CHECK-GI-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI17_1] ; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: str q3, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: str q1, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp q1, q3, [sp, #64] // 32-byte Folded Spill ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #80] 
// 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d0, v2.d[1] -; CHECK-GI-NEXT: mov d8, v1.d[1] -; CHECK-GI-NEXT: fcsel d10, d2, d1, lt -; CHECK-GI-NEXT: fmov x8, d10 -; CHECK-GI-NEXT: fcsel d11, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d11 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: mov x20, #-4603241769126068224 // =0xc01e000000000000 +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x19 +; CHECK-GI-NEXT: csel x21, x8, x20, lt ; CHECK-GI-NEXT: adrp x8, .LCPI17_0 +; CHECK-GI-NEXT: mov v0.d[1], x21 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] -; CHECK-GI-NEXT: str q1, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d9, v0.d[1] -; CHECK-GI-NEXT: fcsel d0, d10, d0, gt -; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: fcsel d1, d11, d9, gt +; CHECK-GI-NEXT: mov x22, #281474976448512 // =0xfffffffc0000 +; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: movk x22, #16413, lsl #48 ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x21, x22, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -1110,28 +1070,24 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 ; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel w20, wzr, w19, ne +; CHECK-GI-NEXT: csel w21, wzr, w19, ne ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp 
q1, q4, [sp, #64] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov d0, v2.d[1] -; CHECK-GI-NEXT: fcsel d10, d2, d4, lt -; CHECK-GI-NEXT: fmov x8, d10 -; CHECK-GI-NEXT: fcsel d11, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d11 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x19 +; CHECK-GI-NEXT: csel x23, x8, x20, lt +; CHECK-GI-NEXT: mov v0.d[1], x23 ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: fcsel d1, d11, d9, gt -; CHECK-GI-NEXT: fcsel d0, d10, d0, gt -; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x8, x19, xzr, gt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x23, x22, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload @@ -1139,76 +1095,64 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 ; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel w21, wzr, w19, ne +; CHECK-GI-NEXT: csel w23, wzr, w19, ne ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d0, v1.d[1] -; CHECK-GI-NEXT: fcsel d10, d1, d2, lt -; CHECK-GI-NEXT: ldr q1, [sp, 
#64] // 16-byte Folded Reload -; CHECK-GI-NEXT: fmov x8, d10 -; CHECK-GI-NEXT: fcsel d11, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d11 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x19 +; CHECK-GI-NEXT: csel x24, x8, x20, lt +; CHECK-GI-NEXT: mov v0.d[1], x24 ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: fcsel d1, d11, d9, gt -; CHECK-GI-NEXT: fcsel d0, d10, d0, gt -; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x8, x19, xzr, gt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x24, x22, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi ; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp q1, q0, [sp, #64] // 32-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: csel w22, wzr, w19, ne +; CHECK-GI-NEXT: csel w24, wzr, w19, ne ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp q5, q1, [sp, #48] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov d0, v5.d[1] -; CHECK-GI-NEXT: fcsel d10, d5, d2, lt -; CHECK-GI-NEXT: fmov x8, d10 -; CHECK-GI-NEXT: fcsel d8, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d8 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x19 +; 
CHECK-GI-NEXT: csel x20, x8, x20, lt +; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: fcsel d1, d8, d9, gt -; CHECK-GI-NEXT: fcsel d0, d10, d0, gt -; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x8, x19, xzr, gt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x20, x22, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixtfsi -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: bl __unordtf2 -; CHECK-GI-NEXT: mov v0.s[0], w20 +; CHECK-GI-NEXT: mov v0.s[0], w21 ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload ; CHECK-GI-NEXT: csel w8, wzr, w19, ne -; CHECK-GI-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldp d9, d8, [sp, #112] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldp d11, d10, [sp, #96] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov v0.s[1], w21 -; CHECK-GI-NEXT: mov v0.s[2], w22 -; CHECK-GI-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x22, x21, [sp, #128] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov v0.s[1], w23 +; CHECK-GI-NEXT: mov v0.s[2], w24 +; CHECK-GI-NEXT: ldp x24, x23, [sp, #112] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v0.s[3], w8 -; CHECK-GI-NEXT: add sp, sp, #176 +; CHECK-GI-NEXT: add sp, sp, #160 ; CHECK-GI-NEXT: ret %x = call <4 x i32> @llvm.fptosi.sat.v4f128.v4i32(<4 x fp128> %f) ret <4 x i32> %x diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll index 60f961fa8f944..3c19fca4a22ae 100644 --- 
a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll @@ -787,43 +787,38 @@ define i32 @test_unsigned_f128_i32(fp128 %f) { ; ; CHECK-GI-LABEL: test_unsigned_f128_i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: .cfi_offset b8, -24 -; CHECK-GI-NEXT: .cfi_offset b9, -32 +; CHECK-GI-NEXT: sub sp, sp, #48 +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 ; CHECK-GI-NEXT: adrp x8, .LCPI30_1 +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_1] -; CHECK-GI-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d0, v3.d[1] -; CHECK-GI-NEXT: mov d1, v2.d[1] -; CHECK-GI-NEXT: fcsel d8, d3, d2, lt -; CHECK-GI-NEXT: fmov x8, d8 -; CHECK-GI-NEXT: fcsel d9, d0, d1, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d9 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x19 +; CHECK-GI-NEXT: csel x20, x8, xzr, lt ; CHECK-GI-NEXT: adrp x8, .LCPI30_0 +; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_0] -; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; 
CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload -; CHECK-GI-NEXT: mov d0, v1.d[1] -; CHECK-GI-NEXT: fcsel d1, d8, d1, gt -; CHECK-GI-NEXT: fmov x8, d1 -; CHECK-GI-NEXT: fcsel d2, d9, d0, gt -; CHECK-GI-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-GI-NEXT: csel x8, x19, xzr, gt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d2 +; CHECK-GI-NEXT: mov x8, #281474976579584 // =0xfffffffe0000 +; CHECK-GI-NEXT: movk x8, #16414, lsl #48 +; CHECK-GI-NEXT: csel x8, x20, x8, gt +; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v0.d[1], x8 -; CHECK-GI-NEXT: add sp, sp, #64 +; CHECK-GI-NEXT: add sp, sp, #48 ; CHECK-GI-NEXT: b __fixunstfsi %x = call i32 @llvm.fptoui.sat.i32.f128(fp128 %f) ret i32 %x diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index 046ec0d079029..e1670ad2dc053 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -481,46 +481,41 @@ define <1 x i32> @test_unsigned_v1f128_v1i32(<1 x fp128> %f) { ; ; CHECK-GI-LABEL: test_unsigned_v1f128_v1i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sub sp, sp, #64 -; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: .cfi_offset b8, -24 -; CHECK-GI-NEXT: .cfi_offset b9, -32 +; CHECK-GI-NEXT: sub sp, sp, #48 +; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 ; CHECK-GI-NEXT: adrp x8, .LCPI14_1 +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-GI-NEXT: ldr q1, [x8, 
:lo12:.LCPI14_1] -; CHECK-GI-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d0, v3.d[1] -; CHECK-GI-NEXT: mov d1, v2.d[1] -; CHECK-GI-NEXT: fcsel d8, d3, d2, lt -; CHECK-GI-NEXT: fmov x8, d8 -; CHECK-GI-NEXT: fcsel d9, d0, d1, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d9 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x19 +; CHECK-GI-NEXT: csel x20, x8, xzr, lt ; CHECK-GI-NEXT: adrp x8, .LCPI14_0 +; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] -; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d0, v1.d[1] -; CHECK-GI-NEXT: fcsel d1, d8, d1, gt -; CHECK-GI-NEXT: fmov x8, d1 -; CHECK-GI-NEXT: fcsel d2, d9, d0, gt +; CHECK-GI-NEXT: csel x8, x19, xzr, gt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d2 +; CHECK-GI-NEXT: mov x8, #281474976579584 // =0xfffffffe0000 +; CHECK-GI-NEXT: movk x8, #16414, lsl #48 +; CHECK-GI-NEXT: csel x8, x20, x8, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi -; CHECK-GI-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v0.s[0], w0 -; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: add sp, sp, #64 +; CHECK-GI-NEXT: add sp, sp, #48 ; CHECK-GI-NEXT: ret %x = call <1 x i32> @llvm.fptoui.sat.v1f128.v1i32(<1 x fp128> %f) ret <1 x i32> %x @@ -579,75 +574,64 @@ define <2 x 
i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) { ; CHECK-GI-LABEL: test_unsigned_v2f128_v2i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #96 -; CHECK-GI-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill -; CHECK-GI-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-GI-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-GI-NEXT: .cfi_def_cfa_offset 96 ; CHECK-GI-NEXT: .cfi_offset w19, -8 -; CHECK-GI-NEXT: .cfi_offset w30, -16 -; CHECK-GI-NEXT: .cfi_offset b8, -24 -; CHECK-GI-NEXT: .cfi_offset b9, -32 -; CHECK-GI-NEXT: .cfi_offset b10, -40 -; CHECK-GI-NEXT: .cfi_offset b11, -48 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w21, -24 +; CHECK-GI-NEXT: .cfi_offset w22, -32 +; CHECK-GI-NEXT: .cfi_offset w30, -48 ; CHECK-GI-NEXT: adrp x8, .LCPI15_1 -; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI15_1] -; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-GI-NEXT: mov v1.16b, v2.16b -; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d0, v2.d[1] -; CHECK-GI-NEXT: mov d8, v1.d[1] -; CHECK-GI-NEXT: fcsel d9, d2, d1, lt -; CHECK-GI-NEXT: fmov x8, d9 -; CHECK-GI-NEXT: fcsel d10, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d10 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x19 +; CHECK-GI-NEXT: csel x20, 
x8, xzr, lt ; CHECK-GI-NEXT: adrp x8, .LCPI15_0 +; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] -; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d11, v0.d[1] -; CHECK-GI-NEXT: fcsel d0, d9, d0, gt -; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: fcsel d1, d10, d11, gt +; CHECK-GI-NEXT: mov x21, #281474976579584 // =0xfffffffe0000 +; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: movk x21, #16414, lsl #48 ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x20, x21, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi -; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp q3, q1, [sp] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov d0, v3.d[1] -; CHECK-GI-NEXT: fcsel d9, d3, d2, lt -; CHECK-GI-NEXT: fmov x8, d9 -; CHECK-GI-NEXT: fcsel d8, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d8 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x20, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x20 +; CHECK-GI-NEXT: csel x22, x8, xzr, lt +; CHECK-GI-NEXT: mov v0.d[1], x22 ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: fcsel d1, d8, d11, gt -; CHECK-GI-NEXT: fcsel d0, d9, d0, gt -; CHECK-GI-NEXT: 
fmov x8, d0 +; CHECK-GI-NEXT: csel x8, x20, xzr, gt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x22, x21, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: mov v0.s[0], w19 -; CHECK-GI-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload ; CHECK-GI-NEXT: mov v0.s[1], w0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: add sp, sp, #96 @@ -723,106 +707,87 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) { ; ; CHECK-GI-LABEL: test_unsigned_v3f128_v3i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sub sp, sp, #128 -; CHECK-GI-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill -; CHECK-GI-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Folded Spill -; CHECK-GI-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill -; CHECK-GI-NEXT: .cfi_def_cfa_offset 128 +; CHECK-GI-NEXT: sub sp, sp, #112 +; CHECK-GI-NEXT: stp x30, x23, [sp, #64] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 112 ; CHECK-GI-NEXT: .cfi_offset w19, -8 ; CHECK-GI-NEXT: .cfi_offset w20, -16 -; CHECK-GI-NEXT: .cfi_offset w30, -32 -; CHECK-GI-NEXT: .cfi_offset b8, -40 -; CHECK-GI-NEXT: .cfi_offset b9, -48 -; CHECK-GI-NEXT: .cfi_offset b10, -56 -; CHECK-GI-NEXT: .cfi_offset b11, -64 +; CHECK-GI-NEXT: .cfi_offset w21, -24 +; CHECK-GI-NEXT: .cfi_offset w22, -32 +; CHECK-GI-NEXT: .cfi_offset w23, -40 +; CHECK-GI-NEXT: .cfi_offset w30, -48 ; CHECK-GI-NEXT: adrp x8, .LCPI16_1 -; 
CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_1] -; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-GI-NEXT: str q2, [sp, #16] // 16-byte Folded Spill -; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp q1, q2, [sp, #32] // 32-byte Folded Spill ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp q2, q1, [sp, #32] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d0, v2.d[1] -; CHECK-GI-NEXT: mov d8, v1.d[1] -; CHECK-GI-NEXT: fcsel d10, d2, d1, lt -; CHECK-GI-NEXT: fmov x8, d10 -; CHECK-GI-NEXT: fcsel d11, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d11 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x19 +; CHECK-GI-NEXT: csel x20, x8, xzr, lt ; CHECK-GI-NEXT: adrp x8, .LCPI16_0 +; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] -; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d9, v0.d[1] -; CHECK-GI-NEXT: fcsel d0, d10, d0, gt -; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: fcsel d1, d11, d9, gt +; CHECK-GI-NEXT: mov x21, #281474976579584 // =0xfffffffe0000 +; CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: movk x21, #16414, lsl #48 ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x20, x21, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr q1, 
[sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp q1, q3, [sp, #32] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov d0, v2.d[1] -; CHECK-GI-NEXT: fcsel d10, d2, d3, lt -; CHECK-GI-NEXT: fmov x8, d10 -; CHECK-GI-NEXT: fcsel d11, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d11 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x20, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x20 +; CHECK-GI-NEXT: csel x22, x8, xzr, lt +; CHECK-GI-NEXT: mov v0.d[1], x22 ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: fcsel d1, d11, d9, gt -; CHECK-GI-NEXT: fcsel d0, d10, d0, gt -; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x8, x20, xzr, gt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x22, x21, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi -; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload ; CHECK-GI-NEXT: mov w20, w0 ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp q4, q1, [sp, #16] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov d0, v4.d[1] -; CHECK-GI-NEXT: fcsel d10, d4, d2, lt -; CHECK-GI-NEXT: fmov x8, d10 -; CHECK-GI-NEXT: fcsel d8, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d8 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: fmov x8, d0 +; 
CHECK-GI-NEXT: csel x22, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x22 +; CHECK-GI-NEXT: csel x23, x8, xzr, lt +; CHECK-GI-NEXT: mov v0.d[1], x23 ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: fcsel d1, d8, d9, gt -; CHECK-GI-NEXT: fcsel d0, d10, d0, gt -; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x8, x22, xzr, gt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x23, x21, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: mov v0.s[0], w19 -; CHECK-GI-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x30, x23, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v0.s[1], w20 -; CHECK-GI-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v0.s[2], w0 -; CHECK-GI-NEXT: add sp, sp, #128 +; CHECK-GI-NEXT: add sp, sp, #112 ; CHECK-GI-NEXT: ret %x = call <3 x i32> @llvm.fptoui.sat.v3f128.v3i32(<3 x fp128> %f) ret <3 x i32> %x @@ -912,19 +877,18 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-LABEL: test_unsigned_v4f128_v4i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #144 -; CHECK-GI-NEXT: stp d11, d10, [sp, #80] // 16-byte Folded Spill -; CHECK-GI-NEXT: stp d9, d8, [sp, #96] // 16-byte Folded Spill -; CHECK-GI-NEXT: stp x30, x21, [sp, #112] // 16-byte Folded Spill +; CHECK-GI-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x24, x23, [sp, #96] // 16-byte Folded Spill +; CHECK-GI-NEXT: stp x22, x21, [sp, #112] // 16-byte Folded Spill ; CHECK-GI-NEXT: stp x20, x19, [sp, #128] // 16-byte Folded Spill ; 
CHECK-GI-NEXT: .cfi_def_cfa_offset 144 ; CHECK-GI-NEXT: .cfi_offset w19, -8 ; CHECK-GI-NEXT: .cfi_offset w20, -16 ; CHECK-GI-NEXT: .cfi_offset w21, -24 -; CHECK-GI-NEXT: .cfi_offset w30, -32 -; CHECK-GI-NEXT: .cfi_offset b8, -40 -; CHECK-GI-NEXT: .cfi_offset b9, -48 -; CHECK-GI-NEXT: .cfi_offset b10, -56 -; CHECK-GI-NEXT: .cfi_offset b11, -64 +; CHECK-GI-NEXT: .cfi_offset w22, -32 +; CHECK-GI-NEXT: .cfi_offset w23, -40 +; CHECK-GI-NEXT: .cfi_offset w24, -48 +; CHECK-GI-NEXT: .cfi_offset w30, -64 ; CHECK-GI-NEXT: adrp x8, .LCPI17_1 ; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI17_1] @@ -932,109 +896,92 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) { ; CHECK-GI-NEXT: str q3, [sp, #32] // 16-byte Folded Spill ; CHECK-GI-NEXT: str q1, [sp, #64] // 16-byte Folded Spill ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp q2, q1, [sp, #48] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d0, v2.d[1] -; CHECK-GI-NEXT: mov d8, v1.d[1] -; CHECK-GI-NEXT: fcsel d10, d2, d1, lt -; CHECK-GI-NEXT: fmov x8, d10 -; CHECK-GI-NEXT: fcsel d11, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d11 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x19, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x19 +; CHECK-GI-NEXT: csel x20, x8, xzr, lt ; CHECK-GI-NEXT: adrp x8, .LCPI17_0 +; CHECK-GI-NEXT: mov v0.d[1], x20 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] ; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: mov d9, v0.d[1] -; CHECK-GI-NEXT: fcsel d0, d10, d0, gt -; CHECK-GI-NEXT: fmov x8, d0 -; CHECK-GI-NEXT: fcsel d1, d11, d9, gt +; CHECK-GI-NEXT: mov x22, #281474976579584 // =0xfffffffe0000 +; 
CHECK-GI-NEXT: csel x8, x19, xzr, gt +; CHECK-GI-NEXT: movk x22, #16414, lsl #48 ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x20, x22, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w19, w0 ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp q1, q4, [sp, #48] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov d0, v2.d[1] -; CHECK-GI-NEXT: fcsel d10, d2, d4, lt -; CHECK-GI-NEXT: fmov x8, d10 -; CHECK-GI-NEXT: fcsel d11, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d11 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x20, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x20 +; CHECK-GI-NEXT: csel x21, x8, xzr, lt +; CHECK-GI-NEXT: mov v0.d[1], x21 ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: fcsel d1, d11, d9, gt -; CHECK-GI-NEXT: fcsel d0, d10, d0, gt -; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x8, x20, xzr, gt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x21, x22, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w20, w0 ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp q1, q5, [sp, #48] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov d0, 
v2.d[1] -; CHECK-GI-NEXT: fcsel d10, d2, d5, lt -; CHECK-GI-NEXT: fmov x8, d10 -; CHECK-GI-NEXT: fcsel d11, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d11 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x21, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x21 +; CHECK-GI-NEXT: csel x23, x8, xzr, lt +; CHECK-GI-NEXT: mov v0.d[1], x23 ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: fcsel d1, d11, d9, gt -; CHECK-GI-NEXT: fcsel d0, d10, d0, gt -; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x8, x21, xzr, gt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x23, x22, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov w21, w0 ; CHECK-GI-NEXT: bl __getf2 -; CHECK-GI-NEXT: ldp q6, q1, [sp, #32] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload -; CHECK-GI-NEXT: mov d0, v6.d[1] -; CHECK-GI-NEXT: fcsel d10, d6, d2, lt -; CHECK-GI-NEXT: fmov x8, d10 -; CHECK-GI-NEXT: fcsel d8, d0, d8, lt -; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d8 -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: csel x23, x8, xzr, lt +; CHECK-GI-NEXT: mov x8, v0.d[1] +; CHECK-GI-NEXT: mov v0.d[0], x23 +; CHECK-GI-NEXT: csel x24, x8, xzr, lt +; CHECK-GI-NEXT: mov v0.d[1], x24 ; CHECK-GI-NEXT: bl __gttf2 -; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-GI-NEXT: cmp w0, #0 -; CHECK-GI-NEXT: fcsel d1, d8, d9, gt -; CHECK-GI-NEXT: fcsel d0, d10, d0, gt -; CHECK-GI-NEXT: fmov 
x8, d0 +; CHECK-GI-NEXT: csel x8, x23, xzr, gt ; CHECK-GI-NEXT: mov v0.d[0], x8 -; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: csel x8, x24, x22, gt ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: bl __fixunstfsi ; CHECK-GI-NEXT: mov v0.s[0], w19 -; CHECK-GI-NEXT: ldp d9, d8, [sp, #96] // 16-byte Folded Reload -; CHECK-GI-NEXT: ldp d11, d10, [sp, #80] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x24, x23, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-GI-NEXT: mov v0.s[1], w20 ; CHECK-GI-NEXT: ldp x20, x19, [sp, #128] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v0.s[2], w21 -; CHECK-GI-NEXT: ldp x30, x21, [sp, #112] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldp x22, x21, [sp, #112] // 16-byte Folded Reload ; CHECK-GI-NEXT: mov v0.s[3], w0 ; CHECK-GI-NEXT: add sp, sp, #144 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/load.ll b/llvm/test/CodeGen/AArch64/load.ll index 167e9d1c19643..70ab10e716875 100644 --- a/llvm/test/CodeGen/AArch64/load.ll +++ b/llvm/test/CodeGen/AArch64/load.ll @@ -157,10 +157,9 @@ define <2 x i16> @load_v2i16(ptr %ptr){ ; ; CHECK-GI-LABEL: load_v2i16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr h0, [x0] -; CHECK-GI-NEXT: add x8, x0, #2 -; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: ld1 { v0.h }[0], [x0] +; CHECK-GI-NEXT: ldr h1, [x0, #2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %a = load <2 x i16>, ptr %ptr diff --git a/llvm/test/CodeGen/AArch64/mul.ll b/llvm/test/CodeGen/AArch64/mul.ll index 5e7f71c18c27a..9ca975d9e742e 100644 --- a/llvm/test/CodeGen/AArch64/mul.ll +++ b/llvm/test/CodeGen/AArch64/mul.ll @@ -167,21 +167,23 @@ define void @v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: fmov s1, w9 ; CHECK-GI-NEXT: mov b2, v0.b[1] -; CHECK-GI-NEXT: mov v3.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b4, v1.b[1] -; CHECK-GI-NEXT: 
mov v5.b[0], v1.b[0] -; CHECK-GI-NEXT: mov v3.b[1], v2.b[0] -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v5.b[1], v4.b[0] -; CHECK-GI-NEXT: mov b4, v1.b[2] -; CHECK-GI-NEXT: mov b1, v1.b[3] -; CHECK-GI-NEXT: mov v3.b[2], v2.b[0] -; CHECK-GI-NEXT: mov v5.b[2], v4.b[0] -; CHECK-GI-NEXT: mov v3.b[3], v0.b[0] -; CHECK-GI-NEXT: mov v5.b[3], v1.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 -; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: fmov w8, s2 +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: fmov w9, s3 +; CHECK-GI-NEXT: mov b3, v1.b[3] +; CHECK-GI-NEXT: mov v0.h[1], w8 +; CHECK-GI-NEXT: mov v1.h[1], w9 +; CHECK-GI-NEXT: fmov w8, s4 +; CHECK-GI-NEXT: fmov w9, s2 +; CHECK-GI-NEXT: mov v0.h[2], w8 +; CHECK-GI-NEXT: mov v1.h[2], w9 +; CHECK-GI-NEXT: fmov w8, s5 +; CHECK-GI-NEXT: fmov w9, s3 +; CHECK-GI-NEXT: mov v0.h[3], w8 +; CHECK-GI-NEXT: mov v1.h[3], w9 ; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 @@ -250,14 +252,12 @@ define void @v2i16(ptr %p1, ptr %p2) { ; ; CHECK-GI-LABEL: v2i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ldr h0, [x0] -; CHECK-GI-NEXT: ldr h1, [x1] -; CHECK-GI-NEXT: add x8, x0, #2 -; CHECK-GI-NEXT: add x9, x1, #2 -; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] -; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: ld1 { v0.h }[0], [x0] +; CHECK-GI-NEXT: ld1 { v1.h }[0], [x1] +; CHECK-GI-NEXT: ldr h2, [x0, #2] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] +; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] ; CHECK-GI-NEXT: mul v0.2s, v0.2s, v1.2s ; CHECK-GI-NEXT: mov s1, v0.s[1] ; CHECK-GI-NEXT: str h0, [x0] diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll 
b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll index dbb4270fb8002..f6dbf5251fc27 100644 --- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -1120,10 +1120,9 @@ define <4 x i16> @vselect_constant_cond_zero_v4i16(<4 x i16> %a) { ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: mov w9, #0 // =0x0 ; CHECK-GI-NEXT: fmov s1, w8 -; CHECK-GI-NEXT: mov v1.b[1], w9 -; CHECK-GI-NEXT: mov v1.b[2], w9 -; CHECK-GI-NEXT: mov v1.b[3], w8 -; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-GI-NEXT: mov v1.h[1], w9 +; CHECK-GI-NEXT: mov v1.h[2], w9 +; CHECK-GI-NEXT: mov v1.h[3], w8 ; CHECK-GI-NEXT: shl v1.4h, v1.4h, #15 ; CHECK-GI-NEXT: sshr v1.4h, v1.4h, #15 ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b @@ -1144,13 +1143,10 @@ define <4 x i32> @vselect_constant_cond_zero_v4i32(<4 x i32> %a) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: mov w9, #0 // =0x0 -; CHECK-GI-NEXT: fmov s1, w8 -; CHECK-GI-NEXT: fmov s2, w9 -; CHECK-GI-NEXT: mov v2.h[1], w8 -; CHECK-GI-NEXT: mov v1.h[1], w9 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-GI-NEXT: mov v1.d[1], v2.d[0] +; CHECK-GI-NEXT: mov v1.s[0], w8 +; CHECK-GI-NEXT: mov v1.s[1], w9 +; CHECK-GI-NEXT: mov v1.s[2], w9 +; CHECK-GI-NEXT: mov v1.s[3], w8 ; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31 ; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b @@ -1196,10 +1192,9 @@ define <4 x i16> @vselect_constant_cond_v4i16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: mov w9, #0 // =0x0 ; CHECK-GI-NEXT: fmov s2, w8 -; CHECK-GI-NEXT: mov v2.b[1], w9 -; CHECK-GI-NEXT: mov v2.b[2], w9 -; CHECK-GI-NEXT: mov v2.b[3], w8 -; CHECK-GI-NEXT: ushll v2.8h, v2.8b, #0 +; CHECK-GI-NEXT: mov v2.h[1], w9 +; CHECK-GI-NEXT: mov v2.h[2], w9 +; CHECK-GI-NEXT: mov v2.h[3], w8 ; CHECK-GI-NEXT: shl v2.4h, v2.4h, #15 ; CHECK-GI-NEXT: sshr v2.4h, v2.4h, #15 ; 
CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b @@ -1220,13 +1215,10 @@ define <4 x i32> @vselect_constant_cond_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: mov w9, #0 // =0x0 -; CHECK-GI-NEXT: fmov s2, w8 -; CHECK-GI-NEXT: fmov s3, w9 -; CHECK-GI-NEXT: mov v3.h[1], w8 -; CHECK-GI-NEXT: mov v2.h[1], w9 -; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-GI-NEXT: ushll v3.4s, v3.4h, #0 -; CHECK-GI-NEXT: mov v2.d[1], v3.d[0] +; CHECK-GI-NEXT: mov v2.s[0], w8 +; CHECK-GI-NEXT: mov v2.s[1], w9 +; CHECK-GI-NEXT: mov v2.s[2], w9 +; CHECK-GI-NEXT: mov v2.s[3], w8 ; CHECK-GI-NEXT: shl v2.4s, v2.4s, #31 ; CHECK-GI-NEXT: sshr v2.4s, v2.4s, #31 ; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll index adc89f7a0d99d..8f7d5dd5588b9 100644 --- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -2672,14 +2672,9 @@ define <4 x i32> @fcmal4xfloat(<4 x float> %A, <4 x float> %B) { ; CHECK-GI-LABEL: fcmal4xfloat: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #1 // =0x1 -; CHECK-GI-NEXT: fmov s0, w8 -; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: mov v0.h[1], w8 -; CHECK-GI-NEXT: mov v1.h[1], w8 -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] -; CHECK-GI-NEXT: shl v0.4s, v1.4s, #31 +; CHECK-GI-NEXT: dup v0.2s, w8 +; CHECK-GI-NEXT: mov v0.d[1], v0.d[0] +; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31 ; CHECK-GI-NEXT: ret %tmp3 = fcmp true <4 x float> %A, %B @@ -2723,14 +2718,10 @@ define <4 x i32> @fcmnv4xfloat(<4 x float> %A, <4 x float> %B) { ; CHECK-GI-LABEL: fcmnv4xfloat: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #0 // =0x0 -; CHECK-GI-NEXT: fmov s0, w8 -; CHECK-GI-NEXT: mov v1.16b, v0.16b -; CHECK-GI-NEXT: mov v0.h[1], w8 -; 
CHECK-GI-NEXT: mov v1.h[1], w8 -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] -; CHECK-GI-NEXT: shl v0.4s, v1.4s, #31 +; CHECK-GI-NEXT: mov v0.s[0], w8 +; CHECK-GI-NEXT: mov v0.s[1], w8 +; CHECK-GI-NEXT: mov v0.d[1], v0.d[0] +; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31 ; CHECK-GI-NEXT: ret %tmp3 = fcmp false <4 x float> %A, %B diff --git a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll index 4dd2a896cd81c..5cb933148a1a4 100644 --- a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll +++ b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll @@ -57,18 +57,12 @@ land.end: ; preds = %land.rhs, %entry declare i64 @llvm.ctlz.i64(i64 %in, i1) define i1 @lshr_ctlz_undef_cmpeq_one_i64(i64 %in) { -; CHECK-SD-LABEL: lshr_ctlz_undef_cmpeq_one_i64: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: clz x8, x0 -; CHECK-SD-NEXT: lsr x0, x8, #6 -; CHECK-SD-NEXT: // kill: def $w0 killed $w0 killed $x0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: lshr_ctlz_undef_cmpeq_one_i64: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: clz x8, x0 -; CHECK-GI-NEXT: lsr w0, w8, #6 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: lshr_ctlz_undef_cmpeq_one_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: clz x8, x0 +; CHECK-NEXT: lsr x0, x8, #6 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1) %lshr = lshr i64 %ctlz, 6 %icmp = icmp eq i64 %lshr, 1 diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll index 0f256c1f18f58..853ed92c91fbc 100644 --- a/llvm/test/CodeGen/AArch64/sext.ll +++ b/llvm/test/CodeGen/AArch64/sext.ll @@ -1198,58 +1198,50 @@ define <16 x i64> @sext_v16i10_v16i64(<16 x i10> %a) { ; ; CHECK-GI-LABEL: sext_v16i10_v16i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ldr w8, [sp] -; CHECK-GI-NEXT: ldr w10, [sp, #32] -; CHECK-GI-NEXT: fmov s0, w0 -; CHECK-GI-NEXT: fmov s1, w4 -; 
CHECK-GI-NEXT: ldr w9, [sp, #8] -; CHECK-GI-NEXT: ldr w11, [sp, #40] -; CHECK-GI-NEXT: fmov s2, w8 -; CHECK-GI-NEXT: fmov s3, w10 -; CHECK-GI-NEXT: ldr w8, [sp, #16] -; CHECK-GI-NEXT: mov v0.h[1], w1 -; CHECK-GI-NEXT: mov v1.h[1], w5 -; CHECK-GI-NEXT: mov v2.h[1], w9 -; CHECK-GI-NEXT: mov v3.h[1], w11 -; CHECK-GI-NEXT: ldr w9, [sp, #48] -; CHECK-GI-NEXT: mov v0.h[2], w2 -; CHECK-GI-NEXT: mov v1.h[2], w6 -; CHECK-GI-NEXT: mov v2.h[2], w8 -; CHECK-GI-NEXT: mov v3.h[2], w9 -; CHECK-GI-NEXT: ldr w8, [sp, #24] -; CHECK-GI-NEXT: ldr w9, [sp, #56] -; CHECK-GI-NEXT: mov v0.h[3], w3 -; CHECK-GI-NEXT: mov v1.h[3], w7 -; CHECK-GI-NEXT: mov v2.h[3], w8 -; CHECK-GI-NEXT: mov v3.h[3], w9 -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-GI-NEXT: ushll v3.4s, v3.4h, #0 -; CHECK-GI-NEXT: ushll v4.2d, v0.2s, #0 -; CHECK-GI-NEXT: ushll2 v0.2d, v0.4s, #0 -; CHECK-GI-NEXT: ushll v5.2d, v1.2s, #0 -; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0 -; CHECK-GI-NEXT: ushll v6.2d, v2.2s, #0 -; CHECK-GI-NEXT: ushll2 v2.2d, v2.4s, #0 -; CHECK-GI-NEXT: ushll v7.2d, v3.2s, #0 -; CHECK-GI-NEXT: ushll2 v3.2d, v3.4s, #0 -; CHECK-GI-NEXT: shl v4.2d, v4.2d, #54 -; CHECK-GI-NEXT: shl v16.2d, v0.2d, #54 +; CHECK-GI-NEXT: mov v1.s[0], w0 +; CHECK-GI-NEXT: mov v2.s[0], w2 +; CHECK-GI-NEXT: ldr s0, [sp] +; CHECK-GI-NEXT: mov v3.s[0], w4 +; CHECK-GI-NEXT: mov v4.s[0], w6 +; CHECK-GI-NEXT: ldr s5, [sp, #8] +; CHECK-GI-NEXT: ldr s6, [sp, #16] +; CHECK-GI-NEXT: ldr s7, [sp, #24] +; CHECK-GI-NEXT: ldr s16, [sp, #32] +; CHECK-GI-NEXT: ldr s17, [sp, #40] +; CHECK-GI-NEXT: ldr s18, [sp, #48] +; CHECK-GI-NEXT: ldr s19, [sp, #56] +; CHECK-GI-NEXT: mov v1.s[1], w1 +; CHECK-GI-NEXT: mov v0.s[1], v5.s[0] +; CHECK-GI-NEXT: mov v2.s[1], w3 +; CHECK-GI-NEXT: mov v3.s[1], w5 +; CHECK-GI-NEXT: mov v4.s[1], w7 +; CHECK-GI-NEXT: mov v6.s[1], v7.s[0] +; CHECK-GI-NEXT: mov v16.s[1], v17.s[0] +; CHECK-GI-NEXT: mov v18.s[1], v19.s[0] +; CHECK-GI-NEXT: 
ushll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0 +; CHECK-GI-NEXT: ushll v4.2d, v4.2s, #0 +; CHECK-GI-NEXT: ushll v5.2d, v6.2s, #0 +; CHECK-GI-NEXT: ushll v6.2d, v16.2s, #0 +; CHECK-GI-NEXT: ushll v7.2d, v18.2s, #0 +; CHECK-GI-NEXT: shl v0.2d, v0.2d, #54 +; CHECK-GI-NEXT: shl v1.2d, v1.2d, #54 +; CHECK-GI-NEXT: shl v2.2d, v2.2d, #54 +; CHECK-GI-NEXT: shl v3.2d, v3.2d, #54 +; CHECK-GI-NEXT: shl v16.2d, v4.2d, #54 ; CHECK-GI-NEXT: shl v5.2d, v5.2d, #54 -; CHECK-GI-NEXT: shl v17.2d, v1.2d, #54 ; CHECK-GI-NEXT: shl v6.2d, v6.2d, #54 -; CHECK-GI-NEXT: shl v18.2d, v2.2d, #54 ; CHECK-GI-NEXT: shl v7.2d, v7.2d, #54 -; CHECK-GI-NEXT: shl v19.2d, v3.2d, #54 -; CHECK-GI-NEXT: sshr v0.2d, v4.2d, #54 -; CHECK-GI-NEXT: sshr v1.2d, v16.2d, #54 -; CHECK-GI-NEXT: sshr v2.2d, v5.2d, #54 -; CHECK-GI-NEXT: sshr v3.2d, v17.2d, #54 -; CHECK-GI-NEXT: sshr v4.2d, v6.2d, #54 -; CHECK-GI-NEXT: sshr v5.2d, v18.2d, #54 -; CHECK-GI-NEXT: sshr v6.2d, v7.2d, #54 -; CHECK-GI-NEXT: sshr v7.2d, v19.2d, #54 +; CHECK-GI-NEXT: sshr v4.2d, v0.2d, #54 +; CHECK-GI-NEXT: sshr v0.2d, v1.2d, #54 +; CHECK-GI-NEXT: sshr v1.2d, v2.2d, #54 +; CHECK-GI-NEXT: sshr v2.2d, v3.2d, #54 +; CHECK-GI-NEXT: sshr v3.2d, v16.2d, #54 +; CHECK-GI-NEXT: sshr v5.2d, v5.2d, #54 +; CHECK-GI-NEXT: sshr v6.2d, v6.2d, #54 +; CHECK-GI-NEXT: sshr v7.2d, v7.2d, #54 ; CHECK-GI-NEXT: ret entry: %c = sext <16 x i10> %a to <16 x i64> diff --git a/llvm/test/CodeGen/AArch64/shift-logic.ll b/llvm/test/CodeGen/AArch64/shift-logic.ll index b1ad31d1475ce..6978e624bbed7 100644 --- a/llvm/test/CodeGen/AArch64/shift-logic.ll +++ b/llvm/test/CodeGen/AArch64/shift-logic.ll @@ -280,7 +280,8 @@ define void @apint_type_mismatch(i16 %a, ptr %p) { ; CHECK-GISEL: // %bb.0: // %entry ; CHECK-GISEL-NEXT: ubfx w8, w0, #3, #13 ; CHECK-GISEL-NEXT: and w8, w8, #0xff -; CHECK-GISEL-NEXT: lsl w8, w8, #3 +; CHECK-GISEL-NEXT: and x8, x8, #0xffff +; CHECK-GISEL-NEXT: lsl 
x8, x8, #3 ; CHECK-GISEL-NEXT: str w8, [x1] ; CHECK-GISEL-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/sub.ll b/llvm/test/CodeGen/AArch64/sub.ll index c298e6d8a1ff2..8f35a69f52b85 100644 --- a/llvm/test/CodeGen/AArch64/sub.ll +++ b/llvm/test/CodeGen/AArch64/sub.ll @@ -155,21 +155,23 @@ define void @v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: fmov s1, w9 ; CHECK-GI-NEXT: mov b2, v0.b[1] -; CHECK-GI-NEXT: mov v3.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b4, v1.b[1] -; CHECK-GI-NEXT: mov v5.b[0], v1.b[0] -; CHECK-GI-NEXT: mov v3.b[1], v2.b[0] -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: mov b0, v0.b[3] -; CHECK-GI-NEXT: mov v5.b[1], v4.b[0] -; CHECK-GI-NEXT: mov b4, v1.b[2] -; CHECK-GI-NEXT: mov b1, v1.b[3] -; CHECK-GI-NEXT: mov v3.b[2], v2.b[0] -; CHECK-GI-NEXT: mov v5.b[2], v4.b[0] -; CHECK-GI-NEXT: mov v3.b[3], v0.b[0] -; CHECK-GI-NEXT: mov v5.b[3], v1.b[0] -; CHECK-GI-NEXT: ushll v0.8h, v3.8b, #0 -; CHECK-GI-NEXT: ushll v1.8h, v5.8b, #0 +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: fmov w8, s2 +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: fmov w9, s3 +; CHECK-GI-NEXT: mov b3, v1.b[3] +; CHECK-GI-NEXT: mov v0.h[1], w8 +; CHECK-GI-NEXT: mov v1.h[1], w9 +; CHECK-GI-NEXT: fmov w8, s4 +; CHECK-GI-NEXT: fmov w9, s2 +; CHECK-GI-NEXT: mov v0.h[2], w8 +; CHECK-GI-NEXT: mov v1.h[2], w9 +; CHECK-GI-NEXT: fmov w8, s5 +; CHECK-GI-NEXT: fmov w9, s3 +; CHECK-GI-NEXT: mov v0.h[3], w8 +; CHECK-GI-NEXT: mov v1.h[3], w9 ; CHECK-GI-NEXT: sub v0.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 @@ -238,14 +240,12 @@ define void @v2i16(ptr %p1, ptr %p2) { ; ; CHECK-GI-LABEL: v2i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ldr h0, [x0] -; CHECK-GI-NEXT: ldr h1, [x1] -; CHECK-GI-NEXT: add x8, x0, #2 -; CHECK-GI-NEXT: add x9, x1, #2 -; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] -; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] -; 
CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: ld1 { v0.h }[0], [x0] +; CHECK-GI-NEXT: ld1 { v1.h }[0], [x1] +; CHECK-GI-NEXT: ldr h2, [x0, #2] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] +; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] ; CHECK-GI-NEXT: sub v0.2s, v0.2s, v1.2s ; CHECK-GI-NEXT: mov s1, v0.s[1] ; CHECK-GI-NEXT: str h0, [x0] diff --git a/llvm/test/CodeGen/AArch64/xtn.ll b/llvm/test/CodeGen/AArch64/xtn.ll index fb3f8ebd7d141..8a4d6b8c7b789 100644 --- a/llvm/test/CodeGen/AArch64/xtn.ll +++ b/llvm/test/CodeGen/AArch64/xtn.ll @@ -174,9 +174,8 @@ define <2 x i16> @xtn_v2i128_v2i16(<2 x i128> %a) { ; ; CHECK-GI-LABEL: xtn_v2i128_v2i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: fmov s0, w0 -; CHECK-GI-NEXT: mov v0.h[1], w2 -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: mov v0.s[0], w0 +; CHECK-GI-NEXT: mov v0.s[1], w2 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll index 7e95b6684e821..0d5010113ce0b 100644 --- a/llvm/test/CodeGen/AArch64/zext.ll +++ b/llvm/test/CodeGen/AArch64/zext.ll @@ -1169,52 +1169,44 @@ define <16 x i64> @zext_v16i10_v16i64(<16 x i10> %a) { ; ; CHECK-GI-LABEL: zext_v16i10_v16i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ldr w8, [sp] -; CHECK-GI-NEXT: ldr w10, [sp, #32] -; CHECK-GI-NEXT: fmov s0, w0 -; CHECK-GI-NEXT: fmov s1, w4 -; CHECK-GI-NEXT: ldr w9, [sp, #8] -; CHECK-GI-NEXT: ldr w11, [sp, #40] -; CHECK-GI-NEXT: fmov s2, w8 -; CHECK-GI-NEXT: fmov s3, w10 -; CHECK-GI-NEXT: ldr w8, [sp, #16] -; CHECK-GI-NEXT: mov v0.h[1], w1 -; CHECK-GI-NEXT: mov v1.h[1], w5 -; CHECK-GI-NEXT: mov v2.h[1], w9 -; CHECK-GI-NEXT: mov v3.h[1], w11 -; CHECK-GI-NEXT: ldr w9, [sp, #48] -; CHECK-GI-NEXT: mov v0.h[2], w2 -; CHECK-GI-NEXT: mov v1.h[2], w6 -; CHECK-GI-NEXT: mov v2.h[2], w8 -; CHECK-GI-NEXT: mov v3.h[2], w9 -; CHECK-GI-NEXT: ldr w8, 
[sp, #24] -; CHECK-GI-NEXT: ldr w9, [sp, #56] -; CHECK-GI-NEXT: mov v0.h[3], w3 -; CHECK-GI-NEXT: mov v1.h[3], w7 -; CHECK-GI-NEXT: mov v2.h[3], w8 -; CHECK-GI-NEXT: mov v3.h[3], w9 +; CHECK-GI-NEXT: mov v0.s[0], w0 +; CHECK-GI-NEXT: mov v1.s[0], w2 +; CHECK-GI-NEXT: ldr s3, [sp] +; CHECK-GI-NEXT: mov v2.s[0], w4 +; CHECK-GI-NEXT: mov v5.s[0], w6 +; CHECK-GI-NEXT: ldr s4, [sp, #8] +; CHECK-GI-NEXT: ldr s6, [sp, #16] +; CHECK-GI-NEXT: ldr s7, [sp, #24] +; CHECK-GI-NEXT: ldr s16, [sp, #32] +; CHECK-GI-NEXT: ldr s17, [sp, #40] +; CHECK-GI-NEXT: ldr s18, [sp, #48] +; CHECK-GI-NEXT: ldr s19, [sp, #56] +; CHECK-GI-NEXT: mov v0.s[1], w1 +; CHECK-GI-NEXT: mov v1.s[1], w3 +; CHECK-GI-NEXT: mov v3.s[1], v4.s[0] +; CHECK-GI-NEXT: mov v2.s[1], w5 +; CHECK-GI-NEXT: mov v5.s[1], w7 +; CHECK-GI-NEXT: mov v6.s[1], v7.s[0] +; CHECK-GI-NEXT: mov v16.s[1], v17.s[0] +; CHECK-GI-NEXT: mov v18.s[1], v19.s[0] ; CHECK-GI-NEXT: adrp x8, .LCPI54_0 ; CHECK-GI-NEXT: ldr q7, [x8, :lo12:.LCPI54_0] -; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-GI-NEXT: ushll v3.4s, v3.4h, #0 -; CHECK-GI-NEXT: ushll v4.2d, v0.2s, #0 -; CHECK-GI-NEXT: ushll2 v5.2d, v0.4s, #0 -; CHECK-GI-NEXT: ushll v6.2d, v1.2s, #0 -; CHECK-GI-NEXT: ushll2 v16.2d, v1.4s, #0 -; CHECK-GI-NEXT: ushll v17.2d, v2.2s, #0 -; CHECK-GI-NEXT: ushll2 v18.2d, v2.4s, #0 -; CHECK-GI-NEXT: ushll v19.2d, v3.2s, #0 -; CHECK-GI-NEXT: ushll2 v20.2d, v3.4s, #0 -; CHECK-GI-NEXT: and v0.16b, v4.16b, v7.16b -; CHECK-GI-NEXT: and v1.16b, v5.16b, v7.16b -; CHECK-GI-NEXT: and v2.16b, v6.16b, v7.16b -; CHECK-GI-NEXT: and v3.16b, v16.16b, v7.16b -; CHECK-GI-NEXT: and v4.16b, v17.16b, v7.16b -; CHECK-GI-NEXT: and v5.16b, v18.16b, v7.16b -; CHECK-GI-NEXT: and v6.16b, v19.16b, v7.16b -; CHECK-GI-NEXT: and v7.16b, v20.16b, v7.16b +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-GI-NEXT: ushll 
v4.2d, v5.2s, #0 +; CHECK-GI-NEXT: ushll v5.2d, v3.2s, #0 +; CHECK-GI-NEXT: ushll v6.2d, v6.2s, #0 +; CHECK-GI-NEXT: ushll v16.2d, v16.2s, #0 +; CHECK-GI-NEXT: ushll v17.2d, v18.2s, #0 +; CHECK-GI-NEXT: and v0.16b, v0.16b, v7.16b +; CHECK-GI-NEXT: and v1.16b, v1.16b, v7.16b +; CHECK-GI-NEXT: and v2.16b, v2.16b, v7.16b +; CHECK-GI-NEXT: and v3.16b, v4.16b, v7.16b +; CHECK-GI-NEXT: and v4.16b, v5.16b, v7.16b +; CHECK-GI-NEXT: and v5.16b, v6.16b, v7.16b +; CHECK-GI-NEXT: and v6.16b, v16.16b, v7.16b +; CHECK-GI-NEXT: and v7.16b, v17.16b, v7.16b ; CHECK-GI-NEXT: ret entry: %c = zext <16 x i10> %a to <16 x i64> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shift.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shift.mir index df7fc56799137..db40334e4d191 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shift.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shift.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[SHL]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 %2:_(s64) = G_SHL %0:_, %1 @@ -82,9 +82,10 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %src:_(s64) = G_ZEXT [[COPY]](s32) ; CHECK-NEXT: %amt:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], %amt(s32) - ; CHECK-NEXT: %trunc:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: %shift:_(s64) = G_LSHR %src, %amt(s32) + ; CHECK-NEXT: %trunc:_(s16) = G_TRUNC %shift(s64) ; CHECK-NEXT: %foo:_(s16) 
= G_CONSTANT i16 55 ; CHECK-NEXT: %keep:_(s32) = G_MERGE_VALUES %trunc(s16), %foo(s16) ; CHECK-NEXT: $vgpr0 = COPY %keep(s32) @@ -108,9 +109,10 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %src:_(s64) = G_ZEXT [[COPY]](s32) ; CHECK-NEXT: %amt:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], %amt(s32) - ; CHECK-NEXT: %trunc:_(s16) = G_TRUNC [[ASHR]](s32) + ; CHECK-NEXT: %shift:_(s64) = G_ASHR %src, %amt(s32) + ; CHECK-NEXT: %trunc:_(s16) = G_TRUNC %shift(s64) ; CHECK-NEXT: %foo:_(s16) = G_CONSTANT i16 55 ; CHECK-NEXT: %keep:_(s32) = G_MERGE_VALUES %trunc(s16), %foo(s16) ; CHECK-NEXT: $vgpr0 = COPY %keep(s32) @@ -161,9 +163,10 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %src:_(s64) = G_ZEXT [[COPY]](s32) ; CHECK-NEXT: %amt:_(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], %amt(s32) - ; CHECK-NEXT: %trunc:_(s26) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: %shift:_(s64) = G_LSHR %src, %amt(s32) + ; CHECK-NEXT: %trunc:_(s26) = G_TRUNC %shift(s64) ; CHECK-NEXT: %foo:_(s26) = G_CONSTANT i26 55 ; CHECK-NEXT: %keep0:_(s26) = G_ADD %trunc, %foo ; CHECK-NEXT: %keep1:_(s32) = G_ANYEXT %keep0(s26) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll index bac80f0777c02..3b1fc900484a8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll @@ -47,6 +47,7 @@ define amdgpu_ps i8 @s_sext_inreg_i8(i8 inreg %value) { ; ; GFX8-LABEL: s_sext_inreg_i8: ; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b32 s0, s0, 0xff ; GFX8-NEXT: s_lshl_b32 s0, s0, 3 ; GFX8-NEXT: s_sext_i32_i8 s0, s0 ; GFX8-NEXT: s_ashr_i32 s0, s0, 3 @@ -54,6 +55,7 @@ define amdgpu_ps i8 @s_sext_inreg_i8(i8 inreg %value) { ; ; GFX9-LABEL: s_sext_inreg_i8: ; GFX9: ; %bb.0: +; 
GFX9-NEXT: s_and_b32 s0, s0, 0xff ; GFX9-NEXT: s_lshl_b32 s0, s0, 3 ; GFX9-NEXT: s_sext_i32_i8 s0, s0 ; GFX9-NEXT: s_ashr_i32 s0, s0, 3 @@ -61,6 +63,7 @@ define amdgpu_ps i8 @s_sext_inreg_i8(i8 inreg %value) { ; ; GFX10PLUS-LABEL: s_sext_inreg_i8: ; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xff ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 3 ; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 3 @@ -78,6 +81,7 @@ define amdgpu_ps i8 @s_sext_inreg_i8_6(i8 inreg %value) { ; ; GFX8-LABEL: s_sext_inreg_i8_6: ; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b32 s0, s0, 0xff ; GFX8-NEXT: s_lshl_b32 s0, s0, 6 ; GFX8-NEXT: s_sext_i32_i8 s0, s0 ; GFX8-NEXT: s_ashr_i32 s0, s0, 6 @@ -85,6 +89,7 @@ define amdgpu_ps i8 @s_sext_inreg_i8_6(i8 inreg %value) { ; ; GFX9-LABEL: s_sext_inreg_i8_6: ; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b32 s0, s0, 0xff ; GFX9-NEXT: s_lshl_b32 s0, s0, 6 ; GFX9-NEXT: s_sext_i32_i8 s0, s0 ; GFX9-NEXT: s_ashr_i32 s0, s0, 6 @@ -92,6 +97,7 @@ define amdgpu_ps i8 @s_sext_inreg_i8_6(i8 inreg %value) { ; ; GFX10PLUS-LABEL: s_sext_inreg_i8_6: ; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xff ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 6 ; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 6 @@ -578,6 +584,7 @@ define amdgpu_ps i16 @s_sext_inreg_i16_9(i16 inreg %value) { ; ; GFX8-LABEL: s_sext_inreg_i16_9: ; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b32 s0, s0, 0xffff ; GFX8-NEXT: s_lshl_b32 s0, s0, 9 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_ashr_i32 s0, s0, 9 @@ -585,6 +592,7 @@ define amdgpu_ps i16 @s_sext_inreg_i16_9(i16 inreg %value) { ; ; GFX9-LABEL: s_sext_inreg_i16_9: ; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b32 s0, s0, 0xffff ; GFX9-NEXT: s_lshl_b32 s0, s0, 9 ; GFX9-NEXT: s_sext_i32_i16 s0, s0 ; GFX9-NEXT: s_ashr_i32 s0, s0, 9 @@ -592,6 +600,7 @@ define amdgpu_ps i16 @s_sext_inreg_i16_9(i16 inreg %value) { ; ; GFX10PLUS-LABEL: s_sext_inreg_i16_9: ; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff 
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 9 ; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 9 @@ -609,6 +618,7 @@ define amdgpu_ps i16 @s_sext_inreg_i16_15(i16 inreg %value) { ; ; GFX8-LABEL: s_sext_inreg_i16_15: ; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b32 s0, s0, 0xffff ; GFX8-NEXT: s_lshl_b32 s0, s0, 15 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_ashr_i32 s0, s0, 15 @@ -616,6 +626,7 @@ define amdgpu_ps i16 @s_sext_inreg_i16_15(i16 inreg %value) { ; ; GFX9-LABEL: s_sext_inreg_i16_15: ; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b32 s0, s0, 0xffff ; GFX9-NEXT: s_lshl_b32 s0, s0, 15 ; GFX9-NEXT: s_sext_i32_i16 s0, s0 ; GFX9-NEXT: s_ashr_i32 s0, s0, 15 @@ -623,6 +634,7 @@ define amdgpu_ps i16 @s_sext_inreg_i16_15(i16 inreg %value) { ; ; GFX10PLUS-LABEL: s_sext_inreg_i16_15: ; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 15 ; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 15 @@ -720,6 +732,7 @@ define amdgpu_ps i32 @s_sext_inreg_v2i16_11(<2 x i16> inreg %value) { ; GFX8-LABEL: s_sext_inreg_v2i16_11: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_lshr_b32 s1, s0, 16 +; GFX8-NEXT: s_and_b32 s0, s0, 0xffff ; GFX8-NEXT: s_lshl_b32 s0, s0, 11 ; GFX8-NEXT: s_lshl_b32 s1, s1, 11 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 @@ -854,7 +867,9 @@ define amdgpu_ps <2 x i32> @s_sext_inreg_v4i16_14(<4 x i16> inreg %value) { ; GFX8-LABEL: s_sext_inreg_v4i16_14: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_lshr_b32 s2, s0, 16 +; GFX8-NEXT: s_and_b32 s0, s0, 0xffff ; GFX8-NEXT: s_lshr_b32 s3, s1, 16 +; GFX8-NEXT: s_and_b32 s1, s1, 0xffff ; GFX8-NEXT: s_lshl_b32 s0, s0, 14 ; GFX8-NEXT: s_lshl_b32 s2, s2, 14 ; GFX8-NEXT: s_lshl_b32 s1, s1, 14 @@ -1068,15 +1083,19 @@ define amdgpu_ps <4 x i32> @s_sext_inreg_v8i16_5(<8 x i16> inreg %value) { ; GFX8-LABEL: s_sext_inreg_v8i16_5: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_lshr_b32 s4, s0, 16 +; GFX8-NEXT: s_and_b32 s0, s0, 0xffff ; GFX8-NEXT: s_lshr_b32 s5, s1, 16 +; GFX8-NEXT: s_and_b32 
s1, s1, 0xffff ; GFX8-NEXT: s_lshl_b32 s0, s0, 5 ; GFX8-NEXT: s_lshl_b32 s4, s4, 5 ; GFX8-NEXT: s_lshr_b32 s6, s2, 16 +; GFX8-NEXT: s_and_b32 s2, s2, 0xffff ; GFX8-NEXT: s_lshl_b32 s1, s1, 5 ; GFX8-NEXT: s_lshl_b32 s5, s5, 5 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_lshr_b32 s7, s3, 16 +; GFX8-NEXT: s_and_b32 s3, s3, 0xffff ; GFX8-NEXT: s_lshl_b32 s2, s2, 5 ; GFX8-NEXT: s_lshl_b32 s6, s6, 5 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll index 4cf1c92539c36..a89e6bf6b20f6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll @@ -91,13 +91,26 @@ define amdgpu_ps i8 @s_shl_i8(i8 inreg %value, i8 inreg %amount) { } define amdgpu_ps i8 @s_shl_i8_7(i8 inreg %value) { -; GCN-LABEL: s_shl_i8_7: -; GCN: ; %bb.0: -; GCN-NEXT: s_lshl_b32 s0, s0, 7 -; GCN-NEXT: ; return to shader part epilog +; GFX6-LABEL: s_shl_i8_7: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshl_b32 s0, s0, 7 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_shl_i8_7: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b32 s0, s0, 0xff +; GFX8-NEXT: s_lshl_b32 s0, s0, 7 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_shl_i8_7: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b32 s0, s0, 0xff +; GFX9-NEXT: s_lshl_b32 s0, s0, 7 +; GFX9-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_shl_i8_7: ; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xff ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 7 ; GFX10PLUS-NEXT: ; return to shader part epilog %result = shl i8 %value, 7 @@ -647,13 +660,26 @@ define amdgpu_ps i16 @s_shl_i16(i16 inreg %value, i16 inreg %amount) { } define amdgpu_ps i16 @s_shl_i16_15(i16 inreg %value) { -; GCN-LABEL: s_shl_i16_15: -; GCN: ; %bb.0: -; GCN-NEXT: s_lshl_b32 s0, s0, 15 -; GCN-NEXT: ; return to shader part epilog +; GFX6-LABEL: s_shl_i16_15: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshl_b32 s0, s0, 15 
+; GFX6-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: s_shl_i16_15: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b32 s0, s0, 0xffff +; GFX8-NEXT: s_lshl_b32 s0, s0, 15 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_shl_i16_15: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b32 s0, s0, 0xffff +; GFX9-NEXT: s_lshl_b32 s0, s0, 15 +; GFX9-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_shl_i16_15: ; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 15 ; GFX10PLUS-NEXT: ; return to shader part epilog %result = shl i16 %value, 15 From 0c79139efe721a88ca048bf478842d601bb5057e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Tue, 15 Oct 2024 18:03:00 +0200 Subject: [PATCH 2/9] fix riscv test --- llvm/test/CodeGen/RISCV/GlobalISel/shift.ll | 78 ++++++++++++++++++--- 1 file changed, 69 insertions(+), 9 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll index 3e090507ad642..2a950710ed2ca 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll @@ -5,12 +5,20 @@ define i16 @test_lshr_i48(i48 %x) { ; RV32-LABEL: test_lshr_i48: ; RV32: # %bb.0: +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: srli a0, a0, 16 +; RV32-NEXT: slli a1, a1, 16 +; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: test_lshr_i48: ; RV64: # %bb.0: -; RV64-NEXT: srliw a0, a0, 16 +; RV64-NEXT: li a1, -1 +; RV64-NEXT: srli a1, a1, 16 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: srli a0, a0, 16 ; RV64-NEXT: ret %lshr = lshr i48 %x, 16 %trunc = trunc i48 %lshr to i16 @@ -20,12 +28,18 @@ define i16 @test_lshr_i48(i48 %x) { define i16 @test_ashr_i48(i48 %x) { ; RV32-LABEL: test_ashr_i48: ; RV32: # %bb.0: -; RV32-NEXT: srai a0, a0, 16 +; RV32-NEXT: slli a1, a1, 16 +; RV32-NEXT: srai a1, a1, 16 +; RV32-NEXT: srli a0, a0, 16 +; RV32-NEXT: slli a1, a1, 16 
+; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: test_ashr_i48: ; RV64: # %bb.0: -; RV64-NEXT: sraiw a0, a0, 16 +; RV64-NEXT: slli a0, a0, 16 +; RV64-NEXT: srai a0, a0, 16 +; RV64-NEXT: srai a0, a0, 16 ; RV64-NEXT: ret %ashr = ashr i48 %x, 16 %trunc = trunc i48 %ashr to i16 @@ -40,7 +54,7 @@ define i16 @test_shl_i48(i48 %x) { ; ; RV64-LABEL: test_shl_i48: ; RV64: # %bb.0: -; RV64-NEXT: slliw a0, a0, 8 +; RV64-NEXT: slli a0, a0, 8 ; RV64-NEXT: ret %shl = shl i48 %x, 8 %trunc = trunc i48 %shl to i16 @@ -51,13 +65,34 @@ define i16 @test_lshr_i48_2(i48 %x, i48 %y) { ; RV32-LABEL: test_lshr_i48_2: ; RV32: # %bb.0: ; RV32-NEXT: andi a2, a2, 15 -; RV32-NEXT: srl a0, a0, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: li a4, 32 +; RV32-NEXT: and a1, a1, a3 +; RV32-NEXT: bltu a2, a4, .LBB3_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: addi a3, a2, -32 +; RV32-NEXT: srl a1, a1, a3 +; RV32-NEXT: bnez a2, .LBB3_3 +; RV32-NEXT: j .LBB3_4 +; RV32-NEXT: .LBB3_2: +; RV32-NEXT: srl a3, a0, a2 +; RV32-NEXT: neg a4, a2 +; RV32-NEXT: sll a1, a1, a4 +; RV32-NEXT: or a1, a3, a1 +; RV32-NEXT: beqz a2, .LBB3_4 +; RV32-NEXT: .LBB3_3: +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: .LBB3_4: ; RV32-NEXT: ret ; ; RV64-LABEL: test_lshr_i48_2: ; RV64: # %bb.0: ; RV64-NEXT: andi a1, a1, 15 -; RV64-NEXT: srlw a0, a0, a1 +; RV64-NEXT: li a2, -1 +; RV64-NEXT: srli a2, a2, 16 +; RV64-NEXT: and a0, a0, a2 +; RV64-NEXT: srl a0, a0, a1 ; RV64-NEXT: ret %and = and i48 %y, 15 %lshr = lshr i48 %x, %and @@ -69,13 +104,32 @@ define i16 @test_ashr_i48_2(i48 %x, i48 %y) { ; RV32-LABEL: test_ashr_i48_2: ; RV32: # %bb.0: ; RV32-NEXT: andi a2, a2, 15 -; RV32-NEXT: sra a0, a0, a2 +; RV32-NEXT: slli a1, a1, 16 +; RV32-NEXT: li a3, 32 +; RV32-NEXT: srai a1, a1, 16 +; RV32-NEXT: bltu a2, a3, .LBB4_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: addi a3, a2, -32 +; RV32-NEXT: sra a1, a1, a3 +; RV32-NEXT: bnez a2, .LBB4_3 +; RV32-NEXT: j .LBB4_4 +; RV32-NEXT: .LBB4_2: +; RV32-NEXT: srl a3, a0, a2 +; 
RV32-NEXT: neg a4, a2 +; RV32-NEXT: sll a1, a1, a4 +; RV32-NEXT: or a1, a3, a1 +; RV32-NEXT: beqz a2, .LBB4_4 +; RV32-NEXT: .LBB4_3: +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: .LBB4_4: ; RV32-NEXT: ret ; ; RV64-LABEL: test_ashr_i48_2: ; RV64: # %bb.0: ; RV64-NEXT: andi a1, a1, 15 -; RV64-NEXT: sraw a0, a0, a1 +; RV64-NEXT: slli a0, a0, 16 +; RV64-NEXT: srai a0, a0, 16 +; RV64-NEXT: sra a0, a0, a1 ; RV64-NEXT: ret %and = and i48 %y, 15 %ashr = ashr i48 %x, %and @@ -87,13 +141,19 @@ define i16 @test_shl_i48_2(i48 %x, i48 %y) { ; RV32-LABEL: test_shl_i48_2: ; RV32: # %bb.0: ; RV32-NEXT: andi a2, a2, 15 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: bltu a2, a1, .LBB5_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a0, 0 +; RV32-NEXT: ret +; RV32-NEXT: .LBB5_2: ; RV32-NEXT: sll a0, a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: test_shl_i48_2: ; RV64: # %bb.0: ; RV64-NEXT: andi a1, a1, 15 -; RV64-NEXT: sllw a0, a0, a1 +; RV64-NEXT: sll a0, a0, a1 ; RV64-NEXT: ret %and = and i48 %y, 15 %shl = shl i48 %x, %and From d13650d7555115528abc3d2ebad9f4455caca777 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Tue, 15 Oct 2024 19:36:59 +0200 Subject: [PATCH 3/9] address review comments re-add lost combine --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 2 +- .../include/llvm/Target/GlobalISel/Combine.td | 2 +- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 12 +-- .../combine-shift-immed-mismatch-crash.mir | 10 +-- .../AArch64/GlobalISel/combine-trunc.mir | 25 ++++-- llvm/test/CodeGen/AArch64/setcc_knownbits.ll | 18 +++-- llvm/test/CodeGen/AArch64/shift-logic.ll | 3 +- .../AMDGPU/GlobalISel/combine-trunc-shift.mir | 21 +++-- .../CodeGen/AMDGPU/GlobalISel/sext_inreg.ll | 19 ----- llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll | 42 ++-------- llvm/test/CodeGen/RISCV/GlobalISel/shift.ll | 78 +++---------------- 11 files changed, 67 insertions(+), 165 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h 
index ecca7396b9019..9240a3c3127eb 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -918,7 +918,7 @@ class CombinerHelper { bool matchCanonicalizeICmp(const MachineInstr &MI, BuildFnTy &MatchInfo); bool matchCanonicalizeFCmp(const MachineInstr &MI, BuildFnTy &MatchInfo); - // unmerge_values anyext build vector + // unmerge_values(anyext(build vector)) -> build vector(anyext) bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, BuildFnTy &MatchInfo); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 8a98303f4437e..ead4149fc1106 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1926,7 +1926,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines, reassocs, ptr_add_immed_chain, cmp_combines, shl_ashr_to_sext_inreg, sext_inreg_of_load, width_reduction_combines, select_combines, - known_bits_simplifications, + known_bits_simplifications, trunc_shift, not_cmp_fold, opt_brcond_by_inverting_cond, const_combines, xor_of_and_with_same_reg, ptr_add_with_zero, shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 9b63de2713ade..165a4b59af16f 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -7648,21 +7648,21 @@ bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}})) return false; - // check scalar anyext + // We check the legality of scalar anyext. 
if (!isLegalOrBeforeLegalizer( {TargetOpcode::G_ANYEXT, {SmallBvElemenTy, BigBvTy.getElementType()}})) return false; MatchInfo = [=](MachineIRBuilder &B) { - // build into each G_UNMERGE_VALUES def - // a small build vector with anyext from the source build vector + // Build into each G_UNMERGE_VALUES def + // a small build vector with anyext from the source build vector. for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) { SmallVector Ops; for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) { - auto AnyExt = B.buildAnyExt( - SmallBvElemenTy, - BV->getSourceReg(I * SmallBvTy.getNumElements() + J)); + Register SourceArray = + BV->getSourceReg(I * SmallBvTy.getNumElements() + J); + auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray); Ops.push_back(AnyExt.getReg(0)); } B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir index 0f6dd23b5bb5e..16a8f80897846 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir @@ -26,13 +26,9 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = nsw G_SHL [[LOAD]], [[C1]](s32) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = nsw G_MUL [[SHL]], [[C]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = nsw G_SHL [[MUL]], [[C2]](s32) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SHL1]](s32) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C3]](s64) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[SHL2]](s64) - ; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[MUL]], [[C2]](s64) + ; 
CHECK-NEXT: $w0 = COPY [[SHL1]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.1: liveins: $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir index 82d0dd7b37cc4..9a2b9dd4b2b60 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir @@ -141,14 +141,23 @@ legalized: true body: | bb.1: liveins: $w0 - ; CHECK-LABEL: name: test_combine_trunc_shl_s32_by_2 - ; CHECK: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK-NEXT: $h0 = COPY [[TRUNC]](s16) + ; CHECK-PRE-LABEL: name: test_combine_trunc_shl_s32_by_2 + ; CHECK-PRE: liveins: $w0 + ; CHECK-PRE-NEXT: {{ $}} + ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-PRE-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-PRE-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s32) + ; CHECK-PRE-NEXT: $h0 = COPY [[SHL]](s16) + ; + ; CHECK-POST-LABEL: name: test_combine_trunc_shl_s32_by_2 + ; CHECK-POST: liveins: $w0 + ; CHECK-POST-NEXT: {{ $}} + ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-POST-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-POST-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CHECK-POST-NEXT: $h0 = COPY [[TRUNC]](s16) %0:_(s32) = COPY $w0 %1:_(s32) = G_CONSTANT i32 2 %2:_(s32) = G_SHL %0(s32), %1(s32) diff --git a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll index 5cb933148a1a4..4dd2a896cd81c 100644 --- a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll +++ 
b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll @@ -57,12 +57,18 @@ land.end: ; preds = %land.rhs, %entry declare i64 @llvm.ctlz.i64(i64 %in, i1) define i1 @lshr_ctlz_undef_cmpeq_one_i64(i64 %in) { -; CHECK-LABEL: lshr_ctlz_undef_cmpeq_one_i64: -; CHECK: // %bb.0: -; CHECK-NEXT: clz x8, x0 -; CHECK-NEXT: lsr x0, x8, #6 -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: lshr_ctlz_undef_cmpeq_one_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: clz x8, x0 +; CHECK-SD-NEXT: lsr x0, x8, #6 +; CHECK-SD-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: lshr_ctlz_undef_cmpeq_one_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: clz x8, x0 +; CHECK-GI-NEXT: lsr w0, w8, #6 +; CHECK-GI-NEXT: ret %ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1) %lshr = lshr i64 %ctlz, 6 %icmp = icmp eq i64 %lshr, 1 diff --git a/llvm/test/CodeGen/AArch64/shift-logic.ll b/llvm/test/CodeGen/AArch64/shift-logic.ll index 6978e624bbed7..b1ad31d1475ce 100644 --- a/llvm/test/CodeGen/AArch64/shift-logic.ll +++ b/llvm/test/CodeGen/AArch64/shift-logic.ll @@ -280,8 +280,7 @@ define void @apint_type_mismatch(i16 %a, ptr %p) { ; CHECK-GISEL: // %bb.0: // %entry ; CHECK-GISEL-NEXT: ubfx w8, w0, #3, #13 ; CHECK-GISEL-NEXT: and w8, w8, #0xff -; CHECK-GISEL-NEXT: and x8, x8, #0xffff -; CHECK-GISEL-NEXT: lsl x8, x8, #3 +; CHECK-GISEL-NEXT: lsl w8, w8, #3 ; CHECK-GISEL-NEXT: str w8, [x1] ; CHECK-GISEL-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shift.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shift.mir index db40334e4d191..df7fc56799137 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shift.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shift.mir @@ -14,9 +14,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], 
[[C]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[SHL]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[SHL]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 %2:_(s64) = G_SHL %0:_, %1 @@ -82,10 +82,9 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src:_(s64) = G_ZEXT [[COPY]](s32) ; CHECK-NEXT: %amt:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: %shift:_(s64) = G_LSHR %src, %amt(s32) - ; CHECK-NEXT: %trunc:_(s16) = G_TRUNC %shift(s64) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], %amt(s32) + ; CHECK-NEXT: %trunc:_(s16) = G_TRUNC [[LSHR]](s32) ; CHECK-NEXT: %foo:_(s16) = G_CONSTANT i16 55 ; CHECK-NEXT: %keep:_(s32) = G_MERGE_VALUES %trunc(s16), %foo(s16) ; CHECK-NEXT: $vgpr0 = COPY %keep(s32) @@ -109,10 +108,9 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src:_(s64) = G_ZEXT [[COPY]](s32) ; CHECK-NEXT: %amt:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: %shift:_(s64) = G_ASHR %src, %amt(s32) - ; CHECK-NEXT: %trunc:_(s16) = G_TRUNC %shift(s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], %amt(s32) + ; CHECK-NEXT: %trunc:_(s16) = G_TRUNC [[ASHR]](s32) ; CHECK-NEXT: %foo:_(s16) = G_CONSTANT i16 55 ; CHECK-NEXT: %keep:_(s32) = G_MERGE_VALUES %trunc(s16), %foo(s16) ; CHECK-NEXT: $vgpr0 = COPY %keep(s32) @@ -163,10 +161,9 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: %src:_(s64) = G_ZEXT [[COPY]](s32) ; CHECK-NEXT: %amt:_(s32) = G_CONSTANT i32 6 - ; CHECK-NEXT: %shift:_(s64) = G_LSHR %src, %amt(s32) - ; CHECK-NEXT: %trunc:_(s26) = G_TRUNC %shift(s64) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], %amt(s32) + ; 
CHECK-NEXT: %trunc:_(s26) = G_TRUNC [[LSHR]](s32) ; CHECK-NEXT: %foo:_(s26) = G_CONSTANT i26 55 ; CHECK-NEXT: %keep0:_(s26) = G_ADD %trunc, %foo ; CHECK-NEXT: %keep1:_(s32) = G_ANYEXT %keep0(s26) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll index 3b1fc900484a8..bac80f0777c02 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll @@ -47,7 +47,6 @@ define amdgpu_ps i8 @s_sext_inreg_i8(i8 inreg %value) { ; ; GFX8-LABEL: s_sext_inreg_i8: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_and_b32 s0, s0, 0xff ; GFX8-NEXT: s_lshl_b32 s0, s0, 3 ; GFX8-NEXT: s_sext_i32_i8 s0, s0 ; GFX8-NEXT: s_ashr_i32 s0, s0, 3 @@ -55,7 +54,6 @@ define amdgpu_ps i8 @s_sext_inreg_i8(i8 inreg %value) { ; ; GFX9-LABEL: s_sext_inreg_i8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_and_b32 s0, s0, 0xff ; GFX9-NEXT: s_lshl_b32 s0, s0, 3 ; GFX9-NEXT: s_sext_i32_i8 s0, s0 ; GFX9-NEXT: s_ashr_i32 s0, s0, 3 @@ -63,7 +61,6 @@ define amdgpu_ps i8 @s_sext_inreg_i8(i8 inreg %value) { ; ; GFX10PLUS-LABEL: s_sext_inreg_i8: ; GFX10PLUS: ; %bb.0: -; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xff ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 3 ; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 3 @@ -81,7 +78,6 @@ define amdgpu_ps i8 @s_sext_inreg_i8_6(i8 inreg %value) { ; ; GFX8-LABEL: s_sext_inreg_i8_6: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_and_b32 s0, s0, 0xff ; GFX8-NEXT: s_lshl_b32 s0, s0, 6 ; GFX8-NEXT: s_sext_i32_i8 s0, s0 ; GFX8-NEXT: s_ashr_i32 s0, s0, 6 @@ -89,7 +85,6 @@ define amdgpu_ps i8 @s_sext_inreg_i8_6(i8 inreg %value) { ; ; GFX9-LABEL: s_sext_inreg_i8_6: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_and_b32 s0, s0, 0xff ; GFX9-NEXT: s_lshl_b32 s0, s0, 6 ; GFX9-NEXT: s_sext_i32_i8 s0, s0 ; GFX9-NEXT: s_ashr_i32 s0, s0, 6 @@ -97,7 +92,6 @@ define amdgpu_ps i8 @s_sext_inreg_i8_6(i8 inreg %value) { ; ; GFX10PLUS-LABEL: s_sext_inreg_i8_6: ; GFX10PLUS: ; %bb.0: -; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xff ; 
GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 6 ; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 6 @@ -584,7 +578,6 @@ define amdgpu_ps i16 @s_sext_inreg_i16_9(i16 inreg %value) { ; ; GFX8-LABEL: s_sext_inreg_i16_9: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_and_b32 s0, s0, 0xffff ; GFX8-NEXT: s_lshl_b32 s0, s0, 9 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_ashr_i32 s0, s0, 9 @@ -592,7 +585,6 @@ define amdgpu_ps i16 @s_sext_inreg_i16_9(i16 inreg %value) { ; ; GFX9-LABEL: s_sext_inreg_i16_9: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_and_b32 s0, s0, 0xffff ; GFX9-NEXT: s_lshl_b32 s0, s0, 9 ; GFX9-NEXT: s_sext_i32_i16 s0, s0 ; GFX9-NEXT: s_ashr_i32 s0, s0, 9 @@ -600,7 +592,6 @@ define amdgpu_ps i16 @s_sext_inreg_i16_9(i16 inreg %value) { ; ; GFX10PLUS-LABEL: s_sext_inreg_i16_9: ; GFX10PLUS: ; %bb.0: -; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 9 ; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 9 @@ -618,7 +609,6 @@ define amdgpu_ps i16 @s_sext_inreg_i16_15(i16 inreg %value) { ; ; GFX8-LABEL: s_sext_inreg_i16_15: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_and_b32 s0, s0, 0xffff ; GFX8-NEXT: s_lshl_b32 s0, s0, 15 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_ashr_i32 s0, s0, 15 @@ -626,7 +616,6 @@ define amdgpu_ps i16 @s_sext_inreg_i16_15(i16 inreg %value) { ; ; GFX9-LABEL: s_sext_inreg_i16_15: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_and_b32 s0, s0, 0xffff ; GFX9-NEXT: s_lshl_b32 s0, s0, 15 ; GFX9-NEXT: s_sext_i32_i16 s0, s0 ; GFX9-NEXT: s_ashr_i32 s0, s0, 15 @@ -634,7 +623,6 @@ define amdgpu_ps i16 @s_sext_inreg_i16_15(i16 inreg %value) { ; ; GFX10PLUS-LABEL: s_sext_inreg_i16_15: ; GFX10PLUS: ; %bb.0: -; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 15 ; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0 ; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 15 @@ -732,7 +720,6 @@ define amdgpu_ps i32 @s_sext_inreg_v2i16_11(<2 x i16> inreg %value) { ; GFX8-LABEL: s_sext_inreg_v2i16_11: ; GFX8: ; %bb.0: ; GFX8-NEXT: 
s_lshr_b32 s1, s0, 16 -; GFX8-NEXT: s_and_b32 s0, s0, 0xffff ; GFX8-NEXT: s_lshl_b32 s0, s0, 11 ; GFX8-NEXT: s_lshl_b32 s1, s1, 11 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 @@ -867,9 +854,7 @@ define amdgpu_ps <2 x i32> @s_sext_inreg_v4i16_14(<4 x i16> inreg %value) { ; GFX8-LABEL: s_sext_inreg_v4i16_14: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_lshr_b32 s2, s0, 16 -; GFX8-NEXT: s_and_b32 s0, s0, 0xffff ; GFX8-NEXT: s_lshr_b32 s3, s1, 16 -; GFX8-NEXT: s_and_b32 s1, s1, 0xffff ; GFX8-NEXT: s_lshl_b32 s0, s0, 14 ; GFX8-NEXT: s_lshl_b32 s2, s2, 14 ; GFX8-NEXT: s_lshl_b32 s1, s1, 14 @@ -1083,19 +1068,15 @@ define amdgpu_ps <4 x i32> @s_sext_inreg_v8i16_5(<8 x i16> inreg %value) { ; GFX8-LABEL: s_sext_inreg_v8i16_5: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_lshr_b32 s4, s0, 16 -; GFX8-NEXT: s_and_b32 s0, s0, 0xffff ; GFX8-NEXT: s_lshr_b32 s5, s1, 16 -; GFX8-NEXT: s_and_b32 s1, s1, 0xffff ; GFX8-NEXT: s_lshl_b32 s0, s0, 5 ; GFX8-NEXT: s_lshl_b32 s4, s4, 5 ; GFX8-NEXT: s_lshr_b32 s6, s2, 16 -; GFX8-NEXT: s_and_b32 s2, s2, 0xffff ; GFX8-NEXT: s_lshl_b32 s1, s1, 5 ; GFX8-NEXT: s_lshl_b32 s5, s5, 5 ; GFX8-NEXT: s_sext_i32_i16 s0, s0 ; GFX8-NEXT: s_sext_i32_i16 s4, s4 ; GFX8-NEXT: s_lshr_b32 s7, s3, 16 -; GFX8-NEXT: s_and_b32 s3, s3, 0xffff ; GFX8-NEXT: s_lshl_b32 s2, s2, 5 ; GFX8-NEXT: s_lshl_b32 s6, s6, 5 ; GFX8-NEXT: s_sext_i32_i16 s1, s1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll index a89e6bf6b20f6..4cf1c92539c36 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll @@ -91,26 +91,13 @@ define amdgpu_ps i8 @s_shl_i8(i8 inreg %value, i8 inreg %amount) { } define amdgpu_ps i8 @s_shl_i8_7(i8 inreg %value) { -; GFX6-LABEL: s_shl_i8_7: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_lshl_b32 s0, s0, 7 -; GFX6-NEXT: ; return to shader part epilog -; -; GFX8-LABEL: s_shl_i8_7: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_and_b32 s0, s0, 0xff -; GFX8-NEXT: s_lshl_b32 s0, s0, 7 -; GFX8-NEXT: ; return to shader part epilog -; 
-; GFX9-LABEL: s_shl_i8_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_and_b32 s0, s0, 0xff -; GFX9-NEXT: s_lshl_b32 s0, s0, 7 -; GFX9-NEXT: ; return to shader part epilog +; GCN-LABEL: s_shl_i8_7: +; GCN: ; %bb.0: +; GCN-NEXT: s_lshl_b32 s0, s0, 7 +; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_shl_i8_7: ; GFX10PLUS: ; %bb.0: -; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xff ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 7 ; GFX10PLUS-NEXT: ; return to shader part epilog %result = shl i8 %value, 7 @@ -660,26 +647,13 @@ define amdgpu_ps i16 @s_shl_i16(i16 inreg %value, i16 inreg %amount) { } define amdgpu_ps i16 @s_shl_i16_15(i16 inreg %value) { -; GFX6-LABEL: s_shl_i16_15: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_lshl_b32 s0, s0, 15 -; GFX6-NEXT: ; return to shader part epilog -; -; GFX8-LABEL: s_shl_i16_15: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_and_b32 s0, s0, 0xffff -; GFX8-NEXT: s_lshl_b32 s0, s0, 15 -; GFX8-NEXT: ; return to shader part epilog -; -; GFX9-LABEL: s_shl_i16_15: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_and_b32 s0, s0, 0xffff -; GFX9-NEXT: s_lshl_b32 s0, s0, 15 -; GFX9-NEXT: ; return to shader part epilog +; GCN-LABEL: s_shl_i16_15: +; GCN: ; %bb.0: +; GCN-NEXT: s_lshl_b32 s0, s0, 15 +; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_shl_i16_15: ; GFX10PLUS: ; %bb.0: -; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff ; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 15 ; GFX10PLUS-NEXT: ; return to shader part epilog %result = shl i16 %value, 15 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll index 2a950710ed2ca..3e090507ad642 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/shift.ll @@ -5,20 +5,12 @@ define i16 @test_lshr_i48(i48 %x) { ; RV32-LABEL: test_lshr_i48: ; RV32: # %bb.0: -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: slli a1, a1, 16 -; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: ret ; ; 
RV64-LABEL: test_lshr_i48: ; RV64: # %bb.0: -; RV64-NEXT: li a1, -1 -; RV64-NEXT: srli a1, a1, 16 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: srli a0, a0, 16 +; RV64-NEXT: srliw a0, a0, 16 ; RV64-NEXT: ret %lshr = lshr i48 %x, 16 %trunc = trunc i48 %lshr to i16 @@ -28,18 +20,12 @@ define i16 @test_lshr_i48(i48 %x) { define i16 @test_ashr_i48(i48 %x) { ; RV32-LABEL: test_ashr_i48: ; RV32: # %bb.0: -; RV32-NEXT: slli a1, a1, 16 -; RV32-NEXT: srai a1, a1, 16 -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: slli a1, a1, 16 -; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: srai a0, a0, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: test_ashr_i48: ; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: srai a0, a0, 16 -; RV64-NEXT: srai a0, a0, 16 +; RV64-NEXT: sraiw a0, a0, 16 ; RV64-NEXT: ret %ashr = ashr i48 %x, 16 %trunc = trunc i48 %ashr to i16 @@ -54,7 +40,7 @@ define i16 @test_shl_i48(i48 %x) { ; ; RV64-LABEL: test_shl_i48: ; RV64: # %bb.0: -; RV64-NEXT: slli a0, a0, 8 +; RV64-NEXT: slliw a0, a0, 8 ; RV64-NEXT: ret %shl = shl i48 %x, 8 %trunc = trunc i48 %shl to i16 @@ -65,34 +51,13 @@ define i16 @test_lshr_i48_2(i48 %x, i48 %y) { ; RV32-LABEL: test_lshr_i48_2: ; RV32: # %bb.0: ; RV32-NEXT: andi a2, a2, 15 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: li a4, 32 -; RV32-NEXT: and a1, a1, a3 -; RV32-NEXT: bltu a2, a4, .LBB3_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: addi a3, a2, -32 -; RV32-NEXT: srl a1, a1, a3 -; RV32-NEXT: bnez a2, .LBB3_3 -; RV32-NEXT: j .LBB3_4 -; RV32-NEXT: .LBB3_2: -; RV32-NEXT: srl a3, a0, a2 -; RV32-NEXT: neg a4, a2 -; RV32-NEXT: sll a1, a1, a4 -; RV32-NEXT: or a1, a3, a1 -; RV32-NEXT: beqz a2, .LBB3_4 -; RV32-NEXT: .LBB3_3: -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB3_4: +; RV32-NEXT: srl a0, a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: test_lshr_i48_2: ; RV64: # %bb.0: ; RV64-NEXT: andi a1, a1, 15 -; RV64-NEXT: li a2, -1 -; RV64-NEXT: srli a2, a2, 16 -; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: srl a0, a0, a1 +; RV64-NEXT: srlw a0, a0, a1 ; 
RV64-NEXT: ret %and = and i48 %y, 15 %lshr = lshr i48 %x, %and @@ -104,32 +69,13 @@ define i16 @test_ashr_i48_2(i48 %x, i48 %y) { ; RV32-LABEL: test_ashr_i48_2: ; RV32: # %bb.0: ; RV32-NEXT: andi a2, a2, 15 -; RV32-NEXT: slli a1, a1, 16 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: srai a1, a1, 16 -; RV32-NEXT: bltu a2, a3, .LBB4_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: addi a3, a2, -32 -; RV32-NEXT: sra a1, a1, a3 -; RV32-NEXT: bnez a2, .LBB4_3 -; RV32-NEXT: j .LBB4_4 -; RV32-NEXT: .LBB4_2: -; RV32-NEXT: srl a3, a0, a2 -; RV32-NEXT: neg a4, a2 -; RV32-NEXT: sll a1, a1, a4 -; RV32-NEXT: or a1, a3, a1 -; RV32-NEXT: beqz a2, .LBB4_4 -; RV32-NEXT: .LBB4_3: -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB4_4: +; RV32-NEXT: sra a0, a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: test_ashr_i48_2: ; RV64: # %bb.0: ; RV64-NEXT: andi a1, a1, 15 -; RV64-NEXT: slli a0, a0, 16 -; RV64-NEXT: srai a0, a0, 16 -; RV64-NEXT: sra a0, a0, a1 +; RV64-NEXT: sraw a0, a0, a1 ; RV64-NEXT: ret %and = and i48 %y, 15 %ashr = ashr i48 %x, %and @@ -141,19 +87,13 @@ define i16 @test_shl_i48_2(i48 %x, i48 %y) { ; RV32-LABEL: test_shl_i48_2: ; RV32: # %bb.0: ; RV32-NEXT: andi a2, a2, 15 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: bltu a2, a1, .LBB5_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 0 -; RV32-NEXT: ret -; RV32-NEXT: .LBB5_2: ; RV32-NEXT: sll a0, a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: test_shl_i48_2: ; RV64: # %bb.0: ; RV64-NEXT: andi a1, a1, 15 -; RV64-NEXT: sll a0, a0, a1 +; RV64-NEXT: sllw a0, a0, a1 ; RV64-NEXT: ret %and = and i48 %y, 15 %shl = shl i48 %x, %and From 758226e02b603c4743f1afc36db3c06bd530d2d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Tue, 15 Oct 2024 20:35:27 +0200 Subject: [PATCH 4/9] address review comments II --- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 87 +++++++++++-------- 1 file changed, 53 insertions(+), 34 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 
165a4b59af16f..daac89375a038 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -7623,53 +7623,72 @@ bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, LLT DstTy = MRI.getType(Unmerge->getReg(0)); + // $bv:_((<8 x s8>) = G_BUILD_VECTOR .... + // $anyext:_(<8 x s16>) = G_ANYEXT $bv + // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any + // + // -> + // + // $any:_(s16) = G_ANY $bv[0] + // $any1:_(s16) = G_ANY bv[1] + // $any2:_(s16) = G_ANY bv[2] + // $any3:_(s16) = G_ANY bv[3] + // $any4:_(s16) = G_ANY $bv[4] + // $any5:_(s16) = G_ANY $bv[5] + // $any6:_(s16) = G_ANY $bv[6] + // $any7:_(s16) = G_ANY $bv[7] + // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3 + // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7 + // We want to unmerge into vectors. if (!DstTy.isFixedVector()) return false; - if (const GAnyExt *Any = dyn_cast(Source)) { - const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg()); + const GAnyExt *Any = dyn_cast(Source); + if (!Any) + return false; - if (const GBuildVector *BV = dyn_cast(NextSource)) { - // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR + const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg()); - if (!MRI.hasOneNonDBGUse(BV->getReg(0))) - return false; + if (const GBuildVector *BV = dyn_cast(NextSource)) { + // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR - // FIXME: check element types? - if (BV->getNumSources() % Unmerge->getNumDefs() != 0) - return false; + if (!MRI.hasOneNonDBGUse(BV->getReg(0))) + return false; - LLT BigBvTy = MRI.getType(BV->getReg(0)); - LLT SmallBvTy = DstTy; - LLT SmallBvElemenTy = SmallBvTy.getElementType(); + // FIXME: check element types? 
+ if (BV->getNumSources() % Unmerge->getNumDefs() != 0) + return false; - if (!isLegalOrBeforeLegalizer( - {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}})) - return false; + LLT BigBvTy = MRI.getType(BV->getReg(0)); + LLT SmallBvTy = DstTy; + LLT SmallBvElemenTy = SmallBvTy.getElementType(); - // We check the legality of scalar anyext. - if (!isLegalOrBeforeLegalizer( - {TargetOpcode::G_ANYEXT, - {SmallBvElemenTy, BigBvTy.getElementType()}})) - return false; + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}})) + return false; - MatchInfo = [=](MachineIRBuilder &B) { - // Build into each G_UNMERGE_VALUES def - // a small build vector with anyext from the source build vector. - for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) { - SmallVector Ops; - for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) { - Register SourceArray = - BV->getSourceReg(I * SmallBvTy.getNumElements() + J); - auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray); - Ops.push_back(AnyExt.getReg(0)); - } - B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops); - }; + // We check the legality of scalar anyext. + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_ANYEXT, + {SmallBvElemenTy, BigBvTy.getElementType()}})) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + // Build into each G_UNMERGE_VALUES def + // a small build vector with anyext from the source build vector. 
+ for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) { + SmallVector Ops; + for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) { + Register SourceArray = + BV->getSourceReg(I * SmallBvTy.getNumElements() + J); + auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray); + Ops.push_back(AnyExt.getReg(0)); + } + B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops); }; - return true; }; + return true; }; return false; From e312b4ecc08b04621f7fc00e95df73e4ef802024 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Tue, 15 Oct 2024 20:59:09 +0200 Subject: [PATCH 5/9] fix docstring --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index daac89375a038..e67f22bc68392 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -7630,9 +7630,9 @@ bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, // -> // // $any:_(s16) = G_ANY $bv[0] - // $any1:_(s16) = G_ANY bv[1] - // $any2:_(s16) = G_ANY bv[2] - // $any3:_(s16) = G_ANY bv[3] + // $any1:_(s16) = G_ANY $bv[1] + // $any2:_(s16) = G_ANY $bv[2] + // $any3:_(s16) = G_ANY $bv[3] // $any4:_(s16) = G_ANY $bv[4] // $any5:_(s16) = G_ANY $bv[5] // $any6:_(s16) = G_ANY $bv[6] From 31956353e2a7304fdfabd7a08835d1cc77226a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Tue, 15 Oct 2024 21:09:52 +0200 Subject: [PATCH 6/9] small fix --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index e67f22bc68392..b882b4334015b 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -7624,7 +7624,7 @@ bool 
CombinerHelper::matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, LLT DstTy = MRI.getType(Unmerge->getReg(0)); // $bv:_((<8 x s8>) = G_BUILD_VECTOR .... - // $anyext:_(<8 x s16>) = G_ANYEXT $bv + // $any:_(<8 x s16>) = G_ANYEXT $bv // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any // // -> From 944f801d66121e58f48912eda62595722522597f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Tue, 15 Oct 2024 21:11:08 +0200 Subject: [PATCH 7/9] small fix II --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index b882b4334015b..ff5bbab661cf6 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -7623,7 +7623,7 @@ bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, LLT DstTy = MRI.getType(Unmerge->getReg(0)); - // $bv:_((<8 x s8>) = G_BUILD_VECTOR .... + // $bv:_(<8 x s8>) = G_BUILD_VECTOR .... 
// $any:_(<8 x s16>) = G_ANYEXT $bv // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any // From afdec48548f129ca42ae9e58495c0c945da59b5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Tue, 15 Oct 2024 22:01:53 +0200 Subject: [PATCH 8/9] small fix III --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index ff5bbab661cf6..b7ddf9f479ef8 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -7629,14 +7629,14 @@ bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, // // -> // - // $any:_(s16) = G_ANY $bv[0] - // $any1:_(s16) = G_ANY $bv[1] - // $any2:_(s16) = G_ANY $bv[2] - // $any3:_(s16) = G_ANY $bv[3] - // $any4:_(s16) = G_ANY $bv[4] - // $any5:_(s16) = G_ANY $bv[5] - // $any6:_(s16) = G_ANY $bv[6] - // $any7:_(s16) = G_ANY $bv[7] + // $any:_(s16) = G_ANYEXT $bv[0] + // $any1:_(s16) = G_ANYEXT $bv[1] + // $any2:_(s16) = G_ANYEXT $bv[2] + // $any3:_(s16) = G_ANYEXT $bv[3] + // $any4:_(s16) = G_ANYEXT $bv[4] + // $any5:_(s16) = G_ANYEXT $bv[5] + // $any6:_(s16) = G_ANYEXT $bv[6] + // $any7:_(s16) = G_ANYEXT $bv[7] // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3 // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7 From b81f0091ee0bbdcce7e7963a8706a97d30e0cc16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Fri, 18 Oct 2024 16:01:00 +0200 Subject: [PATCH 9/9] fix test --- .../legalize-shuffle-vector-widen-crash.ll | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll index a8377c04dbec8..be80886ed3efe 100644 --- 
a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll @@ -9,19 +9,16 @@ define i32 @bar() { ; CHECK-LABEL: bar: ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: mov b1, v0[1] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov b3, v0[2] -; CHECK-NEXT: mov b0, v0[3] -; CHECK-NEXT: mov.s v2[0], w8 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov.s v2[1], w8 -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov.s v2[2], w8 -; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: umov.b w8, v0[0] +; CHECK-NEXT: umov.b w9, v0[1] +; CHECK-NEXT: mov.s v1[0], w8 +; CHECK-NEXT: umov.b w8, v0[2] +; CHECK-NEXT: mov.s v1[1], w9 +; CHECK-NEXT: umov.b w9, v0[3] ; CHECK-NEXT: movi.4s v0, #1 -; CHECK-NEXT: mov.s v2[3], w8 -; CHECK-NEXT: and.16b v0, v2, v0 +; CHECK-NEXT: mov.s v1[2], w8 +; CHECK-NEXT: mov.s v1[3], w9 +; CHECK-NEXT: and.16b v0, v1, v0 ; CHECK-NEXT: addv.4s s0, v0 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret