diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 91d2497fdb7e2..a868860f343ba 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5314,6 +5314,17 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
     Observer.changedInstr(MI);
     return Legalized;
   }
+  case TargetOpcode::G_ICMP: {
+    // TODO: A symmetric MoreTy works for targets such as NEON.
+    // For targets such as MVE, the result is a predicate vector (i1),
+    // so this will need some refactoring.
+    Observer.changingInstr(MI);
+    moreElementsVectorSrc(MI, MoreTy, 2);
+    moreElementsVectorSrc(MI, MoreTy, 3);
+    moreElementsVectorDst(MI, MoreTy, 0);
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
   default:
     return UnableToLegalize;
   }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index e94f9d0c68ffe..b561cb12c93a1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -495,6 +495,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       })
       .clampScalar(0, MinFPScalar, s128);
 
+  // FIXME: fix moreElementsToNextPow2
   getActionDefinitionsBuilder(G_ICMP)
       .legalFor({{s32, s32},
                  {s32, s64},
@@ -524,7 +525,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; },
          0, s64)
-      .clampNumElements(0, v2s32, v4s32);
+      .moreElementsToNextPow2(0)
+      .clampNumElements(0, v8s8, v16s8)
+      .clampNumElements(0, v4s16, v8s16)
+      .clampNumElements(0, v2s32, v4s32)
+      .clampNumElements(0, v2s64, v2s64);
 
   getActionDefinitionsBuilder(G_FCMP)
       // If we don't have full FP16 support, then scalarize the elements of
@@ -863,6 +868,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
          },
          0, s8)
       .minScalarOrElt(0, s8) // Worst case, we need at least s8.
+      .moreElementsToNextPow2(1)
       .clampMaxNumElements(1, s64, 2)
       .clampMaxNumElements(1, s32, 4)
       .clampMaxNumElements(1, s16, 8)
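(Editorial note, not part of the patch: how the two pieces above compose.) `moreElementsToNextPow2(0)` asks the legalizer to widen a non-power-of-two G_ICMP result vector, which is exactly the action the new `G_ICMP` case in `moreElementsVector` implements; the `clampNumElements` rules then keep each element type within what a 64- or 128-bit NEON register holds. A minimal rule chain for a hypothetical NEON-like target might read as follows (a sketch fragment only, not AArch64's real rule set; `v4s32` etc. are the usual LLT shorthands such as `LLT::fixed_vector(4, 32)`):

```cpp
// Illustrative sketch only, assuming a hypothetical NEON-like target.
getActionDefinitionsBuilder(G_ICMP)
    .legalFor({{v8s8, v8s8}, {v4s16, v4s16}, {v4s32, v4s32}})
    // Round odd lane counts up first, e.g. <3 x s32> -> <4 x s32>; the
    // new moreElementsVector case pads both sources with undef lanes
    // and widens the destination to the same type.
    .moreElementsToNextPow2(0)
    // Then bound the lane count so oversized results, e.g. <8 x s32>,
    // are broken back down into legal <4 x s32> pieces.
    .clampNumElements(0, v2s32, v4s32);
```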
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
index e9b3aa0a3a8fd..542cf018a6c00 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
@@ -330,3 +330,208 @@ body: |
     successors:
   bb.3:
     RET_ReallyLR
+...
+---
+name: test_3xs32_eq_pr_78181
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_3xs32_eq_pr_78181
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %const:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32), %const(s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32), %const(s32)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[ICMP]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: $w0 = COPY [[EVEC]](s32)
+    ; CHECK-NEXT: RET_ReallyLR
+    %const:_(s32) = G_IMPLICIT_DEF
+    %rhs:_(<3 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32)
+    %lhs:_(<3 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32)
+    %cmp:_(<3 x s32>) = G_ICMP intpred(eq), %lhs(<3 x s32>), %rhs
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s32>), %1(s32)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR
+...
+---
+name: test_3xs16_eq_pr_78181
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_3xs16_eq_pr_78181
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %const:_(s16) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16), %const(s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16), %const(s16)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<4 x s16>), [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ICMP]](<4 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: %zext:_(s32) = G_AND [[EVEC]], [[C1]]
+    ; CHECK-NEXT: $w0 = COPY %zext(s32)
+    ; CHECK-NEXT: RET_ReallyLR
+    %const:_(s16) = G_IMPLICIT_DEF
+    %rhs:_(<3 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16)
+    %lhs:_(<3 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16)
+    %cmp:_(<3 x s16>) = G_ICMP intpred(eq), %lhs(<3 x s16>), %rhs
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s16) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s16>), %1(s32)
+    %zext:_(s32) = G_ZEXT %2(s16)
+    $w0 = COPY %zext(s32)
+    RET_ReallyLR
+...
+---
+name: test_3xs8_eq_pr_78181
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_3xs8_eq_pr_78181
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %const:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s8>), [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s8>), [[UV1:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[ICMP]](<8 x s8>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV]](<4 x s8>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
+    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: %zext:_(s32) = G_AND [[EVEC]], [[C1]]
+    ; CHECK-NEXT: $w0 = COPY %zext(s32)
+    ; CHECK-NEXT: RET_ReallyLR
+    %const:_(s8) = G_IMPLICIT_DEF
+    %rhs:_(<3 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8)
+    %lhs:_(<3 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8)
+    %cmp:_(<3 x s8>) = G_ICMP intpred(eq), %lhs(<3 x s8>), %rhs
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s8) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s8>), %1(s32)
+    %zext:_(s32) = G_ZEXT %2(s8)
+    $w0 = COPY %zext(s32)
+    RET_ReallyLR
+...
+---
+name: test_3xs64_eq_clamp
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_3xs64_eq_clamp
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %const:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %const(s64), %const(s64)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %const(s64), %const(s64)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s64>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[ICMP]](<2 x s64>), [[C]](s64)
+    ; CHECK-NEXT: $x0 = COPY [[EVEC]](s64)
+    ; CHECK-NEXT: RET_ReallyLR
+    %const:_(s64) = G_IMPLICIT_DEF
+    %rhs:_(<3 x s64>) = G_BUILD_VECTOR %const(s64), %const(s64), %const(s64)
+    %lhs:_(<3 x s64>) = G_BUILD_VECTOR %const(s64), %const(s64), %const(s64)
+    %cmp:_(<3 x s64>) = G_ICMP intpred(eq), %lhs(<3 x s64>), %rhs
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s64) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s64>), %1(s32)
+    $x0 = COPY %2(s64)
+    RET_ReallyLR
+...
+---
+name: test_5xs32_eq_clamp
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_5xs32_eq_clamp
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %const:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32), %const(s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32), %const(s32)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[ICMP]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: $w0 = COPY [[EVEC]](s32)
+    ; CHECK-NEXT: RET_ReallyLR
+    %const:_(s32) = G_IMPLICIT_DEF
+    %rhs:_(<5 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32), %const(s32), %const(s32)
+    %lhs:_(<5 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32), %const(s32), %const(s32)
+    %cmp:_(<5 x s32>) = G_ICMP intpred(eq), %lhs(<5 x s32>), %rhs
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %cmp(<5 x s32>), %1(s32)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR
+...
+---
+name: test_7xs16_eq_clamp
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_7xs16_eq_clamp
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %const:_(s16) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s16>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[ICMP]](<8 x s16>), [[C]](s64)
+    ; CHECK-NEXT: %zext:_(s32) = G_ZEXT [[EVEC]](s16)
+    ; CHECK-NEXT: $w0 = COPY %zext(s32)
+    ; CHECK-NEXT: RET_ReallyLR
+    %const:_(s16) = G_IMPLICIT_DEF
+    %rhs:_(<7 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16)
+    %lhs:_(<7 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16)
+    %cmp:_(<7 x s16>) = G_ICMP intpred(eq), %lhs(<7 x s16>), %rhs
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s16) = G_EXTRACT_VECTOR_ELT %cmp(<7 x s16>), %1(s32)
+    %zext:_(s32) = G_ZEXT %2(s16)
+    $w0 = COPY %zext(s32)
+    RET_ReallyLR
+...
+---
+name: test_9xs8_eq_clamp
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_9xs8_eq_clamp
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: %const:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<16 x s8>), [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[ICMP]](<16 x s8>), [[C]](s64)
+    ; CHECK-NEXT: %zext:_(s32) = G_ZEXT [[EVEC]](s8)
+    ; CHECK-NEXT: $w0 = COPY %zext(s32)
+    ; CHECK-NEXT: RET_ReallyLR
+    %const:_(s8) = G_IMPLICIT_DEF
+    %rhs:_(<9 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8)
+    %lhs:_(<9 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8)
+    %cmp:_(<9 x s8>) = G_ICMP intpred(eq), %lhs(<9 x s8>), %rhs
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s8) = G_EXTRACT_VECTOR_ELT %cmp(<9 x s8>), %1(s32)
+    %zext:_(s32) = G_ZEXT %2(s8)
+    $w0 = COPY %zext(s32)
+    RET_ReallyLR
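(Editorial note, not part of the patch.) The tests above pin down the lane counts this produces: 3 rounds up to 4, 5 and 7 round up to 8, and 9 rounds up to 16, after which the clamp rules split anything wider than a 128-bit register. A self-contained sketch of that rounding step; illustrative only, the real implementation uses LLVM's `PowerOf2Ceil`-style helpers rather than this hand-rolled loop:

```cpp
#include <cstdint>
#include <cstdio>

// Round an element count up to the next power of two, as
// moreElementsToNextPow2 does for vector lane counts.
static uint64_t nextPow2(uint64_t N) {
  uint64_t P = 1;
  while (P < N)
    P <<= 1;
  return P;
}

int main() {
  // The lane counts exercised by the new tests.
  for (uint64_t N : {3, 5, 7, 9})
    std::printf("%llu lanes widen to %llu\n", (unsigned long long)N,
                (unsigned long long)nextPow2(N));
  // Prints 3 -> 4, 5 -> 8, 7 -> 8, 9 -> 16, matching the CHECK lines.
  return 0;
}
```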
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
index 07946388590e2..2cfee7bcc462a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -316,26 +316,14 @@ body: |
     ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[DEF]](s32)
     ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[DEF]](s32)
     ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]], shufflemask(0, 1, 5, 6)
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
-    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
-    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
-    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
-    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
-    ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s32>), [[UV11:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
-    ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s32>), [[UV13:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
-    ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<2 x s32>), [[UV15:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV]](<2 x s32>), [[UV3]](<2 x s32>)
-    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<4 x s32>), [[C]](s64)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV4]](<2 x s32>), [[UV7]](<2 x s32>)
-    ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS1]](<4 x s32>), [[C1]](s64)
+    ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<4 x s32>), [[C1]](s64)
     ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV8]](<2 x s32>), [[UV11]](<2 x s32>)
-    ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS2]](<4 x s32>), [[C2]](s64)
+    ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<4 x s32>), [[C2]](s64)
     ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV12]](<2 x s32>), [[UV15]](<2 x s32>)
-    ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS3]](<4 x s32>), [[C3]](s64)
+    ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<4 x s32>), [[C3]](s64)
     ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32)
     ; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR2]](<4 x s32>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 7c71449a31633..fe4da2e7cf36b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -2,10 +2,7 @@
 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck -check-prefixes=CHECK,CHECK-SD %s
 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
-; CHECK-GI: warning: Instruction selection used fallback path for uabd16b_rdx
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uabd4s_rdx
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sabd4s_rdx
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_8b
+; CHECK-GI: warning: Instruction selection used fallback path for abs_8b
 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_16b
 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_4h
 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_8h
@@ -244,14 +241,32 @@ declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
 declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
 
 define i16 @uabd16b_rdx(ptr %a, ptr %b) {
-; CHECK-LABEL: uabd16b_rdx:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: uabd.16b v0, v0, v1
-; CHECK-NEXT: uaddlv.16b h0, v0
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: uabd16b_rdx:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr q0, [x0]
+; CHECK-SD-NEXT: ldr q1, [x1]
+; CHECK-SD-NEXT: uabd.16b v0, v0, v1
+; CHECK-SD-NEXT: uaddlv.16b h0, v0
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: uabd16b_rdx:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr q1, [x0]
+; CHECK-GI-NEXT: ldr q2, [x1]
+; CHECK-GI-NEXT: movi.2d v0, #0000000000000000
+; CHECK-GI-NEXT: usubl.8h v3, v1, v2
+; CHECK-GI-NEXT: usubl2.8h v1, v1, v2
+; CHECK-GI-NEXT: neg.8h v2, v3
+; CHECK-GI-NEXT: neg.8h v4, v1
+; CHECK-GI-NEXT: cmgt.8h v5, v0, v3
+; CHECK-GI-NEXT: cmgt.8h v0, v0, v1
+; CHECK-GI-NEXT: bif.16b v2, v3, v5
+; CHECK-GI-NEXT: bsl.16b v0, v4, v1
+; CHECK-GI-NEXT: add.8h v0, v2, v0
+; CHECK-GI-NEXT: addv.8h h0, v0
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
   %aload = load <16 x i8>, ptr %a, align 1
   %bload = load <16 x i8>, ptr %b, align 1
   %aext = zext <16 x i8> %aload to <16 x i16>
@@ -468,14 +483,32 @@ declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
 declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
 
 define i64 @uabd4s_rdx(ptr %a, ptr %b, i32 %h) {
-; CHECK-LABEL: uabd4s_rdx:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: uabd.4s v0, v0, v1
-; CHECK-NEXT: uaddlv.4s d0, v0
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: uabd4s_rdx:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr q0, [x0]
+; CHECK-SD-NEXT: ldr q1, [x1]
+; CHECK-SD-NEXT: uabd.4s v0, v0, v1
+; CHECK-SD-NEXT: uaddlv.4s d0, v0
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: uabd4s_rdx:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr q1, [x0]
+; CHECK-GI-NEXT: ldr q2, [x1]
+; CHECK-GI-NEXT: movi.2d v0, #0000000000000000
+; CHECK-GI-NEXT: usubl.2d v3, v1, v2
+; CHECK-GI-NEXT: usubl2.2d v1, v1, v2
+; CHECK-GI-NEXT: neg.2d v2, v3
+; CHECK-GI-NEXT: neg.2d v4, v1
+; CHECK-GI-NEXT: cmgt.2d v5, v0, v3
+; CHECK-GI-NEXT: cmgt.2d v0, v0, v1
+; CHECK-GI-NEXT: bif.16b v2, v3, v5
+; CHECK-GI-NEXT: bsl.16b v0, v4, v1
+; CHECK-GI-NEXT: add.2d v0, v2, v0
+; CHECK-GI-NEXT: addp.2d d0, v0
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: ret
   %aload = load <4 x i32>, ptr %a, align 1
   %bload = load <4 x i32>, ptr %b, align 1
   %aext = zext <4 x i32> %aload to <4 x i64>
@@ -489,12 +522,28 @@ define i64 @uabd4s_rdx(ptr %a, ptr %b, i32 %h) {
 }
 
 define i64 @sabd4s_rdx(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: sabd4s_rdx:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sabd.4s v0, v0, v1
-; CHECK-NEXT: uaddlv.4s d0, v0
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sabd4s_rdx:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sabd.4s v0, v0, v1
+; CHECK-SD-NEXT: uaddlv.4s d0, v0
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sabd4s_rdx:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ssubl.2d v3, v0, v1
+; CHECK-GI-NEXT: ssubl2.2d v0, v0, v1
+; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
+; CHECK-GI-NEXT: neg.2d v1, v3
+; CHECK-GI-NEXT: neg.2d v4, v0
+; CHECK-GI-NEXT: cmgt.2d v5, v2, v3
+; CHECK-GI-NEXT: cmgt.2d v2, v2, v0
+; CHECK-GI-NEXT: bif.16b v1, v3, v5
+; CHECK-GI-NEXT: bit.16b v0, v4, v2
+; CHECK-GI-NEXT: add.2d v0, v1, v0
+; CHECK-GI-NEXT: addp.2d d0, v0
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: ret
   %aext = sext <4 x i32> %a to <4 x i64>
   %bext = sext <4 x i32> %b to <4 x i64>
   %abdiff = sub nsw <4 x i64> %aext, %bext
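(Editorial note, not part of the patch.) With the widening rule in place, GlobalISel no longer falls back for these absolute-difference reductions; the CHECK-GI sequences express the same computation with widening subtracts (usubl/usubl2), a compare-and-select for the absolute value (cmgt, neg, bif/bsl), and a final reduction (addv/addp). As a scalar reference for what uabd16b_rdx computes; illustrative only, the function name here is made up:

```cpp
#include <cstdint>

// Scalar reference for uabd16b_rdx: zero-extend both byte vectors to
// 16 bits, take the per-lane absolute difference, and sum the lanes.
// The CHECK-GI asm does the same thing vectorized: usubl/usubl2 perform
// the widened subtract, cmgt + neg + bif/bsl select |d|, addv reduces.
uint16_t uabd16b_rdx_ref(const uint8_t a[16], const uint8_t b[16]) {
  uint16_t sum = 0;
  for (int i = 0; i < 16; ++i) {
    int32_t d = (int32_t)a[i] - (int32_t)b[i]; // widened subtract
    sum += (uint16_t)(d < 0 ? -d : d);         // select(neg d, d)
  }
  return sum;
}
```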
diff --git a/llvm/test/CodeGen/AArch64/icmp.ll b/llvm/test/CodeGen/AArch64/icmp.ll
index d2b44bb5e3f9f..26711ea584c97 100644
--- a/llvm/test/CodeGen/AArch64/icmp.ll
+++ b/llvm/test/CodeGen/AArch64/icmp.ll
@@ -2,11 +2,7 @@
 ; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
 ; RUN: llc -mtriple=aarch64-none-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
-; CHECK-GI: warning: Instruction selection used fallback path for v3i64_i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64_i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v3i32_i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16_i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8_i8
+; CHECK-GI: warning: Instruction selection used fallback path for v3i32_i32
 
 define i64 @i64_i64(i64 %a, i64 %b, i64 %d, i64 %e) {
 ; CHECK-LABEL: i64_i64:
@@ -71,33 +67,63 @@ entry:
 }
 
 define <3 x i64> @v3i64_i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %d, <3 x i64> %e) {
-; CHECK-LABEL: v3i64_i64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d4 killed $d4 def $q4
-; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d6 killed $d6 def $q6
-; CHECK-NEXT: // kill: def $d7 killed $d7 def $q7
-; CHECK-NEXT: // kill: def $d5 killed $d5 def $q5
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: ldr d16, [sp, #24]
-; CHECK-NEXT: ldr d17, [sp]
-; CHECK-NEXT: mov v3.d[1], v4.d[0]
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: mov v6.d[1], v7.d[0]
-; CHECK-NEXT: ldp d1, d4, [sp, #8]
-; CHECK-NEXT: mov v1.d[1], v4.d[0]
-; CHECK-NEXT: cmgt v0.2d, v3.2d, v0.2d
-; CHECK-NEXT: bsl v0.16b, v6.16b, v1.16b
-; CHECK-NEXT: cmgt v1.2d, v5.2d, v2.2d
-; CHECK-NEXT: mov v2.16b, v1.16b
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT: bsl v2.16b, v17.16b, v16.16b
-; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v3i64_i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6
+; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7
+; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: ldr d16, [sp, #24]
+; CHECK-SD-NEXT: ldr d17, [sp]
+; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: mov v6.d[1], v7.d[0]
+; CHECK-SD-NEXT: ldp d1, d4, [sp, #8]
+; CHECK-SD-NEXT: mov v1.d[1], v4.d[0]
+; CHECK-SD-NEXT: cmgt v0.2d, v3.2d, v0.2d
+; CHECK-SD-NEXT: bsl v0.16b, v6.16b, v1.16b
+; CHECK-SD-NEXT: cmgt v1.2d, v5.2d, v2.2d
+; CHECK-SD-NEXT: mov v2.16b, v1.16b
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: bsl v2.16b, v17.16b, v16.16b
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v3i64_i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: // kill: def $d6 killed $d6 def $q6
+; CHECK-GI-NEXT: // kill: def $d5 killed $d5 def $q5
+; CHECK-GI-NEXT: // kill: def $d7 killed $d7 def $q7
+; CHECK-GI-NEXT: ldr x8, [sp]
+; CHECK-GI-NEXT: ldr x10, [sp, #24]
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT: cmgt v2.2d, v5.2d, v2.2d
+; CHECK-GI-NEXT: ldp d1, d4, [sp, #8]
+; CHECK-GI-NEXT: mov v6.d[1], v7.d[0]
+; CHECK-GI-NEXT: fmov x9, d2
+; CHECK-GI-NEXT: mov v1.d[1], v4.d[0]
+; CHECK-GI-NEXT: cmgt v0.2d, v3.2d, v0.2d
+; CHECK-GI-NEXT: sbfx x9, x9, #0, #1
+; CHECK-GI-NEXT: bsl v0.16b, v6.16b, v1.16b
+; CHECK-GI-NEXT: and x8, x8, x9
+; CHECK-GI-NEXT: bic x9, x10, x9
+; CHECK-GI-NEXT: orr x8, x8, x9
+; CHECK-GI-NEXT: fmov d2, x8
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
 entry:
   %c = icmp slt <3 x i64> %a, %b
   %s = select <3 x i1> %c, <3 x i64> %d, <3 x i64> %e
@@ -105,13 +131,21 @@ entry:
 }
 
 define <4 x i64> @v4i64_i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %d, <4 x i64> %e) {
-; CHECK-LABEL: v4i64_i64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cmgt v1.2d, v3.2d, v1.2d
-; CHECK-NEXT: cmgt v0.2d, v2.2d, v0.2d
-; CHECK-NEXT: bsl v1.16b, v5.16b, v7.16b
-; CHECK-NEXT: bsl v0.16b, v4.16b, v6.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v4i64_i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmgt v1.2d, v3.2d, v1.2d
+; CHECK-SD-NEXT: cmgt v0.2d, v2.2d, v0.2d
+; CHECK-SD-NEXT: bsl v1.16b, v5.16b, v7.16b
+; CHECK-SD-NEXT: bsl v0.16b, v4.16b, v6.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v4i64_i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmgt v0.2d, v2.2d, v0.2d
+; CHECK-GI-NEXT: cmgt v1.2d, v3.2d, v1.2d
+; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v6.16b
+; CHECK-GI-NEXT: bsl v1.16b, v5.16b, v7.16b
+; CHECK-GI-NEXT: ret
 entry:
   %c = icmp slt <4 x i64> %a, %b
   %s = select <4 x i1> %c, <4 x i64> %d, <4 x i64> %e
@@ -201,13 +235,21 @@ entry:
 }
 
 define <16 x i16> @v16i16_i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %d, <16 x i16> %e) {
-; CHECK-LABEL: v16i16_i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cmgt v1.8h, v3.8h, v1.8h
-; CHECK-NEXT: cmgt v0.8h, v2.8h, v0.8h
-; CHECK-NEXT: bsl v1.16b, v5.16b, v7.16b
-; CHECK-NEXT: bsl v0.16b, v4.16b, v6.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v16i16_i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmgt v1.8h, v3.8h, v1.8h
+; CHECK-SD-NEXT: cmgt v0.8h, v2.8h, v0.8h
+; CHECK-SD-NEXT: bsl v1.16b, v5.16b, v7.16b
+; CHECK-SD-NEXT: bsl v0.16b, v4.16b, v6.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v16i16_i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmgt v0.8h, v2.8h, v0.8h
+; CHECK-GI-NEXT: cmgt v1.8h, v3.8h, v1.8h
+; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v6.16b
+; CHECK-GI-NEXT: bsl v1.16b, v5.16b, v7.16b
+; CHECK-GI-NEXT: ret
 entry:
   %c = icmp slt <16 x i16> %a, %b
   %s = select <16 x i1> %c, <16 x i16> %d, <16 x i16> %e
@@ -239,13 +281,21 @@ entry:
 }
 
 define <32 x i8> @v32i8_i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %d, <32 x i8> %e) {
-; CHECK-LABEL: v32i8_i8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cmgt v1.16b, v3.16b, v1.16b
-; CHECK-NEXT: cmgt v0.16b, v2.16b, v0.16b
-; CHECK-NEXT: bsl v1.16b, v5.16b, v7.16b
-; CHECK-NEXT: bsl v0.16b, v4.16b, v6.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v32i8_i8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmgt v1.16b, v3.16b, v1.16b
+; CHECK-SD-NEXT: cmgt v0.16b, v2.16b, v0.16b
+; CHECK-SD-NEXT: bsl v1.16b, v5.16b, v7.16b
+; CHECK-SD-NEXT: bsl v0.16b, v4.16b, v6.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v32i8_i8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmgt v0.16b, v2.16b, v0.16b
+; CHECK-GI-NEXT: cmgt v1.16b, v3.16b, v1.16b
+; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v6.16b
+; CHECK-GI-NEXT: bsl v1.16b, v5.16b, v7.16b
+; CHECK-GI-NEXT: ret
 entry:
   %c = icmp slt <32 x i8> %a, %b
   %s = select <32 x i1> %c, <32 x i8> %d, <32 x i8> %e