diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index ae34e6b7dcc3c..854e8891e4e3d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1336,7 +1336,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::MUL, MVT::v1i64, Custom); // Saturates - for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, + for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64, MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { setOperationAction(ISD::SADDSAT, VT, Legal); setOperationAction(ISD::UADDSAT, VT, Legal); diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 6adf84879052f..d5c907988888f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -7764,9 +7764,9 @@ multiclass SIMDThreeScalarD opc, string asm, } multiclass SIMDThreeScalarBHSD opc, string asm, - SDPatternOperator OpNode> { + SDPatternOperator OpNode, SDPatternOperator SatOp> { def v1i64 : BaseSIMDThreeScalar; + [(set (v1i64 FPR64:$Rd), (SatOp (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>; def v1i32 : BaseSIMDThreeScalar; def v1i16 : BaseSIMDThreeScalar; def v1i8 : BaseSIMDThreeScalar; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 96d0146c1e752..1bd77c9d80333 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -6336,19 +6336,19 @@ defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONandIsStreamingSafe>; defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONandIsStreamingSafe>; defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONandIsStreamingSafe>; -defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; +defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd, saddsat>; defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; -defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>; -defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>; -defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>; +defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl", int_aarch64_neon_sqrshl, int_aarch64_neon_sqrshl>; +defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl, int_aarch64_neon_sqshl>; +defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub, ssubsat>; defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>; defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>; defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; -defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>; -defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>; -defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>; -defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; +defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd, uaddsat>; +defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl", int_aarch64_neon_uqrshl, int_aarch64_neon_uqrshl>; +defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl, int_aarch64_neon_uqshl>; +defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub, usubsat>; defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; let Predicates = [HasRDM] in { diff --git a/llvm/test/CodeGen/AArch64/arm64-vqadd.ll b/llvm/test/CodeGen/AArch64/arm64-vqadd.ll index fa515fe352d8f..ff1fedad43393 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vqadd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vqadd.ll @@ -45,13 +45,7 @@ define <1 x i64> @sqadd1d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: fmov x8, d1 -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: adds x8, x9, x8 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x8, x9, x8, vs -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: sqadd d0, d0, d1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp2 = load <1 x i64>, ptr %B @@ -104,11 +98,7 @@ define <1 x i64> @uqadd1d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: fmov x8, d1 -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: adds x8, x9, x8 -; CHECK-NEXT: csinv x8, x8, xzr, lo -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: uqadd d0, d0, d1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp2 = load <1 x i64>, ptr %B diff --git a/llvm/test/CodeGen/AArch64/arm64-vqsub.ll b/llvm/test/CodeGen/AArch64/arm64-vqsub.ll index ffcb7d668d637..b8168eba8cebb 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vqsub.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vqsub.ll @@ -45,13 +45,7 @@ define <1 x i64> @sqsub1d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: fmov x8, d1 -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: subs x8, x9, x8 -; CHECK-NEXT: asr x9, x8, #63 -; CHECK-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-NEXT: csel x8, x9, x8, vs -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: sqsub d0, d0, d1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp2 = load <1 x i64>, ptr %B @@ -104,11 +98,7 @@ define <1 x i64> @uqsub1d(ptr %A, ptr %B) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: fmov x8, d1 -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: subs x8, x9, x8 -; CHECK-NEXT: csel x8, xzr, x8, lo -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: uqsub d0, d0, d1 ; CHECK-NEXT: ret %tmp1 = load <1 x i64>, ptr %A %tmp2 = load <1 x i64>, ptr %B diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index 5f0d4c7bffe53..1c4a504d0ab70 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -447,13 +447,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { define void @v1i64(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-SD-LABEL: v1i64: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr x8, [x1] -; CHECK-SD-NEXT: ldr x9, [x0] -; CHECK-SD-NEXT: adds x8, x9, x8 -; CHECK-SD-NEXT: asr x9, x8, #63 -; CHECK-SD-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-SD-NEXT: csel x8, x9, x8, vs -; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ldr d1, [x1] +; CHECK-SD-NEXT: sqadd d0, d0, d1 ; CHECK-SD-NEXT: str d0, [x2] ; CHECK-SD-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index ed79d0158651a..3af858713525b 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -449,13 +449,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { define void @v1i64(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-SD-LABEL: v1i64: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr x8, [x1] -; CHECK-SD-NEXT: ldr x9, [x0] -; CHECK-SD-NEXT: subs x8, x9, x8 -; CHECK-SD-NEXT: asr x9, x8, #63 -; CHECK-SD-NEXT: eor x9, x9, #0x8000000000000000 -; CHECK-SD-NEXT: csel x8, x9, x8, vs -; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ldr d1, [x1] +; CHECK-SD-NEXT: sqsub d0, d0, d1 ; CHECK-SD-NEXT: str d0, [x2] ; CHECK-SD-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index dcfb5176db12d..3cfb24aaccb11 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -439,11 +439,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { define void @v1i64(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-SD-LABEL: v1i64: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr x8, [x1] -; CHECK-SD-NEXT: ldr x9, [x0] -; CHECK-SD-NEXT: adds x8, x9, x8 -; CHECK-SD-NEXT: csinv x8, x8, xzr, lo -; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ldr d1, [x1] +; CHECK-SD-NEXT: uqadd d0, d0, d1 ; CHECK-SD-NEXT: str d0, [x2] ; CHECK-SD-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index 0049aba62d27f..a71cf95a728db 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -436,11 +436,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { define void @v1i64(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-SD-LABEL: v1i64: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ldr x8, [x1] -; CHECK-SD-NEXT: ldr x9, [x0] -; CHECK-SD-NEXT: subs x8, x9, x8 -; CHECK-SD-NEXT: csel x8, xzr, x8, lo -; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ldr d1, [x1] +; CHECK-SD-NEXT: uqsub d0, d0, d1 ; CHECK-SD-NEXT: str d0, [x2] ; CHECK-SD-NEXT: ret ;