From 3bbc260533b1338eb60af22af6f7b22334bd9e96 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Fri, 14 Jun 2024 02:57:16 -0400 Subject: [PATCH 1/2] [CodeGen] Support SLPVectorizer cases of tan across all backends Add a default f16 type promotion --- .../include/llvm/Analysis/TargetLibraryInfo.h | 3 + .../llvm/Analysis/TargetTransformInfoImpl.h | 11 +- llvm/lib/Analysis/ValueTracking.cpp | 4 + llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 +- llvm/test/CodeGen/RISCV/half-intrinsics.ll | 120 ++++++++++++++++++ .../CodeGen/WebAssembly/simd-unsupported.ll | 16 +++ .../Transforms/LoopVectorize/intrinsic.ll | 54 ++++++++ ...ccelerate-vector-functions-inseltpoison.ll | 12 +- .../AArch64/accelerate-vector-functions.ll | 12 +- .../test/Transforms/SLPVectorizer/X86/call.ll | 19 +++ 10 files changed, 235 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index f5da222d11f55..bb882f02a6e43 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -415,10 +415,12 @@ class TargetLibraryInfo { return false; switch (F) { default: break; + // clang-format off case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl: case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl: case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl: case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl: + case LibFunc_tan: case LibFunc_tanf: case LibFunc_tanl: case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: case LibFunc_sqrt_finite: case LibFunc_sqrtf_finite: case LibFunc_sqrtl_finite: @@ -437,6 +439,7 @@ class TargetLibraryInfo { case LibFunc_memcmp: case LibFunc_bcmp: case LibFunc_strcmp: case LibFunc_strcpy: case LibFunc_stpcpy: case LibFunc_strlen: case LibFunc_strnlen: case LibFunc_memchr: case LibFunc_mempcpy: + // clang-format on return true; } return false; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 7828bdc1f1f43..b1d426830f0da 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -156,14 +156,17 @@ class TargetTransformInfoImplBase { StringRef Name = F->getName(); // These will all likely lower to a single selection DAG node. + // clang-format off if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || - Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" || + Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "fmin" || Name == "fminf" || Name == "fminl" || Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" || - Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" || - Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") + Name == "sin" || Name == "sinf" || Name == "sinl" || + Name == "cos" || Name == "cosf" || Name == "cosl" || + Name == "tan" || Name == "tanf" || Name == "tanl" || + Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") return false; - + // clang-format on // These are all likely to be optimized into something smaller. 
if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || Name == "exp2l" || Name == "exp2f" || Name == "floor" || diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 8126d2a1acc27..0a477082a7d12 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -3993,6 +3993,10 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB, case LibFunc_cosf: case LibFunc_cosl: return Intrinsic::cos; + case LibFunc_tan: + case LibFunc_tanf: + case LibFunc_tanl: + return Intrinsic::tan; case LibFunc_exp: case LibFunc_expf: case LibFunc_expl: diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 8240a1fd7e2ff..de534994fa48c 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -961,7 +961,7 @@ void TargetLoweringBase::initActions() { setOperationAction( {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG, ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG, - ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT}, + ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::FTAN}, VT, Expand); // Constrained floating-point operations default to expand. @@ -1020,6 +1020,7 @@ void TargetLoweringBase::initActions() { ISD::FTAN}, {MVT::f32, MVT::f64, MVT::f128}, Expand); + setOperationAction(ISD::FTAN, MVT::f16, Promote); // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll index c493a9b2cb1df..bfc26b0d65980 100644 --- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll @@ -2862,3 +2862,123 @@ define i1 @isnan_d_fpclass(half %x) { %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 3) ; nan ret i1 %1 } + +declare half @llvm.tan.f16(half) + +define half @tan_f16(half %a) nounwind { +; RV32IZFH-LABEL: tan_f16: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call tanf +; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: tan_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFH-NEXT: call tanf +; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; +; RV32IZHINX-LABEL: tan_f16: +; RV32IZHINX: # %bb.0: +; RV32IZHINX-NEXT: addi sp, sp, -16 +; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINX-NEXT: fcvt.s.h a0, a0 +; RV32IZHINX-NEXT: call tanf +; RV32IZHINX-NEXT: fcvt.h.s a0, a0 +; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZHINX-NEXT: addi sp, sp, 16 +; RV32IZHINX-NEXT: ret +; +; RV64IZHINX-LABEL: tan_f16: +; RV64IZHINX: # %bb.0: +; RV64IZHINX-NEXT: addi sp, sp, -16 +; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZHINX-NEXT: fcvt.s.h a0, a0 +; RV64IZHINX-NEXT: call tanf +; RV64IZHINX-NEXT: fcvt.h.s a0, a0 +; RV64IZHINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZHINX-NEXT: addi sp, sp, 16 +; RV64IZHINX-NEXT: ret +; +; RV32I-LABEL: tan_f16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill 
+; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: call __extendhfsf2 +; RV32I-NEXT: call tanf +; RV32I-NEXT: call __truncsfhf2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: tan_f16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: call __extendhfsf2 +; RV64I-NEXT: call tanf +; RV64I-NEXT: call __truncsfhf2 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV32IZFHMIN-LABEL: tan_f16: +; RV32IZFHMIN: # %bb.0: +; RV32IZFHMIN-NEXT: addi sp, sp, -16 +; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFHMIN-NEXT: call tanf +; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFHMIN-NEXT: addi sp, sp, 16 +; RV32IZFHMIN-NEXT: ret +; +; RV64IZFHMIN-LABEL: tan_f16: +; RV64IZFHMIN: # %bb.0: +; RV64IZFHMIN-NEXT: addi sp, sp, -16 +; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFHMIN-NEXT: call tanf +; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFHMIN-NEXT: addi sp, sp, 16 +; RV64IZFHMIN-NEXT: ret +; +; RV32IZHINXMIN-LABEL: tan_f16: +; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: addi sp, sp, -16 +; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV32IZHINXMIN-NEXT: call tanf +; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZHINXMIN-NEXT: addi sp, sp, 16 +; RV32IZHINXMIN-NEXT: ret +; +; RV64IZHINXMIN-LABEL: tan_f16: +; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: addi sp, sp, -16 +; RV64IZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV64IZHINXMIN-NEXT: call tanf +; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV64IZHINXMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZHINXMIN-NEXT: addi sp, sp, 16 +; RV64IZHINXMIN-NEXT: ret + %1 = call half @llvm.tan.f16(half %a) + ret half %1 +} diff --git a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll index d214a3af5a151..1d6e073271efa 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll @@ -377,6 +377,14 @@ define <4 x float> @cos_v4f32(<4 x float> %x) { ret <4 x float> %v } +; CHECK-LABEL: tan_v4f32: +; CHECK: call $push[[L:[0-9]+]]=, tanf +declare <4 x float> @llvm.tan.v4f32(<4 x float>) +define <4 x float> @tan_v4f32(<4 x float> %x) { + %v = call <4 x float> @llvm.tan.v4f32(<4 x float> %x) + ret <4 x float> %v +} + ; CHECK-LABEL: powi_v4f32: ; CHECK: call $push[[L:[0-9]+]]=, __powisf2 declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32) @@ -469,6 +477,14 @@ define <2 x double> @cos_v2f64(<2 x double> %x) { ret <2 x double> %v } +; CHECK-LABEL: tan_v2f64: +; CHECK: call $push[[L:[0-9]+]]=, tan +declare <2 x double> @llvm.tan.v2f64(<2 x double>) +define <2 x double> @tan_v2f64(<2 x double> %x) { + %v = call <2 x double> @llvm.tan.v2f64(<2 x double> %x) + ret <2 x double> %v +} + ; CHECK-LABEL: powi_v2f64: ; CHECK: call $push[[L:[0-9]+]]=, __powidf2 declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32) diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll 
b/llvm/test/Transforms/LoopVectorize/intrinsic.ll index 0f070347dd4ef..9c910d70807a1 100644 --- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll +++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll @@ -162,6 +162,60 @@ for.end: ; preds = %for.body, %entry declare double @llvm.cos.f64(double) +define void @tan_f32(i32 %n, ptr %y, ptr %x) { +; CHECK-LABEL: @tan_f32( +; CHECK: llvm.tan.v4f32 +; CHECK: ret void +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %call = tail call float @llvm.tan.f32(float %0) + %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv + store float %call, ptr %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare float @llvm.tan.f32(float) + +define void @tan_f64(i32 %n, ptr %y, ptr %x) { +; CHECK-LABEL: @tan_f64( +; CHECK: llvm.tan.v4f64 +; CHECK: ret void +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv + %0 = load double, ptr %arrayidx, align 8 + %call = tail call double @llvm.tan.f64(double %0) + %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv + store double %call, ptr %arrayidx2, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare double @llvm.tan.f64(double) + define void @exp_f32(i32 %n, ptr %y, ptr %x) { ; CHECK-LABEL: @exp_f32( ; CHECK: llvm.exp.v4f32 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll index 6db27e597a63f..eae38295ba08c 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll @@ -548,13 +548,11 @@ define <4 x float> @tan_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]]) ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]]) -; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]]) -; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]] +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], 
<4 x float> poison, <2 x i32> <i32 2, i32 3>
+; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP3]])
+; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
index 24e16deacb3af..5e2dd305f0557 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
@@ -548,13 +548,11 @@ define <4 x float> @tan_4x(ptr %a) {
; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; NOACCELERATE-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; NOACCELERATE-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
-; NOACCELERATE-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; NOACCELERATE-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; NOACCELERATE-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
-; NOACCELERATE-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
-; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_3]]
+; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP3]])
+; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/call.ll b/llvm/test/Transforms/SLPVectorizer/X86/call.ll
index 4181148a4d829..8835e3b144be6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/call.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/call.ll
@@ -6,6 +6,7 @@ target triple = "x86_64-apple-macosx10.8.0"
declare double @sin(double) nounwind willreturn
declare double @cos(double) nounwind willreturn
+declare double @tan(double) nounwind willreturn
declare double @pow(double, double) nounwind willreturn
declare double @exp2(double) nounwind willreturn
declare double @sqrt(double) nounwind willreturn
@@ -48,6 +49,24 @@ define void @cos_libm(ptr %a, ptr %b) {
ret void
}
+define void @tan_libm(ptr %a, ptr %b) {
+; CHECK-LABEL: @tan_libm(
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.tan.v2f64(<2 x double> [[TMP2]])
+; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[B:%.*]], align 8
+; CHECK-NEXT: ret void
+;
+ %a0 = load double, ptr %a, align 8
+ %idx1 = getelementptr inbounds double, ptr %a, i64 1
+ %a1 = load double, ptr %idx1, align 8
+ %tan1 = tail call double @tan(double %a0) nounwind readnone
+ %tan2 = tail call double @tan(double %a1) nounwind readnone
+ store double %tan1, ptr %b, align 8
+ %idx2 
= getelementptr inbounds double, ptr %b, i64 1 + store double %tan2, ptr %idx2, align 8 + ret void +} + define void @pow_libm(ptr %a, ptr %b) { ; CHECK-LABEL: @pow_libm( ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8 From 62d5393c6fed4029996e14ba4ee30eceb143a017 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Fri, 14 Jun 2024 05:13:18 -0400 Subject: [PATCH 2/2] remove target specific changes --- llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 +- llvm/test/CodeGen/RISCV/half-intrinsics.ll | 120 ------------------ .../CodeGen/WebAssembly/simd-unsupported.ll | 16 --- .../Transforms/LoopVectorize/intrinsic.ll | 54 -------- 4 files changed, 1 insertion(+), 192 deletions(-) diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index de534994fa48c..8240a1fd7e2ff 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -961,7 +961,7 @@ void TargetLoweringBase::initActions() { setOperationAction( {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG, ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG, - ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::FTAN}, + ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT}, VT, Expand); // Constrained floating-point operations default to expand. @@ -1020,7 +1020,6 @@ void TargetLoweringBase::initActions() { ISD::FTAN}, {MVT::f32, MVT::f64, MVT::f128}, Expand); - setOperationAction(ISD::FTAN, MVT::f16, Promote); // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll index bfc26b0d65980..c493a9b2cb1df 100644 --- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll @@ -2862,123 +2862,3 @@ define i1 @isnan_d_fpclass(half %x) { %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 3) ; nan ret i1 %1 } - -declare half @llvm.tan.f16(half) - -define half @tan_f16(half %a) nounwind { -; RV32IZFH-LABEL: tan_f16: -; RV32IZFH: # %bb.0: -; RV32IZFH-NEXT: addi sp, sp, -16 -; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: call tanf -; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFH-NEXT: addi sp, sp, 16 -; RV32IZFH-NEXT: ret -; -; RV64IZFH-LABEL: tan_f16: -; RV64IZFH: # %bb.0: -; RV64IZFH-NEXT: addi sp, sp, -16 -; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: call tanf -; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 -; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFH-NEXT: addi sp, sp, 16 -; RV64IZFH-NEXT: ret -; -; RV32IZHINX-LABEL: tan_f16: -; RV32IZHINX: # %bb.0: -; RV32IZHINX-NEXT: addi sp, sp, -16 -; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: fcvt.s.h a0, a0 -; RV32IZHINX-NEXT: call tanf -; RV32IZHINX-NEXT: fcvt.h.s a0, a0 -; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZHINX-NEXT: addi sp, sp, 16 -; RV32IZHINX-NEXT: ret -; -; RV64IZHINX-LABEL: tan_f16: -; RV64IZHINX: # %bb.0: -; RV64IZHINX-NEXT: addi sp, sp, -16 -; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZHINX-NEXT: fcvt.s.h a0, a0 -; RV64IZHINX-NEXT: call tanf -; RV64IZHINX-NEXT: fcvt.h.s a0, a0 -; RV64IZHINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZHINX-NEXT: addi sp, sp, 16 -; RV64IZHINX-NEXT: ret -; -; RV32I-LABEL: tan_f16: -; RV32I: # %bb.0: -; RV32I-NEXT: addi 
sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: slli a0, a0, 16 -; RV32I-NEXT: srli a0, a0, 16 -; RV32I-NEXT: call __extendhfsf2 -; RV32I-NEXT: call tanf -; RV32I-NEXT: call __truncsfhf2 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret -; -; RV64I-LABEL: tan_f16: -; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: slli a0, a0, 48 -; RV64I-NEXT: srli a0, a0, 48 -; RV64I-NEXT: call __extendhfsf2 -; RV64I-NEXT: call tanf -; RV64I-NEXT: call __truncsfhf2 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret -; -; RV32IZFHMIN-LABEL: tan_f16: -; RV32IZFHMIN: # %bb.0: -; RV32IZFHMIN-NEXT: addi sp, sp, -16 -; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFHMIN-NEXT: call tanf -; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 -; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFHMIN-NEXT: addi sp, sp, 16 -; RV32IZFHMIN-NEXT: ret -; -; RV64IZFHMIN-LABEL: tan_f16: -; RV64IZFHMIN: # %bb.0: -; RV64IZFHMIN-NEXT: addi sp, sp, -16 -; RV64IZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFHMIN-NEXT: call tanf -; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 -; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFHMIN-NEXT: addi sp, sp, 16 -; RV64IZFHMIN-NEXT: ret -; -; RV32IZHINXMIN-LABEL: tan_f16: -; RV32IZHINXMIN: # %bb.0: -; RV32IZHINXMIN-NEXT: addi sp, sp, -16 -; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0 -; RV32IZHINXMIN-NEXT: call tanf -; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0 -; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZHINXMIN-NEXT: addi sp, sp, 16 -; RV32IZHINXMIN-NEXT: ret -; -; RV64IZHINXMIN-LABEL: tan_f16: -; RV64IZHINXMIN: # %bb.0: -; RV64IZHINXMIN-NEXT: addi sp, sp, -16 -; RV64IZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0 -; RV64IZHINXMIN-NEXT: call tanf -; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0 -; RV64IZHINXMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZHINXMIN-NEXT: addi sp, sp, 16 -; RV64IZHINXMIN-NEXT: ret - %1 = call half @llvm.tan.f16(half %a) - ret half %1 -} diff --git a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll index 1d6e073271efa..d214a3af5a151 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll @@ -377,14 +377,6 @@ define <4 x float> @cos_v4f32(<4 x float> %x) { ret <4 x float> %v } -; CHECK-LABEL: tan_v4f32: -; CHECK: call $push[[L:[0-9]+]]=, tanf -declare <4 x float> @llvm.tan.v4f32(<4 x float>) -define <4 x float> @tan_v4f32(<4 x float> %x) { - %v = call <4 x float> @llvm.tan.v4f32(<4 x float> %x) - ret <4 x float> %v -} - ; CHECK-LABEL: powi_v4f32: ; CHECK: call $push[[L:[0-9]+]]=, __powisf2 declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32) @@ -477,14 +469,6 @@ define <2 x double> @cos_v2f64(<2 x double> %x) { ret <2 x double> %v } -; CHECK-LABEL: tan_v2f64: -; CHECK: call $push[[L:[0-9]+]]=, tan -declare <2 x double> @llvm.tan.v2f64(<2 x double>) -define <2 x double> @tan_v2f64(<2 x double> %x) { - %v = call <2 x double> @llvm.tan.v2f64(<2 x double> %x) - ret <2 x double> %v -} - ; CHECK-LABEL: powi_v2f64: ; CHECK: call $push[[L:[0-9]+]]=, __powidf2 declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32) diff --git 
a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll index 9c910d70807a1..0f070347dd4ef 100644 --- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll +++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll @@ -162,60 +162,6 @@ for.end: ; preds = %for.body, %entry declare double @llvm.cos.f64(double) -define void @tan_f32(i32 %n, ptr %y, ptr %x) { -; CHECK-LABEL: @tan_f32( -; CHECK: llvm.tan.v4f32 -; CHECK: ret void -; -entry: - %cmp6 = icmp sgt i32 %n, 0 - br i1 %cmp6, label %for.body, label %for.end - -for.body: ; preds = %entry, %for.body - %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv - %0 = load float, ptr %arrayidx, align 4 - %call = tail call float @llvm.tan.f32(float %0) - %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv - store float %call, ptr %arrayidx2, align 4 - %indvars.iv.next = add i64 %indvars.iv, 1 - %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body, %entry - ret void -} - -declare float @llvm.tan.f32(float) - -define void @tan_f64(i32 %n, ptr %y, ptr %x) { -; CHECK-LABEL: @tan_f64( -; CHECK: llvm.tan.v4f64 -; CHECK: ret void -; -entry: - %cmp6 = icmp sgt i32 %n, 0 - br i1 %cmp6, label %for.body, label %for.end - -for.body: ; preds = %entry, %for.body - %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds double, ptr %y, i64 %indvars.iv - %0 = load double, ptr %arrayidx, align 8 - %call = tail call double @llvm.tan.f64(double %0) - %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %indvars.iv - store double %call, ptr %arrayidx2, align 8 - %indvars.iv.next = add i64 %indvars.iv, 1 - %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body, %entry - ret void -} - -declare double @llvm.tan.f64(double) - define void @exp_f32(i32 %n, ptr %y, ptr %x) { ; CHECK-LABEL: @exp_f32( ; CHECK: llvm.exp.v4f32