From 94b1c80823fdacd6ac3552d8606f95af0d4721f3 Mon Sep 17 00:00:00 2001 From: "Wang, Phoebe" Date: Fri, 29 Nov 2024 16:54:45 +0800 Subject: [PATCH] [X86][FP16] Fix masking problem of VF[,C]MADDCSH intrinsics Fixes: #98306 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +++ llvm/lib/Target/X86/X86InstrAVX512.td | 6 +++--- llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll | 12 ++++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 96b03feaa4580..98ae86533628c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -26265,6 +26265,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } if (!NewOp) NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3); + if (IntrData->Opc0 == X86ISD::VFMADDCSH || + IntrData->Opc0 == X86ISD::VFCMADDCSH) + return getScalarMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG); return getVectorMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG); } case IFMA_OP: diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 299a2a74d86fc..83a2e981ffd7a 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -13533,17 +13533,17 @@ let Uses = [MXCSR] in { multiclass avx512_cfmaop_sh_common opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> { let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in { - defm r : AVX512_maskable_3src, Sched<[WriteFMAX]>; - defm m : AVX512_maskable_3src, Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>; - defm rb : AVX512_maskable_3src, diff --git a/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll b/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll index 43e085f37ff67..e449c7192e4bf 100644 --- a/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll @@ -277,3 +277,15 @@ define <4 x float> @test_int_x86_avx512fp16_maskz_cfcmadd_sh(<4 x float> %x0, <4 %res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfcmadd.csh(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 9) ret <4 x float> %res } + +define <4 x float> @PR98306() { +; CHECK-LABEL: PR98306: +; CHECK: ## %bb.0: +; CHECK-NEXT: kxorw %k0, %k0, %k1 +; CHECK-NEXT: vmovaps {{.*#+}} xmm1 = [7.8125E-3,1.050912E+6,4.203776E+6,1.6815616E+7] +; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [3.2E+1,4.03288064E+8,8.0658432E+8,1.61318502E+9] +; CHECK-NEXT: vfmaddcsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 {%k1} {z} +; CHECK-NEXT: retq + %res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.csh(<4 x float> , <4 x float> , <4 x float> , i8 0, i32 4) + ret <4 x float> %res +}