-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Revert "[X86] Remove knl/knm specific ISAs supports (#92883)" #93123
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This reverts commit 282d2ab.
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-clang Author: Freddy Ye (FreddyLeaf) ChangesThis reverts commit 282d2ab. Patch is 141.61 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93123.diff 53 Files Affected:
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index d410d8acd135b..0c4a343b70009 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -801,8 +801,6 @@ AMDGPU Support
X86 Support
^^^^^^^^^^^
-- Remove knl/knm specific ISA supports: AVX512PF, AVX512ER, PREFETCHWT1
-
Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 7074479786b97..eafcc219c1096 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -832,11 +832,23 @@ TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx
TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
+TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
+
TARGET_BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_rcp28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rcp28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
+TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
+TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
+TARGET_BUILTIN(__builtin_ia32_exp2ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
+
TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f,evex512")
@@ -948,6 +960,15 @@ TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vv*UsV16iV16iIi", "nV:512:", "avx
TARGET_BUILTIN(__builtin_ia32_scatterdiv8di, "vv*UcV8OiV8OiIi", "nV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vv*UcV8OiV8iIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherpfdpd, "vUcV8ivC*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherpfdps, "vUsV16ivC*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherpfqpd, "vUcV8OivC*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherpfqps, "vUcV8OivC*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iv*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16iv*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf,evex512")
+
TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index 1e44bc4ad09b6..0738f43ca555c 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -361,6 +361,9 @@ def warn_invalid_feature_combination : Warning<
def warn_target_unrecognized_env : Warning<
"mismatch between architecture and environment in target triple '%0'; did you mean '%1'?">,
InGroup<InvalidCommandLineArgument>;
+def warn_knl_knm_isa_support_removed : Warning<
+ "KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.">,
+ InGroup<DiagGroup<"knl-knm-isa-support-removed">>;
def err_target_unsupported_abi_with_fpu : Error<
"'%0' ABI is not supported with FPU">;
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 9a5bffce20460..8cbb7f854ee72 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6111,10 +6111,14 @@ def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>;
def mno_avx512cd : Flag<["-"], "mno-avx512cd">, Group<m_x86_Features_Group>;
def mavx512dq : Flag<["-"], "mavx512dq">, Group<m_x86_Features_Group>;
def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group<m_x86_Features_Group>;
+def mavx512er : Flag<["-"], "mavx512er">, Group<m_x86_Features_Group>;
+def mno_avx512er : Flag<["-"], "mno-avx512er">, Group<m_x86_Features_Group>;
def mavx512fp16 : Flag<["-"], "mavx512fp16">, Group<m_x86_Features_Group>;
def mno_avx512fp16 : Flag<["-"], "mno-avx512fp16">, Group<m_x86_Features_Group>;
def mavx512ifma : Flag<["-"], "mavx512ifma">, Group<m_x86_Features_Group>;
def mno_avx512ifma : Flag<["-"], "mno-avx512ifma">, Group<m_x86_Features_Group>;
+def mavx512pf : Flag<["-"], "mavx512pf">, Group<m_x86_Features_Group>;
+def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group<m_x86_Features_Group>;
def mavx512vbmi : Flag<["-"], "mavx512vbmi">, Group<m_x86_Features_Group>;
def mno_avx512vbmi : Flag<["-"], "mno-avx512vbmi">, Group<m_x86_Features_Group>;
def mavx512vbmi2 : Flag<["-"], "mavx512vbmi2">, Group<m_x86_Features_Group>;
@@ -6205,6 +6209,8 @@ def mpopcnt : Flag<["-"], "mpopcnt">, Group<m_x86_Features_Group>;
def mno_popcnt : Flag<["-"], "mno-popcnt">, Group<m_x86_Features_Group>;
def mprefetchi : Flag<["-"], "mprefetchi">, Group<m_x86_Features_Group>;
def mno_prefetchi : Flag<["-"], "mno-prefetchi">, Group<m_x86_Features_Group>;
+def mprefetchwt1 : Flag<["-"], "mprefetchwt1">, Group<m_x86_Features_Group>;
+def mno_prefetchwt1 : Flag<["-"], "mno-prefetchwt1">, Group<m_x86_Features_Group>;
def mprfchw : Flag<["-"], "mprfchw">, Group<m_x86_Features_Group>;
def mno_prfchw : Flag<["-"], "mno-prfchw">, Group<m_x86_Features_Group>;
def mptwrite : Flag<["-"], "mptwrite">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 3a30cff917bb4..b823eaf6ce336 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -310,9 +310,15 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAVX512VNNI = true;
} else if (Feature == "+avx512bf16") {
HasAVX512BF16 = true;
+ } else if (Feature == "+avx512er") {
+ HasAVX512ER = true;
+ Diags.Report(diag::warn_knl_knm_isa_support_removed);
} else if (Feature == "+avx512fp16") {
HasAVX512FP16 = true;
HasLegalHalfType = true;
+ } else if (Feature == "+avx512pf") {
+ HasAVX512PF = true;
+ Diags.Report(diag::warn_knl_knm_isa_support_removed);
} else if (Feature == "+avx512dq") {
HasAVX512DQ = true;
} else if (Feature == "+avx512bitalg") {
@@ -369,6 +375,9 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasWBNOINVD = true;
} else if (Feature == "+prefetchi") {
HasPREFETCHI = true;
+ } else if (Feature == "+prefetchwt1") {
+ HasPREFETCHWT1 = true;
+ Diags.Report(diag::warn_knl_knm_isa_support_removed);
} else if (Feature == "+clzero") {
HasCLZERO = true;
} else if (Feature == "+cldemote") {
@@ -831,8 +840,12 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AVX512VNNI__");
if (HasAVX512BF16)
Builder.defineMacro("__AVX512BF16__");
+ if (HasAVX512ER)
+ Builder.defineMacro("__AVX512ER__");
if (HasAVX512FP16)
Builder.defineMacro("__AVX512FP16__");
+ if (HasAVX512PF)
+ Builder.defineMacro("__AVX512PF__");
if (HasAVX512DQ)
Builder.defineMacro("__AVX512DQ__");
if (HasAVX512BITALG)
@@ -884,6 +897,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__SM4__");
if (HasPREFETCHI)
Builder.defineMacro("__PREFETCHI__");
+ if (HasPREFETCHWT1)
+ Builder.defineMacro("__PREFETCHWT1__");
if (HasCLZERO)
Builder.defineMacro("__CLZERO__");
if (HasKL)
@@ -1069,7 +1084,9 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("avx512vpopcntdq", true)
.Case("avx512vnni", true)
.Case("avx512bf16", true)
+ .Case("avx512er", true)
.Case("avx512fp16", true)
+ .Case("avx512pf", true)
.Case("avx512dq", true)
.Case("avx512bitalg", true)
.Case("avx512bw", true)
@@ -1117,6 +1134,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("pku", true)
.Case("popcnt", true)
.Case("prefetchi", true)
+ .Case("prefetchwt1", true)
.Case("prfchw", true)
.Case("ptwrite", true)
.Case("raoint", true)
@@ -1183,7 +1201,9 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
.Case("avx512vnni", HasAVX512VNNI)
.Case("avx512bf16", HasAVX512BF16)
+ .Case("avx512er", HasAVX512ER)
.Case("avx512fp16", HasAVX512FP16)
+ .Case("avx512pf", HasAVX512PF)
.Case("avx512dq", HasAVX512DQ)
.Case("avx512bitalg", HasAVX512BITALG)
.Case("avx512bw", HasAVX512BW)
@@ -1233,6 +1253,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("pku", HasPKU)
.Case("popcnt", HasPOPCNT)
.Case("prefetchi", HasPREFETCHI)
+ .Case("prefetchwt1", HasPREFETCHWT1)
.Case("prfchw", HasPRFCHW)
.Case("ptwrite", HasPTWRITE)
.Case("raoint", HasRAOINT)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 0633b7e0da96a..6a0a6cb84203d 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -103,6 +103,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAVX512VNNI = false;
bool HasAVX512FP16 = false;
bool HasAVX512BF16 = false;
+ bool HasAVX512ER = false;
+ bool HasAVX512PF = false;
bool HasAVX512DQ = false;
bool HasAVX512BITALG = false;
bool HasAVX512BW = false;
@@ -134,6 +136,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasCLWB = false;
bool HasMOVBE = false;
bool HasPREFETCHI = false;
+ bool HasPREFETCHWT1 = false;
bool HasRDPID = false;
bool HasRDPRU = false;
bool HasRetpolineExternalThunk = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index dbff92b4e59b4..5f02c71f6ca51 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -153,10 +153,12 @@ set(x86_files
avx512bwintrin.h
avx512cdintrin.h
avx512dqintrin.h
+ avx512erintrin.h
avx512fintrin.h
avx512fp16intrin.h
avx512ifmaintrin.h
avx512ifmavlintrin.h
+ avx512pfintrin.h
avx512vbmi2intrin.h
avx512vbmiintrin.h
avx512vbmivlintrin.h
diff --git a/clang/lib/Headers/avx512erintrin.h b/clang/lib/Headers/avx512erintrin.h
new file mode 100644
index 0000000000000..1c5a2d2d208ff
--- /dev/null
+++ b/clang/lib/Headers/avx512erintrin.h
@@ -0,0 +1,271 @@
+/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512ERINTRIN_H
+#define __AVX512ERINTRIN_H
+
+/* exp2a23 */
+#define _mm512_exp2a23_round_pd(A, R) \
+ ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
+ ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(S), (__mmask8)(M), \
+ (int)(R)))
+
+#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
+ ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm512_exp2a23_pd(A) \
+ _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_exp2a23_pd(S, M, A) \
+ _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_exp2a23_pd(M, A) \
+ _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_exp2a23_round_ps(A, R) \
+ ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)))
+
+#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
+ ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(S), (__mmask16)(M), \
+ (int)(R)))
+
+#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
+ ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(M), (int)(R)))
+
+#define _mm512_exp2a23_ps(A) \
+ _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_exp2a23_ps(S, M, A) \
+ _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_exp2a23_ps(M, A) \
+ _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+/* rsqrt28 */
+#define _mm512_rsqrt28_round_pd(A, R) \
+ ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
+ ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(S), (__mmask8)(M), \
+ (int)(R)))
+
+#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
+ ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm512_rsqrt28_pd(A) \
+ _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rsqrt28_pd(S, M, A) \
+ _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rsqrt28_pd(M, A) \
+ _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_rsqrt28_round_ps(A, R) \
+ ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)))
+
+#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
+ ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(S), (__mmask16)(M), \
+ (int)(R)))
+
+#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
+ ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(M), (int)(R)))
+
+#define _mm512_rsqrt28_ps(A) \
+ _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rsqrt28_ps(S, M, A) \
+ _mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rsqrt28_ps(M, A) \
+ _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rsqrt28_round_ss(A, B, R) \
+ ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
+ ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(S), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
+ ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_rsqrt28_ss(A, B) \
+ _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rsqrt28_ss(S, M, A, B) \
+ _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rsqrt28_ss(M, A, B) \
+ _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rsqrt28_round_sd(A, B, R) \
+ ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
+ ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(S), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
+ ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_rsqrt28_sd(A, B) \
+ _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rsqrt28_sd(S, M, A, B) \
+ _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rsqrt28_sd(M, A, B) \
+ _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+/* rcp28 */
+#define _mm512_rcp28_round_pd(A, R) \
+ ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
+ ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(S), (__mmask8)(M), \
+ (int)(R)))
+
+#define _mm512_maskz_rcp28_round_pd(M, A, R) \
+ ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm512_rcp28_pd(A) \
+ _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rcp28_pd(S, M, A) \
+ _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rcp28_pd(M, A) \
+ _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_rcp28_round_ps(A, R) \
+ ((__m...
[truncated]
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Labels
backend:X86
clang:driver
'clang' and 'clang++' user-facing binaries. Not 'clang-cl'
clang:frontend
Language frontend issues, e.g. anything involving "Sema"
clang:headers
Headers provided by Clang, e.g. for intrinsics
clang
Clang issues not falling into any other category
llvm:ir
llvm:transforms
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
This reverts commit 282d2ab.