From d7e2afad4b97e55b950379ef090b983d18fe425a Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Sun, 31 May 2020 18:21:47 +0000 Subject: [PATCH 01/11] Fix comparison comments --- crates/core_arch/src/x86/avx512f.rs | 32 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 7bc2671a96..08e866f479 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -115,7 +115,7 @@ pub unsafe fn _mm512_mask_cmplt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmplt_epu64_mask(a, b) & m } -/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu64) #[inline] @@ -125,7 +125,7 @@ pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) } -///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +///Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu64) @@ -136,7 +136,7 @@ pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmpgt_epu64_mask(a, b) & m } -/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu64) #[inline] @@ -146,7 +146,7 @@ pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { _mm512_cmpgt_epu64_mask(b, a) } -///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +///Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu64) @@ -157,7 +157,7 @@ pub unsafe fn _mm512_mask_cmple_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmpgt_epu64_mask(b, a) & m } -/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu64) #[inline] @@ -167,7 +167,7 @@ pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { _mm512_cmplt_epu64_mask(b, a) } -///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +///Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu64) @@ -178,7 +178,7 @@ pub unsafe fn _mm512_mask_cmpge_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmplt_epu64_mask(b, a) & m } -/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu64) #[inline] @@ -188,7 +188,7 @@ pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) } -///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +///Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu64) @@ -220,7 +220,7 @@ pub unsafe fn _mm512_mask_cmplt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmplt_epi64_mask(a, b) & m } -/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi64) #[inline] @@ -230,7 +230,7 @@ pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) } -///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +///Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi64) @@ -241,7 +241,7 @@ pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmpgt_epi64_mask(a, b) & m } -/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi64) #[inline] @@ -251,7 +251,7 @@ pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { _mm512_cmpgt_epi64_mask(b, a) } -///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +///Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi64) @@ -262,7 +262,7 @@ pub unsafe fn _mm512_mask_cmple_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmpgt_epi64_mask(b, a) & m } -/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi64) #[inline] @@ -272,7 +272,7 @@ pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { _mm512_cmplt_epi64_mask(b, a) } -///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +///Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi64) @@ -283,7 +283,7 @@ pub unsafe fn _mm512_mask_cmpge_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmplt_epi64_mask(b, a) & m } -/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi64) #[inline] @@ -293,7 +293,7 @@ pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) } -///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +///Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi64) From dcf5d47b09a47911b4e6319cc7c52192d95aedc3 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Sun, 31 May 2020 18:26:25 +0000 Subject: [PATCH 02/11] s/unsigned/signed/ for epi64 --- crates/core_arch/src/x86/avx512f.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 08e866f479..17746a7b07 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -199,7 +199,7 @@ pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmpeq_epu64_mask(a, b) & m } -/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64) #[inline] @@ -209,7 +209,7 @@ pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) } -///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +///Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi64) @@ -220,7 +220,7 @@ pub unsafe fn _mm512_mask_cmplt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmplt_epi64_mask(a, b) & m } -/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector. +/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi64) #[inline] @@ -230,7 +230,7 @@ pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) } -///Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector k +///Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi64) @@ -241,7 +241,7 @@ pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmpgt_epi64_mask(a, b) & m } -/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. +/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi64) #[inline] @@ -251,7 +251,7 @@ pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { _mm512_cmpgt_epi64_mask(b, a) } -///Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k +///Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi64) @@ -262,7 +262,7 @@ pub unsafe fn _mm512_mask_cmple_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmpgt_epi64_mask(b, a) & m } -/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector. +/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi64) #[inline] @@ -272,7 +272,7 @@ pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { _mm512_cmplt_epi64_mask(b, a) } -///Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k +///Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi64) @@ -283,7 +283,7 @@ pub unsafe fn _mm512_mask_cmpge_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmplt_epi64_mask(b, a) & m } -/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector. +/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi64) #[inline] @@ -293,7 +293,7 @@ pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) } -///Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector k +///Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi64) From d9d0fc9199a4b034c2984f9985cb2e474c552a39 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Sun, 31 May 2020 18:52:34 +0000 Subject: [PATCH 03/11] Add neq integer comparisons --- crates/core_arch/src/x86/avx512f.rs | 44 +++++++++++++++++++++++++- crates/core_arch/src/x86_64/avx512f.rs | 34 ++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 17746a7b07..eb6beb4275 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -199,7 +199,28 @@ pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmpeq_epu64_mask(a, b) & m } -/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmpneqq))] +pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) +} + +///Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmpneqq))] +pub unsafe fn _mm512_mask_cmpneq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmpneq_epu64_mask(a, b) & m +} + +/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpcmp))] pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) } @@ -304,6 +325,27 @@ pub unsafe fn _mm512_mask_cmpeq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmpeq_epi64_mask(a, b) & m } +/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmpneqq))] +pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) +} + +///Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmpneqq))] +pub unsafe fn _mm512_mask_cmpneq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmpneq_epi64_mask(a, b) & m +} + #[cfg(test)] mod tests { use std; diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 0b32988d7c..e6d4ca21d2 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -136,6 +136,23 @@ mod tests { assert_eq!(r, 0b01001010); } + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpneq_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let m = _mm512_cmpneq_epu64_mask(b, a); + assert_eq!(m, !_mm512_cmpeq_epu64_mask(b, a)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpneq_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, -100, 100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let mask = 0b01111010; + let r = _mm512_mask_cmpneq_epu64_mask(mask, b, a); + assert_eq!(r, 0b00110010); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cmplt_epi64_mask() { let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); @@ -223,6 +240,23 @@ mod tests { assert_eq!(r, 0b01001010); } + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpneq_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let m = _mm512_cmpneq_epi64_mask(b, a); + assert_eq!(m, !_mm512_cmpeq_epi64_mask(b, a)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpneq_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, -100, 100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let mask = 0b01111010; + let r = _mm512_mask_cmpneq_epi64_mask(mask, b, a); + assert_eq!(r, 0b00110010) + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_set_epi64() { let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); From f775ef17006e8e50d6535fed1c29b9f97aae6916 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Sat, 6 Jun 2020 17:06:56 +0000 Subject: [PATCH 04/11] Make instruction check less specific for CI --- crates/core_arch/src/x86/avx512f.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 4f2617f50e..adce90c646 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -204,7 +204,7 @@ pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu64) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmpneqq))] +#[cfg_attr(test, assert_instr(vpcmp))] pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) } @@ -215,7 +215,7 @@ pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { /// [Intel's 
documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu64) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmpneqq))] +#[cfg_attr(test, assert_instr(vpcmp))] pub unsafe fn _mm512_mask_cmpneq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { _mm512_cmpneq_epu64_mask(a, b) & m } @@ -330,7 +330,7 @@ pub unsafe fn _mm512_mask_cmpeq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi64) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmpneqq))] +#[cfg_attr(test, assert_instr(vpcmp))] pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) } @@ -341,7 +341,7 @@ pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi64) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmpneqq))] +#[cfg_attr(test, assert_instr(vpcmp))] pub unsafe fn _mm512_mask_cmpneq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { _mm512_cmpneq_epi64_mask(a, b) & m } From 2957e2e88762526592ec1dc7cf411fd964697be4 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Sat, 6 Jun 2020 19:01:17 +0000 Subject: [PATCH 05/11] Add comparison operator integer comparisons --- crates/core_arch/src/x86/avx512f.rs | 109 ++++++++++++++++++++++- crates/core_arch/src/x86/mod.rs | 3 + crates/core_arch/src/x86_64/avx512f.rs | 34 +++++++ crates/stdarch-verify/src/lib.rs | 1 + crates/stdarch-verify/tests/x86-intel.rs | 3 + 5 files changed, 149 insertions(+), 1 deletion(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index adce90c646..b4eb4e2b77 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -220,6 +220,47 @@ pub unsafe fn _mm512_mask_cmpneq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmpneq_epu64_mask(a, b) & m } +/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(2)] +#[cfg_attr(test, assert_instr(vpcmp, op = 0))] +pub unsafe fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 { + let neg_one = -1; + macro_rules! call { + ($imm3:expr) => { + vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one) + }; + } + let r = constify_imm3!(op, call); + transmute(r) +} + +/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op, +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(3)] +#[cfg_attr(test, assert_instr(vpcmp, op = 0))] +pub unsafe fn _mm512_mask_cmp_epu64_mask( + m: __mmask8, + a: __m512i, + b: __m512i, + op: _MM_CMPINT_ENUM, +) -> __mmask8 { + macro_rules! call { + ($imm3:expr) => { + vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8) + }; + } + let r = constify_imm3!(op, call); + transmute(r) +} + /// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64) @@ -335,7 +376,7 @@ pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) } -///Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector k +/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi64) @@ -346,6 +387,72 @@ pub unsafe fn _mm512_mask_cmpneq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmpneq_epi64_mask(a, b) & m } +/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(2)] +#[cfg_attr(test, assert_instr(vpcmp, op = 0))] +pub unsafe fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 { + let neg_one = -1; + macro_rules! call { + ($imm3:expr) => { + vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one) + }; + } + let r = constify_imm3!(op, call); + transmute(r) +} + +/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op, +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(3)] +#[cfg_attr(test, assert_instr(vpcmp, op = 0))] +pub unsafe fn _mm512_mask_cmp_epi64_mask( + m: __mmask8, + a: __m512i, + b: __m512i, + op: _MM_CMPINT_ENUM, +) -> __mmask8 { + macro_rules! 
call { + ($imm3:expr) => { + vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8) + }; + } + let r = constify_imm3!(op, call); + transmute(r) +} + +/// Equal +pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00; +/// Less-than +pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01; +/// Less-than-or-equal +pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02; +/// False +pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03; +/// Not-equal +pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04; +/// Not less-than +pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05; +/// Not less-than-or-equal +pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06; +/// True +pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07; + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.x86.avx512.mask.ucmp.q.512"] + fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8; + #[link_name = "llvm.x86.avx512.mask.cmp.q.512"] + fn vpcmpq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8; +} + #[cfg(test)] mod tests { use std; diff --git a/crates/core_arch/src/x86/mod.rs b/crates/core_arch/src/x86/mod.rs index 1347010588..9c9057f467 100644 --- a/crates/core_arch/src/x86/mod.rs +++ b/crates/core_arch/src/x86/mod.rs @@ -350,6 +350,9 @@ pub type __mmask16 = u16; #[allow(non_camel_case_types)] pub type __mmask8 = u8; +/// The `_MM_CMPINT_ENUM` type used to specify comparison operations in AVX-512 intrinsics. +pub type _MM_CMPINT_ENUM = i32; + #[cfg(test)] mod test; #[cfg(test)] diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index e6d4ca21d2..c6d4f896b3 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -153,6 +153,23 @@ mod tests { assert_eq!(r, 0b00110010); } + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmp_epu64_mask(a, b, _MM_CMPINT_LT); + assert_eq!(m, 0b11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + let r = _mm512_mask_cmp_epu64_mask(mask, a, b, _MM_CMPINT_LT); + assert_eq!(r, 0b01001010); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cmplt_epi64_mask() { let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); @@ -257,6 +274,23 @@ mod tests { assert_eq!(r, 0b00110010) } + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_LT); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01100110; + let r = _mm512_mask_cmp_epi64_mask(mask, a, b, _MM_CMPINT_LT); + assert_eq!(r, 0b00000100); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_set_epi64() { let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); diff --git a/crates/stdarch-verify/src/lib.rs b/crates/stdarch-verify/src/lib.rs index 62ad41c48f..37224013f0 100644 --- a/crates/stdarch-verify/src/lib.rs +++ b/crates/stdarch-verify/src/lib.rs @@ -147,6 +147,7 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream { "__m512i" => quote! 
{ &M512I }, "__mmask8" => quote! { &MMASK8 }, "__mmask16" => quote! { &MMASK16 }, + "_MM_CMPINT_ENUM" => quote! { &MM_CMPINT_ENUM }, "__m64" => quote! { &M64 }, "bool" => quote! { &BOOL }, "f32" => quote! { &F32 }, diff --git a/crates/stdarch-verify/tests/x86-intel.rs b/crates/stdarch-verify/tests/x86-intel.rs index bf8ede6071..32edb39032 100644 --- a/crates/stdarch-verify/tests/x86-intel.rs +++ b/crates/stdarch-verify/tests/x86-intel.rs @@ -55,6 +55,7 @@ static M512I: Type = Type::M512I; static M512D: Type = Type::M512D; static MMASK8: Type = Type::MMASK8; static MMASK16: Type = Type::MMASK16; +static MM_CMPINT_ENUM: Type = Type::MM_CMPINT_ENUM; static TUPLE: Type = Type::Tuple; static CPUID: Type = Type::CpuidResult; @@ -79,6 +80,7 @@ enum Type { M512I, MMASK8, MMASK16, + MM_CMPINT_ENUM, Tuple, CpuidResult, Never, @@ -657,6 +659,7 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(), (&Type::MMASK8, "__mmask8") => {} (&Type::MMASK16, "__mmask16") => {} + (&Type::MM_CMPINT_ENUM, "const _MM_CMPINT_ENUM") => require_const()?, // This is a macro (?) in C which seems to mutate its arguments, but // that means that we're taking pointers to arguments in rust From 7538c0fc87c27b7c18436a241b15f88891f2af79 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Sat, 6 Jun 2020 19:05:24 +0000 Subject: [PATCH 06/11] Fix comments --- crates/core_arch/src/x86/avx512f.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index b4eb4e2b77..2715a8f5e0 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -209,10 +209,10 @@ pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) } -///Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector k +/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu64_mask) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpcmp))] @@ -222,7 +222,7 @@ pub unsafe fn _mm512_mask_cmpneq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) /// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask) #[inline] #[target_feature(enable = "avx512f")] #[rustc_args_required_const(2)] @@ -241,7 +241,7 @@ pub unsafe fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) /// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op, /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). 
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask) #[inline] #[target_feature(enable = "avx512f")] #[rustc_args_required_const(3)] @@ -389,7 +389,7 @@ pub unsafe fn _mm512_mask_cmpneq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) /// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask) #[inline] #[target_feature(enable = "avx512f")] #[rustc_args_required_const(2)] @@ -408,7 +408,7 @@ pub unsafe fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) /// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op, /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask) #[inline] #[target_feature(enable = "avx512f")] #[rustc_args_required_const(3)] From 33a4dd595f3167e6323a9037d8bb41bbb91f4feb Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Sat, 6 Jun 2020 19:09:00 +0000 Subject: [PATCH 07/11] Allow non camel case types --- crates/core_arch/src/x86/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/core_arch/src/x86/mod.rs b/crates/core_arch/src/x86/mod.rs index 9c9057f467..74ba99d551 100644 --- a/crates/core_arch/src/x86/mod.rs +++ b/crates/core_arch/src/x86/mod.rs @@ -351,6 +351,7 @@ pub type __mmask16 = u16; pub type __mmask8 = u8; /// The `_MM_CMPINT_ENUM` type used to specify comparison operations in AVX-512 intrinsics. 
+#[allow(non_camel_case_types)] pub type _MM_CMPINT_ENUM = i32; #[cfg(test)] From a74886bd288d49fd790941b156cd36b0fb5f91b6 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Sun, 7 Jun 2020 16:55:25 +0000 Subject: [PATCH 08/11] Add cmplt_ep(i|u)32 --- crates/core_arch/src/simd.rs | 6 + crates/core_arch/src/x86/avx512f.rs | 146 +++++++++++++++++++++ crates/core_arch/src/x86/mod.rs | 5 + crates/core_arch/src/x86_64/avx512f.rs | 6 + crates/stdarch-verify/tests/x86-intel.rs | 3 - 5 files changed, 163 insertions(+), 3 deletions(-) diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs index 7e4f7e8cce..3e5af4fffa 100644 --- a/crates/core_arch/src/simd.rs +++ b/crates/core_arch/src/simd.rs @@ -198,6 +198,12 @@ simd_ty!(i32x16[i32]: | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15); +simd_ty!(u32x16[u32]: + u32, u32, u32, u32, u32, u32, u32, u32, + u32, u32, u32, u32, u32, u32, u32, u32 + | x0, x1, x2, x3, x4, x5, x6, x7, + x8, x9, x10, x11, x12, x13, x14, x15); + simd_ty!(i64x8[i64]: i64, i64, i64, i64, i64, i64, i64, i64 | x0, x1, x2, x3, x4, x5, x6, x7); diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 2715a8f5e0..e3fd02ade0 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -87,6 +87,39 @@ pub unsafe fn _mm512_setr_epi32( transmute(r) } +/// Sets packed 32-bit integers in `dst` with the supplied values. +#[inline] +#[target_feature(enable = "avx512f")] +pub unsafe fn _mm512_set_epi32( + e15: i32, + e14: i32, + e13: i32, + e12: i32, + e11: i32, + e10: i32, + e9: i32, + e8: i32, + e7: i32, + e6: i32, + e5: i32, + e4: i32, + e3: i32, + e2: i32, + e1: i32, + e0: i32, +) -> __m512i { + _mm512_setr_epi32( + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, + ) +} + +/// Broadcast 32-bit integer `a` to all elements of `dst`. +#[inline] +#[target_feature(enable = "avx512f")] +pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i { + transmute(i32x16::splat(a)) +} + /// Broadcast 64-bit integer `a` to all elements of `dst`. #[inline] #[target_feature(enable = "avx512f")] @@ -94,6 +127,27 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i { transmute(i64x8::splat(a)) } +/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmplt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmplt_epu32_mask(a, b) & m +} + /// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64) #[inline] #[target_feature(enable = "avx512f")] @@ -261,6 +315,27 @@ pub unsafe fn _mm512_mask_cmp_epu64_mask( transmute(r) } +/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) +} + +/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmplt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmplt_epi32_mask(a, b) & m +} + /// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpcmp))] @@ -556,4 +631,75 @@ mod tests { ); assert_eq_m512i(r, e); } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmplt_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmplt_epu32_mask(a, b); + assert_eq!(m, 0b11001111_11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_epu32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01111010_01111010; + let r = _mm512_mask_cmplt_epu32_mask(mask, a, b); + assert_eq!(r, 0b01001010_01001010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmplt_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let m = _mm512_cmplt_epi32_mask(a, b); + assert_eq!(m, 0b00000101_00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_epi32_mask() { + #[rustfmt::skip] + let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100, + 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100); + let b = _mm512_set1_epi32(-1); + let mask = 0b01100110_01100110; + let r = _mm512_mask_cmplt_epi32_mask(mask, a, b); + assert_eq!(r, 0b00000100_00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set_epi32() { + let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i( + r, + _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_setr_epi32() { + let r = _mm512_set_epi32(0, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i( + r, + _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0), + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set1_epi32() { + let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, _mm512_set1_epi32(2)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_setzero_si512() { + assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512()); + } } diff --git a/crates/core_arch/src/x86/mod.rs b/crates/core_arch/src/x86/mod.rs index 74ba99d551..0c2f9a8142 100644 --- a/crates/core_arch/src/x86/mod.rs +++ b/crates/core_arch/src/x86/mod.rs @@ -513,6 +513,11 @@ impl m256iExt for __m256i { pub(crate) trait m512iExt: Sized { fn as_m512i(self) -> __m512i; + #[inline] + fn as_u32x16(self) -> crate::core_arch::simd::u32x16 { + unsafe { transmute(self.as_m512i()) } + } + #[inline] fn as_i32x16(self) -> crate::core_arch::simd::i32x16 { unsafe { transmute(self.as_m512i()) } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index c6d4f896b3..51b163972e 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -302,4 +302,10 @@ mod tests { let r = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0)) } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_set1_epi64() { + let r = _mm512_set_epi64(2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, _mm512_set1_epi64(2)); + } } diff --git a/crates/stdarch-verify/tests/x86-intel.rs b/crates/stdarch-verify/tests/x86-intel.rs index 32edb39032..5dc21fa445 100644 --- a/crates/stdarch-verify/tests/x86-intel.rs +++ b/crates/stdarch-verify/tests/x86-intel.rs @@ -213,9 +213,6 @@ fn verify_all_signatures() { "_mm256_undefined_si256", "_bextr2_u32", "_mm_tzcnt_32", - "_mm512_setzero_si512", - "_mm512_setr_epi32", - "_mm512_set1_epi64", "_m_paddb", "_m_paddw", "_m_paddd", From e8cfdb82b80a0cf26dfaf06c16a565e02bfa3ca9 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Sat, 13 Jun 2020 16:45:40 +0000 Subject: [PATCH 09/11] Allow AVX512f or KNC intrinsics to be gated by avx512f --- crates/stdarch-verify/tests/x86-intel.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/stdarch-verify/tests/x86-intel.rs b/crates/stdarch-verify/tests/x86-intel.rs index 5dc21fa445..7364648369 100644 --- a/crates/stdarch-verify/tests/x86-intel.rs +++ b/crates/stdarch-verify/tests/x86-intel.rs @@ -452,6 +452,10 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> { // The XML file names IFMA as "avx512ifma52", while Rust calls // it "avx512ifma". "avx512ifma52" => String::from("avx512ifma"), + // Some AVX512f intrinsics are also supported by Knight's Corner. + // The XML lists them as avx512f/kncni, but we are solely gating + // them behind avx512f since we don't have a KNC feature yet. + "avx512f/kncni" => String::from("avx512f"), // See: https://github.com/rust-lang/stdarch/issues/738 // The intrinsics guide calls `f16c` `fp16c` in disagreement with // Intel's architecture manuals. 
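Before the final patch, a brief illustration of the mask conventions the series relies on. This sketch is not part of the patch series; it assumes a nightly toolchain that exposes these still-unstable AVX-512F intrinsics under core::arch::x86_64, and a CPU with AVX-512F support:

// Sketch: mask-register conventions used by the intrinsics above.
// Illustrative only; assumes nightly Rust exposing the unstable AVX-512F
// intrinsics and an AVX-512F-capable CPU.
#[target_feature(enable = "avx512f")]
unsafe fn mask_conventions() {
    use core::arch::x86_64::*;

    // _mm512_set_epi32 takes arguments e15..e0, so the *last* argument is
    // lane 0, and lane i of a comparison becomes bit i of the __mmask16.
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2);

    // Only lane 0 satisfies 1 < 2, so only the least significant bit is set.
    let full: __mmask16 = _mm512_cmplt_epi32_mask(a, b);
    assert_eq!(full, 0b0000_0000_0000_0001);

    // The masked variant is the full comparison ANDed with the zeromask,
    // exactly as the `_mm512_cmplt_epi32_mask(a, b) & m` bodies above compute.
    let m: __mmask16 = 0b1111_1111_0000_0001;
    assert_eq!(_mm512_mask_cmplt_epi32_mask(m, a, b), full & m);
}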
From 690a03ccab3dee9b84df6edb51e9dec846c19d66 Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Sat, 13 Jun 2020 17:20:56 +0000 Subject: [PATCH 10/11] Add remaining 32bit integer comparisons --- crates/core_arch/src/x86/avx512f.rs | 610 +++++++++++++++++++++++++++- 1 file changed, 589 insertions(+), 21 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index e3fd02ade0..8cb6b92624 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -148,6 +148,319 @@ pub unsafe fn _mm512_mask_cmplt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) _mm512_cmplt_epu32_mask(a, b) & m } +/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16())) +} + +/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpgt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmpgt_epu32_mask(a, b) & m +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmpgt_epu32_mask(b, a) +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmple_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmpgt_epu32_mask(b, a) & m +} + +/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmplt_epu32_mask(b, a) +} + +/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpge_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmplt_epu32_mask(b, a) & m +} + +/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) +} + +/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpeq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmpeq_epu32_mask(a, b) & m +} + +/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) +} + +/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu32_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpneq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmpneq_epu32_mask(a, b) & m +} + +/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(2)] +#[cfg_attr(test, assert_instr(vpcmp, op = 0))] +pub unsafe fn _mm512_cmp_epu32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 { + let neg_one = -1; + macro_rules! call { + ($imm3:expr) => { + vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one) + }; + } + let r = constify_imm3!(op, call); + transmute(r) +} + +/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op, +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(3)] +#[cfg_attr(test, assert_instr(vpcmp, op = 0))] +pub unsafe fn _mm512_mask_cmp_epu32_mask( + m: __mmask16, + a: __m512i, + b: __m512i, + op: _MM_CMPINT_ENUM, +) -> __mmask16 { + macro_rules! call { + ($imm3:expr) => { + vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16) + }; + } + let r = constify_imm3!(op, call); + transmute(r) +} + +/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) +} + +/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmplt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmplt_epi32_mask(a, b) & m +} + +/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) +} + +/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpgt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmpgt_epi32_mask(a, b) & m +} + +/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmpgt_epi32_mask(b, a) +} + +/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmple_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpgt_epi32_mask(b, a) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmplt_epi32_mask(b, a)
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmplt_epi32_mask(b, a) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<__m512i, _>(simd_eq(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpeq_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<__m512i, _>(simd_ne(a.as_i32x16(), b.as_i32x16()))
+}
+
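As the doc comments above state, every `_mask_` variant performs the full 16-lane compare and then ANDs the result with the zeromask `m`, so lanes whose mask bit is clear always report 0. A small illustration of that contract (a sketch, not part of the patch; assumes nightly `std::arch::x86_64`):

use std::arch::x86_64::*;

// Sketch: masked compare == unmasked compare & zeromask. With
// m = 0b00000000_00001111, only lanes 0..=3 can set a result bit.
#[target_feature(enable = "avx512f")]
unsafe fn zeromask_contract(a: __m512i, b: __m512i) -> bool {
    let m: __mmask16 = 0b00000000_00001111;
    _mm512_mask_cmpeq_epi32_mask(m, a, b) == (_mm512_cmpeq_epi32_mask(a, b) & m)
}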
+/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpneq_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_cmp_epi32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_epi32_mask(
+    m: __mmask16,
+    a: __m512i,
+    b: __m512i,
+    op: _MM_CMPINT_ENUM,
+) -> __mmask16 {
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
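The `call!`/`constify_imm3!` pattern above exists because the underlying LLVM intrinsic needs `op` as a compile-time immediate. A hypothetical stand-in showing how such a macro can turn a 3-bit value into eight constant arms (the real `constify_imm3!` lives in the crate's macro support; this is an illustration, not its actual definition):

// Sketch: match every possible value of the 3-bit immediate and expand
// the supplied macro with a literal in each arm, so the backend always
// sees a constant. Usage mirrors the code above: demo_constify_imm3!(op, call)
macro_rules! demo_constify_imm3 {
    ($imm3:expr, $expand:ident) => {
        match $imm3 & 0b111 {
            0 => $expand!(0),
            1 => $expand!(1),
            2 => $expand!(2),
            3 => $expand!(3),
            4 => $expand!(4),
            5 => $expand!(5),
            6 => $expand!(6),
            _ => $expand!(7),
        }
    };
}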
 /// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
 ///
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64)
@@ -315,27 +628,6 @@ pub unsafe fn _mm512_mask_cmp_epu64_mask(
     transmute(r)
 }
 
-/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector.
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi32)
-#[inline]
-#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
-    simd_bitmask::<__m512i, _>(simd_lt(a.as_i32x16(), b.as_i32x16()))
-}
-
-/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi32)
-#[inline]
-#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmplt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
-    _mm512_cmplt_epi32_mask(a, b) & m
-}
-
 /// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector.
 ///
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64)
@@ -526,6 +818,10 @@ extern "C" {
     fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
     #[link_name = "llvm.x86.avx512.mask.cmp.q.512"]
     fn vpcmpq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
+    #[link_name = "llvm.x86.avx512.mask.ucmp.d.512"]
+    fn vpcmpud(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
+    #[link_name = "llvm.x86.avx512.mask.cmp.d.512"]
+    fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
 }
 
 #[cfg(test)]
@@ -653,6 +949,142 @@ mod tests {
         assert_eq!(r, 0b01001010_01001010);
     }
 
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmpgt_epu32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+        let b = _mm512_set1_epi32(-1);
+        let m = _mm512_cmpgt_epu32_mask(b, a);
+        assert_eq!(m, 0b11001111_11001111);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+        let b = _mm512_set1_epi32(-1);
+        let mask = 0b01111010_01111010;
+        let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
+        assert_eq!(r, 0b01001010_01001010);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmple_epu32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+        let b = _mm512_set1_epi32(-1);
+        assert_eq!(_mm512_cmple_epu32_mask(a, b), _mm512_cmpgt_epu32_mask(b, a))
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmple_epu32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+        let b = _mm512_set1_epi32(-1);
+        let mask = 0b01111010_01111010;
+        assert_eq!(
+            _mm512_mask_cmple_epu32_mask(mask, a, b),
+            _mm512_mask_cmpgt_epu32_mask(mask, b, a)
+        );
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmpge_epu32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+        let b = _mm512_set1_epi32(-1);
+        assert_eq!(_mm512_cmpge_epu32_mask(a, b), _mm512_cmplt_epu32_mask(b, a))
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmpge_epu32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+        let b = _mm512_set1_epi32(-1);
+        let mask = 0b01111010_01111010;
+        assert_eq!(
+            _mm512_mask_cmpge_epu32_mask(mask, a, b),
+            _mm512_mask_cmplt_epu32_mask(mask, b, a)
+        );
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmpeq_epu32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
+        let m = _mm512_cmpeq_epu32_mask(b, a);
+        assert_eq!(m, 0b11001111_11001111);
+    }
+
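When reading the expected masks in these tests, note that `_mm512_set_epi32` takes its arguments from element 15 down to element 0, while bit i of a `__mmask16` reports lane i, so the first value listed maps to the most significant bit and the printed binary literal reads left-to-right in the same order as the `set` call. A tiny self-contained check of that correspondence (a sketch, not one of the patch's tests):

use std::arch::x86_64::*;

// Sketch: only element 15 (the first argument) differs from b, so only
// bit 15 of the inequality mask is set.
#[target_feature(enable = "avx512f")]
unsafe fn bit_order() {
    let a = _mm512_set_epi32(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    let b = _mm512_set1_epi32(0);
    assert_eq!(_mm512_cmpneq_epi32_mask(a, b), 0b10000000_00000000);
}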
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
+        let mask = 0b01111010_01111010;
+        let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
+        assert_eq!(r, 0b01001010_01001010);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmpneq_epu32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
+        let m = _mm512_cmpneq_epu32_mask(b, a);
+        assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
+                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
+        let mask = 0b01111010_01111010;
+        let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
+        assert_eq!(r, 0b00110010_00110010);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmp_epu32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+        let b = _mm512_set1_epi32(-1);
+        let m = _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT);
+        assert_eq!(m, 0b11001111_11001111);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmp_epu32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+        let b = _mm512_set1_epi32(-1);
+        let mask = 0b01111010_01111010;
+        let r = _mm512_mask_cmp_epu32_mask(mask, a, b, _MM_CMPINT_LT);
+        assert_eq!(r, 0b01001010_01001010);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_cmplt_epi32_mask() {
         #[rustfmt::skip]
@@ -674,6 +1106,142 @@ mod tests {
         assert_eq!(r, 0b00000100_00000100);
     }
 
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmpgt_epi32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
+        let b = _mm512_set1_epi32(-1);
+        let m = _mm512_cmpgt_epi32_mask(b, a);
+        assert_eq!(m, 0b00000101_00000101);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
+        let b = _mm512_set1_epi32(-1);
+        let mask = 0b01100110_01100110;
+        let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
+        assert_eq!(r, 0b00000100_00000100);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmple_epi32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+        let b = _mm512_set1_epi32(-1);
+        assert_eq!(_mm512_cmple_epi32_mask(a, b), _mm512_cmpgt_epi32_mask(b, a))
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmple_epi32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+        let b = _mm512_set1_epi32(-1);
+        let mask = 0b01111010_01111010;
+        assert_eq!(
+            _mm512_mask_cmple_epi32_mask(mask, a, b),
+            _mm512_mask_cmpgt_epi32_mask(mask, b, a)
+        );
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmpge_epi32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+        let b = _mm512_set1_epi32(-1);
+        assert_eq!(_mm512_cmpge_epi32_mask(a, b), _mm512_cmplt_epi32_mask(b, a))
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmpge_epi32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
+        let b = _mm512_set1_epi32(-1);
+        let mask = 0b01111010_01111010;
+        assert_eq!(
+            _mm512_mask_cmpge_epi32_mask(mask, a, b),
+            _mm512_mask_cmplt_epi32_mask(mask, b, a)
+        );
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmpeq_epi32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
+        let m = _mm512_cmpeq_epi32_mask(b, a);
+        assert_eq!(m, 0b11001111_11001111);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
+        let mask = 0b01111010_01111010;
+        let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
+        assert_eq!(r, 0b01001010_01001010);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmpneq_epi32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
+        let m = _mm512_cmpneq_epi32_mask(b, a);
+        assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
+                                 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
+        let mask = 0b01111010_01111010;
+        let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
+        assert_eq!(r, 0b00110010_00110010)
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmp_epi32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
+        let b = _mm512_set1_epi32(-1);
+        let m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
+        assert_eq!(m, 0b00000101_00000101);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmp_epi32_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
+                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
+        let b = _mm512_set1_epi32(-1);
+        let mask = 0b01100110_01100110;
+        let r = _mm512_mask_cmp_epi32_mask(mask, a, b, _MM_CMPINT_LT);
+        assert_eq!(r, 0b00000100_00000100);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_set_epi32() {
         let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);

From 832166ad7783ed21aa7afc628676e7f3331f0969 Mon Sep 17 00:00:00 2001
From: Daniel Smith
Date: Sat, 13 Jun 2020 17:36:23 +0000
Subject: [PATCH 11/11] Fix verify test with updated XML

---
 crates/stdarch-verify/tests/x86-intel.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/stdarch-verify/tests/x86-intel.rs b/crates/stdarch-verify/tests/x86-intel.rs
index f265f5d2e5..5adf5e6ef5 100644
--- a/crates/stdarch-verify/tests/x86-intel.rs
+++ b/crates/stdarch-verify/tests/x86-intel.rs
@@ -667,7 +667,7 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
         (&Type::MMASK8, "__mmask8") => {}
         (&Type::MMASK16, "__mmask16") => {}
 
-        (&Type::MM_CMPINT_ENUM, "const _MM_CMPINT_ENUM") => require_const()?,
+        (&Type::MM_CMPINT_ENUM, "_MM_CMPINT_ENUM") => {}
 
         // This is a macro (?) in C which seems to mutate its arguments, but
         // that means that we're taking pointers to arguments in rust
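For context on the verify change: `equate` pairs each Rust argument type against the type string in Intel's XML, and the updated XML apparently no longer spells the comparison operand as `const _MM_CMPINT_ENUM`, so the arm stops routing through `require_const()` and simply accepts the bare type name. A simplified sketch of that match shape (an editorial illustration, not the real function in x86-intel.rs):

// Sketch: accepted type pairs fall through; everything else is an error.
fn equate_sketch(rust: &str, intel: &str) -> bool {
    matches!(
        (rust, intel),
        ("__mmask8", "__mmask8")
            | ("__mmask16", "__mmask16")
            // Updated XML: the operand is a plain enum type, no const qualifier.
            | ("_MM_CMPINT_ENUM", "_MM_CMPINT_ENUM")
    )
}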