From 1af65a7e7bbc3bd17d638afc1219e8266674b0c2 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Wed, 20 Dec 2023 14:17:46 +0800 Subject: [PATCH 1/7] [CFE][X86] Remove support for Xeon Phi CPU names. --- clang/lib/Basic/Targets/X86.cpp | 7 - clang/lib/Driver/ToolChains/Arch/X86.cpp | 2 +- .../CodeGen/X86/x86-inline-asm-v-constraint.c | 1 - clang/test/CodeGen/attr-cpuspecific-cpus.c | 3 - clang/test/CodeGen/attr-cpuspecific.c | 90 +++++----- clang/test/Driver/cl-x86-flags.c | 4 +- clang/test/Frontend/x86-target-cpu.c | 2 - clang/test/Misc/target-invalid-cpu-note.c | 8 +- .../Preprocessor/predefined-arch-macros.c | 158 ------------------ clang/test/Preprocessor/x86_target_features.c | 2 - .../llvm/TargetParser/X86TargetParser.h | 2 - llvm/lib/TargetParser/X86TargetParser.cpp | 12 -- 12 files changed, 52 insertions(+), 239 deletions(-) diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index b97f88647fa49..2483bc8b08f01 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -633,11 +633,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, // recent primary x86 CPUs, and we should keep it that way. 
defineCPUMacros(Builder, "corei7"); break; - case CK_KNL: - defineCPUMacros(Builder, "knl"); - break; - case CK_KNM: - break; case CK_Lakemont: defineCPUMacros(Builder, "i586", /*Tuning*/false); defineCPUMacros(Builder, "pentium", /*Tuning*/false); @@ -1569,8 +1564,6 @@ std::optional X86TargetInfo::getCPUCacheLineSize() const { case CK_GraniterapidsD: case CK_Emeraldrapids: case CK_Clearwaterforest: - case CK_KNL: - case CK_KNM: // K7 case CK_Athlon: case CK_AthlonXP: diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp index fef0522aaf45b..eea339c842431 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -42,7 +42,7 @@ std::string x86::getX86TargetCPU(const Driver &D, const ArgList &Args, llvm::StringMap ArchMap({ {"AVX", "sandybridge"}, {"AVX2", "haswell"}, - {"AVX512F", "knl"}, + {"AVX512F", "skylake-avx512"}, {"AVX512", "skylake-avx512"}, }); if (Triple.getArch() == llvm::Triple::x86) { diff --git a/clang/test/CodeGen/X86/x86-inline-asm-v-constraint.c b/clang/test/CodeGen/X86/x86-inline-asm-v-constraint.c index b75a84d7a7bcb..07b5650470dbd 100644 --- a/clang/test/CodeGen/X86/x86-inline-asm-v-constraint.c +++ b/clang/test/CodeGen/X86/x86-inline-asm-v-constraint.c @@ -1,7 +1,6 @@ // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu x86-64 -o - |FileCheck %s --check-prefix SSE // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake -D AVX -o - | FileCheck %s --check-prefixes AVX,SSE // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -D AVX512 -D AVX -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE -// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu knl -D AVX -D AVX512 -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE typedef float __m128 __attribute__ ((vector_size (16))); typedef float __m256 __attribute__ ((vector_size (32))); diff --git 
a/clang/test/CodeGen/attr-cpuspecific-cpus.c b/clang/test/CodeGen/attr-cpuspecific-cpus.c index dd154fd227b25..f8afe0baadd28 100644 --- a/clang/test/CodeGen/attr-cpuspecific-cpus.c +++ b/clang/test/CodeGen/attr-cpuspecific-cpus.c @@ -32,11 +32,9 @@ ATTR(cpu_specific(haswell)) void CPU(void){} ATTR(cpu_specific(core_4th_gen_avx_tsx)) void CPU(void){} ATTR(cpu_specific(broadwell)) void CPU(void){} ATTR(cpu_specific(core_5th_gen_avx_tsx)) void CPU(void){} -ATTR(cpu_specific(knl)) void CPU(void){} ATTR(cpu_specific(skylake)) void CPU(void){} ATTR(cpu_specific(skylake_avx512)) void CPU(void){} ATTR(cpu_specific(cannonlake)) void CPU(void){} -ATTR(cpu_specific(knm)) void CPU(void){} ATTR(cpu_specific(cascadelake)) void CPU(void){} ATTR(cpu_specific(cooperlake)) void CPU(void){} ATTR(cpu_specific(icelake_client)) void CPU(void){} @@ -50,7 +48,6 @@ ATTR(cpu_specific(core_2nd_gen_avx)) void CPU1(void){} ATTR(cpu_specific(core_3rd_gen_avx)) void CPU2(void){} ATTR(cpu_specific(core_4th_gen_avx)) void CPU3(void){} ATTR(cpu_specific(core_5th_gen_avx)) void CPU4(void){} -ATTR(cpu_specific(mic_avx512)) void CPU5(void){} ATTR(cpu_specific(pentiumpro)) void CPU6(void){} ATTR(cpu_specific(pentium3)) void CPU7(void){} ATTR(cpu_specific(pentium3m)) void CPU8(void){} diff --git a/clang/test/CodeGen/attr-cpuspecific.c b/clang/test/CodeGen/attr-cpuspecific.c index 2c3e6931800cd..478480aa24996 100644 --- a/clang/test/CodeGen/attr-cpuspecific.c +++ b/clang/test/CodeGen/attr-cpuspecific.c @@ -70,14 +70,14 @@ void NotCalled(void){} // declaration. 
void TwoVersions(void); -ATTR(cpu_dispatch(ivybridge, knl)) +ATTR(cpu_dispatch(ivybridge, skx)) void TwoVersions(void); // LINUX: define weak_odr ptr @TwoVersions.resolver() // LINUX: call void @__cpu_indicator_init // LINUX: %[[FEAT_INIT:.+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 -// LINUX: %[[FEAT_JOIN:.+]] = and i32 %[[FEAT_INIT]], 59754495 -// LINUX: %[[FEAT_CHECK:.+]] = icmp eq i32 %[[FEAT_JOIN]], 59754495 -// LINUX: ret ptr @TwoVersions.Z +// LINUX: %[[FEAT_JOIN:.+]] = and i32 %[[FEAT_INIT]], 16762879 +// LINUX: %[[FEAT_CHECK:.+]] = icmp eq i32 %[[FEAT_JOIN]], 16762879 +// LINUX: ret ptr @TwoVersions.a // LINUX: ret ptr @TwoVersions.S // LINUX: call void @llvm.trap // LINUX: unreachable @@ -85,9 +85,9 @@ void TwoVersions(void); // WINDOWS: define weak_odr dso_local void @TwoVersions() comdat // WINDOWS: call void @__cpu_indicator_init() // WINDOWS: %[[FEAT_INIT:.+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4 -// WINDOWS: %[[FEAT_JOIN:.+]] = and i32 %[[FEAT_INIT]], 59754495 -// WINDOWS: %[[FEAT_CHECK:.+]] = icmp eq i32 %[[FEAT_JOIN]], 59754495 -// WINDOWS: call void @TwoVersions.Z() +// WINDOWS: %[[FEAT_JOIN:.+]] = and i32 %[[FEAT_INIT]], 16762879 +// WINDOWS: %[[FEAT_CHECK:.+]] = icmp eq i32 %[[FEAT_JOIN]], 16762879 +// WINDOWS: call void @TwoVersions.a() // WINDOWS-NEXT: ret void // WINDOWS: call void @TwoVersions.S() // WINDOWS-NEXT: ret void @@ -98,45 +98,45 @@ ATTR(cpu_specific(ivybridge)) void TwoVersions(void){} // CHECK: define {{.*}}void @TwoVersions.S() #[[S]] -ATTR(cpu_specific(knl)) +ATTR(cpu_specific(skx)) void TwoVersions(void){} -// CHECK: define {{.*}}void @TwoVersions.Z() #[[K:[0-9]+]] +// CHECK: define {{.*}}void @TwoVersions.a() #[[K:[0-9]+]] -ATTR(cpu_specific(ivybridge, knl)) +ATTR(cpu_specific(ivybridge, skx)) void TwoVersionsSameAttr(void){} // CHECK: define {{.*}}void 
@TwoVersionsSameAttr.S() #[[S]] -// CHECK: define {{.*}}void @TwoVersionsSameAttr.Z() #[[K]] +// CHECK: define {{.*}}void @TwoVersionsSameAttr.a() #[[K]] -ATTR(cpu_specific(atom, ivybridge, knl)) +ATTR(cpu_specific(atom, ivybridge, skx)) void ThreeVersionsSameAttr(void){} // CHECK: define {{.*}}void @ThreeVersionsSameAttr.O() #[[O:[0-9]+]] // CHECK: define {{.*}}void @ThreeVersionsSameAttr.S() #[[S]] -// CHECK: define {{.*}}void @ThreeVersionsSameAttr.Z() #[[K]] +// CHECK: define {{.*}}void @ThreeVersionsSameAttr.a() #[[K]] -ATTR(cpu_specific(knl)) +ATTR(cpu_specific(skx)) void CpuSpecificNoDispatch(void) {} -// CHECK: define {{.*}}void @CpuSpecificNoDispatch.Z() #[[K:[0-9]+]] +// CHECK: define {{.*}}void @CpuSpecificNoDispatch.a() #[[K:[0-9]+]] -ATTR(cpu_dispatch(knl)) +ATTR(cpu_dispatch(skx)) void OrderDispatchUsageSpecific(void); // LINUX: define weak_odr ptr @OrderDispatchUsageSpecific.resolver() // LINUX: call void @__cpu_indicator_init -// LINUX: ret ptr @OrderDispatchUsageSpecific.Z +// LINUX: ret ptr @OrderDispatchUsageSpecific.a // LINUX: call void @llvm.trap // LINUX: unreachable // WINDOWS: define weak_odr dso_local void @OrderDispatchUsageSpecific() comdat // WINDOWS: call void @__cpu_indicator_init() -// WINDOWS: call void @OrderDispatchUsageSpecific.Z() +// WINDOWS: call void @OrderDispatchUsageSpecific.a() // WINDOWS-NEXT: ret void // WINDOWS: call void @llvm.trap // WINDOWS: unreachable -// CHECK: define {{.*}}void @OrderDispatchUsageSpecific.Z() +// CHECK: define {{.*}}void @OrderDispatchUsageSpecific.a() -ATTR(cpu_specific(knl)) +ATTR(cpu_specific(skx)) void OrderSpecificUsageDispatch(void) {} -// CHECK: define {{.*}}void @OrderSpecificUsageDispatch.Z() #[[K:[0-9]+]] +// CHECK: define {{.*}}void @OrderSpecificUsageDispatch.a() #[[K:[0-9]+]] void usages(void) { SingleVersion(); @@ -165,17 +165,17 @@ void usages(void) { // LINUX: declare void @CpuSpecificNoDispatch.ifunc() // has an extra config to emit! 
-ATTR(cpu_dispatch(ivybridge, knl, atom)) +ATTR(cpu_dispatch(ivybridge, skx, atom)) void TwoVersionsSameAttr(void); // LINUX: define weak_odr ptr @TwoVersionsSameAttr.resolver() -// LINUX: ret ptr @TwoVersionsSameAttr.Z +// LINUX: ret ptr @TwoVersionsSameAttr.a // LINUX: ret ptr @TwoVersionsSameAttr.S // LINUX: ret ptr @TwoVersionsSameAttr.O // LINUX: call void @llvm.trap // LINUX: unreachable // WINDOWS: define weak_odr dso_local void @TwoVersionsSameAttr() comdat -// WINDOWS: call void @TwoVersionsSameAttr.Z +// WINDOWS: call void @TwoVersionsSameAttr.a // WINDOWS-NEXT: ret void // WINDOWS: call void @TwoVersionsSameAttr.S // WINDOWS-NEXT: ret void @@ -184,11 +184,11 @@ void TwoVersionsSameAttr(void); // WINDOWS: call void @llvm.trap // WINDOWS: unreachable -ATTR(cpu_dispatch(atom, ivybridge, knl)) +ATTR(cpu_dispatch(atom, ivybridge, skx)) void ThreeVersionsSameAttr(void){} // LINUX: define weak_odr ptr @ThreeVersionsSameAttr.resolver() // LINUX: call void @__cpu_indicator_init -// LINUX: ret ptr @ThreeVersionsSameAttr.Z +// LINUX: ret ptr @ThreeVersionsSameAttr.a // LINUX: ret ptr @ThreeVersionsSameAttr.S // LINUX: ret ptr @ThreeVersionsSameAttr.O // LINUX: call void @llvm.trap @@ -196,7 +196,7 @@ void ThreeVersionsSameAttr(void){} // WINDOWS: define weak_odr dso_local void @ThreeVersionsSameAttr() comdat // WINDOWS: call void @__cpu_indicator_init -// WINDOWS: call void @ThreeVersionsSameAttr.Z +// WINDOWS: call void @ThreeVersionsSameAttr.a // WINDOWS-NEXT: ret void // WINDOWS: call void @ThreeVersionsSameAttr.S // WINDOWS-NEXT: ret void @@ -205,22 +205,22 @@ void ThreeVersionsSameAttr(void){} // WINDOWS: call void @llvm.trap // WINDOWS: unreachable -ATTR(cpu_dispatch(knl)) +ATTR(cpu_dispatch(skx)) void OrderSpecificUsageDispatch(void); // LINUX: define weak_odr ptr @OrderSpecificUsageDispatch.resolver() -// LINUX: ret ptr @OrderSpecificUsageDispatch.Z +// LINUX: ret ptr @OrderSpecificUsageDispatch.a // WINDOWS: define weak_odr dso_local void 
@OrderSpecificUsageDispatch() comdat // WINDOWS: call void @__cpu_indicator_init -// WINDOWS: call void @OrderSpecificUsageDispatch.Z +// WINDOWS: call void @OrderSpecificUsageDispatch.a // WINDOWS-NEXT: ret void // No Cpu Specific options. -ATTR(cpu_dispatch(atom, ivybridge, knl)) +ATTR(cpu_dispatch(atom, ivybridge, skx)) void NoSpecifics(void); // LINUX: define weak_odr ptr @NoSpecifics.resolver() // LINUX: call void @__cpu_indicator_init -// LINUX: ret ptr @NoSpecifics.Z +// LINUX: ret ptr @NoSpecifics.a // LINUX: ret ptr @NoSpecifics.S // LINUX: ret ptr @NoSpecifics.O // LINUX: call void @llvm.trap @@ -228,7 +228,7 @@ void NoSpecifics(void); // WINDOWS: define weak_odr dso_local void @NoSpecifics() comdat // WINDOWS: call void @__cpu_indicator_init -// WINDOWS: call void @NoSpecifics.Z +// WINDOWS: call void @NoSpecifics.a // WINDOWS-NEXT: ret void // WINDOWS: call void @NoSpecifics.S // WINDOWS-NEXT: ret void @@ -237,11 +237,11 @@ void NoSpecifics(void); // WINDOWS: call void @llvm.trap // WINDOWS: unreachable -ATTR(cpu_dispatch(atom, generic, ivybridge, knl)) +ATTR(cpu_dispatch(atom, generic, ivybridge, skx)) void HasGeneric(void); // LINUX: define weak_odr ptr @HasGeneric.resolver() // LINUX: call void @__cpu_indicator_init -// LINUX: ret ptr @HasGeneric.Z +// LINUX: ret ptr @HasGeneric.a // LINUX: ret ptr @HasGeneric.S // LINUX: ret ptr @HasGeneric.O // LINUX: ret ptr @HasGeneric.A @@ -249,7 +249,7 @@ void HasGeneric(void); // WINDOWS: define weak_odr dso_local void @HasGeneric() comdat // WINDOWS: call void @__cpu_indicator_init -// WINDOWS: call void @HasGeneric.Z +// WINDOWS: call void @HasGeneric.a // WINDOWS-NEXT: ret void // WINDOWS: call void @HasGeneric.S // WINDOWS-NEXT: ret void @@ -259,11 +259,11 @@ void HasGeneric(void); // WINDOWS-NEXT: ret void // WINDOWS-NOT: call void @llvm.trap -ATTR(cpu_dispatch(atom, generic, ivybridge, knl)) +ATTR(cpu_dispatch(atom, generic, ivybridge, skx)) void HasParams(int i, double d); // LINUX: define weak_odr ptr 
@HasParams.resolver() // LINUX: call void @__cpu_indicator_init -// LINUX: ret ptr @HasParams.Z +// LINUX: ret ptr @HasParams.a // LINUX: ret ptr @HasParams.S // LINUX: ret ptr @HasParams.O // LINUX: ret ptr @HasParams.A @@ -271,7 +271,7 @@ void HasParams(int i, double d); // WINDOWS: define weak_odr dso_local void @HasParams(i32 %0, double %1) comdat // WINDOWS: call void @__cpu_indicator_init -// WINDOWS: call void @HasParams.Z(i32 %0, double %1) +// WINDOWS: call void @HasParams.a(i32 %0, double %1) // WINDOWS-NEXT: ret void // WINDOWS: call void @HasParams.S(i32 %0, double %1) // WINDOWS-NEXT: ret void @@ -281,11 +281,11 @@ void HasParams(int i, double d); // WINDOWS-NEXT: ret void // WINDOWS-NOT: call void @llvm.trap -ATTR(cpu_dispatch(atom, generic, ivybridge, knl)) +ATTR(cpu_dispatch(atom, generic, ivybridge, skx)) int HasParamsAndReturn(int i, double d); // LINUX: define weak_odr ptr @HasParamsAndReturn.resolver() // LINUX: call void @__cpu_indicator_init -// LINUX: ret ptr @HasParamsAndReturn.Z +// LINUX: ret ptr @HasParamsAndReturn.a // LINUX: ret ptr @HasParamsAndReturn.S // LINUX: ret ptr @HasParamsAndReturn.O // LINUX: ret ptr @HasParamsAndReturn.A @@ -293,7 +293,7 @@ int HasParamsAndReturn(int i, double d); // WINDOWS: define weak_odr dso_local i32 @HasParamsAndReturn(i32 %0, double %1) comdat // WINDOWS: call void @__cpu_indicator_init -// WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.Z(i32 %0, double %1) +// WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.a(i32 %0, double %1) // WINDOWS-NEXT: ret i32 %[[RET]] // WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.S(i32 %0, double %1) // WINDOWS-NEXT: ret i32 %[[RET]] @@ -349,12 +349,12 @@ int DispatchFirst(void) {return 1;} // WINDOWS: define dso_local i32 @DispatchFirst.B // WINDOWS: ret i32 1 -ATTR(cpu_specific(knl)) +ATTR(cpu_specific(skx)) void OrderDispatchUsageSpecific(void) {} // CHECK: attributes #[[S]] = 
{{.*}}"target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" // CHECK-SAME: "tune-cpu"="ivybridge" -// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" -// CHECK-SAME: "tune-cpu"="knl" +// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" +// CHECK-SAME: "tune-cpu"="skx" // CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+movbe,+sahf,+sse,+sse2,+sse3,+ssse3,+x87" // CHECK-SAME: "tune-cpu"="atom" diff --git a/clang/test/Driver/cl-x86-flags.c b/clang/test/Driver/cl-x86-flags.c index 51b16f0ce3546..ae3201db6deec 100644 --- a/clang/test/Driver/cl-x86-flags.c +++ b/clang/test/Driver/cl-x86-flags.c @@ -71,7 +71,7 @@ // RUN: %clang_cl -m32 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_32_ARCH_AVX512F -- %s #if defined(TEST_32_ARCH_AVX512F) -#if _M_IX86_FP != 2 || !__AVX__ || !__AVX2__ || !__AVX512F__ || __AVX512BW__ +#if _M_IX86_FP != 2 || !__AVX__ || !__AVX2__ || !__AVX512F__ || !__AVX512BW__ #error fail #endif #endif @@ -111,7 +111,7 @@ // RUN: %clang_cl -m64 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_64_ARCH_AVX512F -- %s #if defined(TEST_64_ARCH_AVX512F) -#if _M_IX86_FP || !__AVX__ || !__AVX2__ || !__AVX512F__ || __AVX512BW__ +#if _M_IX86_FP || !__AVX__ || !__AVX2__ || 
!__AVX512F__ || !__AVX512BW__ #error fail #endif #endif diff --git a/clang/test/Frontend/x86-target-cpu.c b/clang/test/Frontend/x86-target-cpu.c index 6c8502ac2c21e..116647164dcc5 100644 --- a/clang/test/Frontend/x86-target-cpu.c +++ b/clang/test/Frontend/x86-target-cpu.c @@ -15,8 +15,6 @@ // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu cannonlake -verify %s // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu icelake-client -verify %s // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu icelake-server -verify %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knl -verify %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knm -verify %s // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu bonnell -verify %s // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu silvermont -verify %s // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu k8 -verify %s diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index 48e9f05d9b03d..6aa8d82ebc67b 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -13,19 +13,19 @@ // RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86 // X86: error: unknown target CPU 'not-a-cpu' -// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, 
gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}} +// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}} // RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64 // X86_64: error: unknown target CPU 'not-a-cpu' -// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, 
icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}} +// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}} // RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86 // TUNE_X86: error: unknown target CPU 'not-a-cpu' -// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, 
meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}} +// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}} // RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64 // TUNE_X86_64: error: unknown target CPU 'not-a-cpu' -// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, 
corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}} +// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}} // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX // NVPTX: error: unknown target CPU 'not-a-cpu' diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index 
1ae6faea77678..305d6363b9c00 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -787,164 +787,6 @@ // CHECK_SKL_M64: #define __x86_64 1 // CHECK_SKL_M64: #define __x86_64__ 1 -// RUN: %clang -march=knl -m32 -E -dM %s -o - 2>&1 \ -// RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_KNL_M32 -// CHECK_KNL_M32: #define __AES__ 1 -// CHECK_KNL_M32: #define __AVX2__ 1 -// CHECK_KNL_M32: #define __AVX512CD__ 1 -// CHECK_KNL_M32: #define __AVX512ER__ 1 -// CHECK_KNL_M32: #define __AVX512F__ 1 -// CHECK_KNL_M32: #define __AVX512PF__ 1 -// CHECK_KNL_M32: #define __AVX__ 1 -// CHECK_KNL_M32: #define __BMI2__ 1 -// CHECK_KNL_M32: #define __BMI__ 1 -// CHECK_KNL_M32-NOT: #define __EVEX256__ 1 -// CHECK_KNL_M32: #define __EVEX512__ 1 -// CHECK_KNL_M32: #define __F16C__ 1 -// CHECK_KNL_M32: #define __FMA__ 1 -// CHECK_KNL_M32: #define __LZCNT__ 1 -// CHECK_KNL_M32: #define __MMX__ 1 -// CHECK_KNL_M32: #define __MOVBE__ 1 -// CHECK_KNL_M32: #define __PCLMUL__ 1 -// CHECK_KNL_M32: #define __POPCNT__ 1 -// CHECK_KNL_M32: #define __PREFETCHWT1__ 1 -// CHECK_KNL_M32: #define __PRFCHW__ 1 -// CHECK_KNL_M32: #define __RDRND__ 1 -// CHECK_KNL_M32: #define __SSE2__ 1 -// CHECK_KNL_M32: #define __SSE3__ 1 -// CHECK_KNL_M32: #define __SSE4_1__ 1 -// CHECK_KNL_M32: #define __SSE4_2__ 1 -// CHECK_KNL_M32: #define __SSE__ 1 -// CHECK_KNL_M32: #define __SSSE3__ 1 -// CHECK_KNL_M32: #define __XSAVEOPT__ 1 -// CHECK_KNL_M32: #define __XSAVE__ 1 -// CHECK_KNL_M32: #define __i386 1 -// CHECK_KNL_M32: #define __i386__ 1 -// CHECK_KNL_M32: #define __knl 1 -// CHECK_KNL_M32: #define __knl__ 1 -// CHECK_KNL_M32: #define __tune_knl__ 1 -// CHECK_KNL_M32: #define i386 1 - -// RUN: %clang -march=knl -m64 -E -dM %s -o - 2>&1 \ -// RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_KNL_M64 -// CHECK_KNL_M64: #define __AES__ 1 -// CHECK_KNL_M64: 
#define __AVX2__ 1 -// CHECK_KNL_M64: #define __AVX512CD__ 1 -// CHECK_KNL_M64: #define __AVX512ER__ 1 -// CHECK_KNL_M64: #define __AVX512F__ 1 -// CHECK_KNL_M64: #define __AVX512PF__ 1 -// CHECK_KNL_M64: #define __AVX__ 1 -// CHECK_KNL_M64: #define __BMI2__ 1 -// CHECK_KNL_M64: #define __BMI__ 1 -// CHECK_KNL_M64-NOT: #define __EVEX256__ 1 -// CHECK_KNL_M64: #define __EVEX512__ 1 -// CHECK_KNL_M64: #define __F16C__ 1 -// CHECK_KNL_M64: #define __FMA__ 1 -// CHECK_KNL_M64: #define __LZCNT__ 1 -// CHECK_KNL_M64: #define __MMX__ 1 -// CHECK_KNL_M64: #define __MOVBE__ 1 -// CHECK_KNL_M64: #define __PCLMUL__ 1 -// CHECK_KNL_M64: #define __POPCNT__ 1 -// CHECK_KNL_M64: #define __PREFETCHWT1__ 1 -// CHECK_KNL_M64: #define __PRFCHW__ 1 -// CHECK_KNL_M64: #define __RDRND__ 1 -// CHECK_KNL_M64: #define __SSE2_MATH__ 1 -// CHECK_KNL_M64: #define __SSE2__ 1 -// CHECK_KNL_M64: #define __SSE3__ 1 -// CHECK_KNL_M64: #define __SSE4_1__ 1 -// CHECK_KNL_M64: #define __SSE4_2__ 1 -// CHECK_KNL_M64: #define __SSE_MATH__ 1 -// CHECK_KNL_M64: #define __SSE__ 1 -// CHECK_KNL_M64: #define __SSSE3__ 1 -// CHECK_KNL_M64: #define __XSAVEOPT__ 1 -// CHECK_KNL_M64: #define __XSAVE__ 1 -// CHECK_KNL_M64: #define __amd64 1 -// CHECK_KNL_M64: #define __amd64__ 1 -// CHECK_KNL_M64: #define __knl 1 -// CHECK_KNL_M64: #define __knl__ 1 -// CHECK_KNL_M64: #define __tune_knl__ 1 -// CHECK_KNL_M64: #define __x86_64 1 -// CHECK_KNL_M64: #define __x86_64__ 1 - -// RUN: %clang -march=knm -m32 -E -dM %s -o - 2>&1 \ -// RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_KNM_M32 -// CHECK_KNM_M32: #define __AES__ 1 -// CHECK_KNM_M32: #define __AVX2__ 1 -// CHECK_KNM_M32: #define __AVX512CD__ 1 -// CHECK_KNM_M32: #define __AVX512ER__ 1 -// CHECK_KNM_M32: #define __AVX512F__ 1 -// CHECK_KNM_M32: #define __AVX512PF__ 1 -// CHECK_KNM_M32: #define __AVX512VPOPCNTDQ__ 1 -// CHECK_KNM_M32: #define __AVX__ 1 -// CHECK_KNM_M32: #define __BMI2__ 1 -// CHECK_KNM_M32: 
#define __BMI__ 1 -// CHECK_KNM_M32-NOT: #define __EVEX256__ 1 -// CHECK_KNM_M32: #define __EVEX512__ 1 -// CHECK_KNM_M32: #define __F16C__ 1 -// CHECK_KNM_M32: #define __FMA__ 1 -// CHECK_KNM_M32: #define __LZCNT__ 1 -// CHECK_KNM_M32: #define __MMX__ 1 -// CHECK_KNM_M32: #define __MOVBE__ 1 -// CHECK_KNM_M32: #define __PCLMUL__ 1 -// CHECK_KNM_M32: #define __POPCNT__ 1 -// CHECK_KNM_M32: #define __PREFETCHWT1__ 1 -// CHECK_KNM_M32: #define __PRFCHW__ 1 -// CHECK_KNM_M32: #define __RDRND__ 1 -// CHECK_KNM_M32: #define __SSE2__ 1 -// CHECK_KNM_M32: #define __SSE3__ 1 -// CHECK_KNM_M32: #define __SSE4_1__ 1 -// CHECK_KNM_M32: #define __SSE4_2__ 1 -// CHECK_KNM_M32: #define __SSE__ 1 -// CHECK_KNM_M32: #define __SSSE3__ 1 -// CHECK_KNM_M32: #define __XSAVEOPT__ 1 -// CHECK_KNM_M32: #define __XSAVE__ 1 -// CHECK_KNM_M32: #define __i386 1 -// CHECK_KNM_M32: #define __i386__ 1 -// CHECK_KNM_M32: #define i386 1 - -// RUN: %clang -march=knm -m64 -E -dM %s -o - 2>&1 \ -// RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_KNM_M64 -// CHECK_KNM_M64: #define __AES__ 1 -// CHECK_KNM_M64: #define __AVX2__ 1 -// CHECK_KNM_M64: #define __AVX512CD__ 1 -// CHECK_KNM_M64: #define __AVX512ER__ 1 -// CHECK_KNM_M64: #define __AVX512F__ 1 -// CHECK_KNM_M64: #define __AVX512PF__ 1 -// CHECK_KNM_M64: #define __AVX512VPOPCNTDQ__ 1 -// CHECK_KNM_M64: #define __AVX__ 1 -// CHECK_KNM_M64: #define __BMI2__ 1 -// CHECK_KNM_M64: #define __BMI__ 1 -// CHECK_KNM_M64-NOT: #define __EVEX256__ 1 -// CHECK_KNM_M64: #define __EVEX512__ 1 -// CHECK_KNM_M64: #define __F16C__ 1 -// CHECK_KNM_M64: #define __FMA__ 1 -// CHECK_KNM_M64: #define __LZCNT__ 1 -// CHECK_KNM_M64: #define __MMX__ 1 -// CHECK_KNM_M64: #define __MOVBE__ 1 -// CHECK_KNM_M64: #define __PCLMUL__ 1 -// CHECK_KNM_M64: #define __POPCNT__ 1 -// CHECK_KNM_M64: #define __PREFETCHWT1__ 1 -// CHECK_KNM_M64: #define __PRFCHW__ 1 -// CHECK_KNM_M64: #define __RDRND__ 1 -// CHECK_KNM_M64: #define 
__SSE2_MATH__ 1 -// CHECK_KNM_M64: #define __SSE2__ 1 -// CHECK_KNM_M64: #define __SSE3__ 1 -// CHECK_KNM_M64: #define __SSE4_1__ 1 -// CHECK_KNM_M64: #define __SSE4_2__ 1 -// CHECK_KNM_M64: #define __SSE_MATH__ 1 -// CHECK_KNM_M64: #define __SSE__ 1 -// CHECK_KNM_M64: #define __SSSE3__ 1 -// CHECK_KNM_M64: #define __XSAVEOPT__ 1 -// CHECK_KNM_M64: #define __XSAVE__ 1 -// CHECK_KNM_M64: #define __amd64 1 -// CHECK_KNM_M64: #define __amd64__ 1 -// CHECK_KNM_M64: #define __x86_64 1 -// CHECK_KNM_M64: #define __x86_64__ 1 - // RUN: %clang -march=skylake-avx512 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SKX_M32 diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c index 888eecd08d84a..8ef565ccbc85c 100644 --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -287,7 +287,6 @@ // RUN: %clang -target i386-unknown-unknown -march=nehalem -mno-sse4.2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=CPUPOPCNT %s // RUN: %clang -target i386-unknown-unknown -march=silvermont -mno-sse4.2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=CPUPOPCNT %s -// RUN: %clang -target i386-unknown-unknown -march=knl -mno-sse4.2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=CPUPOPCNT %s // CPUPOPCNT: #define __POPCNT__ 1 @@ -305,7 +304,6 @@ // RUN: %clang -target i386-unknown-unknown -march=pentium3 -mno-sse -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=MARCHMMXNOSSE %s // RUN: %clang -target i386-unknown-unknown -march=atom -mno-sse -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=MARCHMMXNOSSE %s -// RUN: %clang -target i386-unknown-unknown -march=knl -mno-sse -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=MARCHMMXNOSSE %s // RUN: %clang -target i386-unknown-unknown -march=btver1 -mno-sse -x c -E -dM -o 
- %s | FileCheck -match-full-lines --check-prefix=MARCHMMXNOSSE %s // RUN: %clang -target i386-unknown-unknown -march=znver1 -mno-sse -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=MARCHMMXNOSSE %s diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.h b/llvm/include/llvm/TargetParser/X86TargetParser.h index 2083e585af4ac..f3b5b767f56e4 100644 --- a/llvm/include/llvm/TargetParser/X86TargetParser.h +++ b/llvm/include/llvm/TargetParser/X86TargetParser.h @@ -121,8 +121,6 @@ enum CPUKind { CK_GraniterapidsD, CK_Emeraldrapids, CK_Clearwaterforest, - CK_KNL, - CK_KNM, CK_Lakemont, CK_K6, CK_K6_2, diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index 085554f18b2b6..df900aed145ff 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -93,13 +93,6 @@ constexpr FeatureBitset FeaturesHaswell = constexpr FeatureBitset FeaturesBroadwell = FeaturesHaswell | FeatureADX | FeaturePRFCHW | FeatureRDSEED; -// Intel Knights Landing and Knights Mill -// Knights Landing has feature parity with Broadwell. -constexpr FeatureBitset FeaturesKNL = - FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureEVEX512 | - FeatureAVX512CD | FeatureAVX512ER | FeatureAVX512PF | FeaturePREFETCHWT1; -constexpr FeatureBitset FeaturesKNM = FeaturesKNL | FeatureAVX512VPOPCNTDQ; - // Intel Skylake processors. constexpr FeatureBitset FeaturesSkylakeClient = FeaturesBroadwell | FeatureAES | FeatureCLFLUSHOPT | FeatureXSAVEC | @@ -379,11 +372,6 @@ constexpr ProcInfo Processors[] = { { {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512BF16, FeaturesSapphireRapids, 'n', false }, // Clearwaterforest microarchitecture based processors. { {"clearwaterforest"}, CK_Lunarlake, FEATURE_AVX2, FeaturesClearwaterforest, 'p', false }, - // Knights Landing processor. 
- { {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', false }, - { {"mic_avx512"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', true }, - // Knights Mill processor. - { {"knm"}, CK_KNM, FEATURE_AVX5124FMAPS, FeaturesKNM, 'j', false }, // Lakemont microarchitecture based processors. { {"lakemont"}, CK_Lakemont, ~0U, FeatureCMPXCHG8B, '\0', false }, // K6 architecture processors. From 62f5c0ce3f8946f432ecf8e3535574c0d99615d5 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Thu, 21 Dec 2023 20:42:21 +0800 Subject: [PATCH 2/7] [X86][BE] Remove Xeon Phi CPU names supports. --- llvm/lib/Target/X86/X86.td | 53 -------------------------------------- 1 file changed, 53 deletions(-) diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 5fd6828f43129..d13dc9a271e59 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1281,51 +1281,6 @@ def ProcessorFeatures { list CWFFeatures = !listconcat(ARLSFeatures, CWFAdditionalFeatures); - // Knights Landing - list KNLFeatures = [FeatureX87, - FeatureCX8, - FeatureCMOV, - FeatureMMX, - FeatureFXSR, - FeatureNOPL, - FeatureX86_64, - FeatureCX16, - FeatureCRC32, - FeaturePOPCNT, - FeaturePCLMUL, - FeatureXSAVE, - FeatureXSAVEOPT, - FeatureLAHFSAHF64, - FeatureAES, - FeatureRDRAND, - FeatureF16C, - FeatureFSGSBase, - FeatureAVX512, - FeatureEVEX512, - FeatureERI, - FeatureCDI, - FeaturePFI, - FeaturePREFETCHWT1, - FeatureADX, - FeatureRDSEED, - FeatureMOVBE, - FeatureLZCNT, - FeatureBMI, - FeatureBMI2, - FeatureFMA, - FeaturePRFCHW]; - list KNLTuning = [TuningSlowDivide64, - TuningSlow3OpsLEA, - TuningSlowIncDec, - TuningSlowTwoMemOps, - TuningPreferMaskRegisters, - TuningFastGather, - TuningFastMOVBE, - TuningSlowPMADDWD]; - // TODO Add AVX5124FMAPS/AVX5124VNNIW features - list KNMFeatures = - !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]); - // Barcelona list BarcelonaFeatures = [FeatureX87, FeatureCX8, @@ -1746,14 +1701,6 @@ foreach P = ["broadwell", "core_5th_gen_avx", "core_5th_gen_avx_tsx"]
in { def : ProcModel<"skylake", SkylakeClientModel, ProcessorFeatures.SKLFeatures, ProcessorFeatures.SKLTuning>; -// FIXME: define KNL scheduler model -foreach P = ["knl", "mic_avx512"] in { - def : ProcModel; -} -def : ProcModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures, - ProcessorFeatures.KNLTuning>; - foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in { def : ProcModel; From c5031694c2c6995daf60286436d0ef8e10e24157 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Thu, 21 Dec 2023 16:34:53 +0800 Subject: [PATCH 3/7] [X86][BE] Workaround backend tests to work. replace '-mcpu=knl' with '-mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper' --- .../X86/masked-intrinsic-cost-inseltpoison.ll | 2 +- .../Analysis/CostModel/X86/masked-intrinsic-cost.ll | 2 +- llvm/test/Analysis/CostModel/X86/mul32.ll | 2 +- llvm/test/Analysis/CostModel/X86/mul64.ll | 2 +- llvm/test/CodeGen/X86/all-ones-vector.ll | 4 ++-- llvm/test/CodeGen/X86/atomic-idempotent.ll | 2 +- llvm/test/CodeGen/X86/avx-isa-check.ll | 8 ++++---- llvm/test/CodeGen/X86/avx512-bugfix-23634.ll | 2 +- llvm/test/CodeGen/X86/avx512-bugfix-25270.ll | 2 +- llvm/test/CodeGen/X86/avx512-build-vector.ll | 2 +- llvm/test/CodeGen/X86/avx512-calling-conv.ll | 10 +++++----- llvm/test/CodeGen/X86/avx512-hadd-hsub.ll | 2 +- llvm/test/CodeGen/X86/avx512-inc-dec.ll | 2 +- llvm/test/CodeGen/X86/avx512-intel-ocl.ll | 8 ++++---- llvm/test/CodeGen/X86/avx512-logic.ll | 2 +- llvm/test/CodeGen/X86/avx512-memfold.ll | 2 +- llvm/test/CodeGen/X86/avx512-mov.ll | 2 +- llvm/test/CodeGen/X86/avx512-rotate.ll | 2 +- llvm/test/CodeGen/X86/avx512-scalar.ll | 2 +- llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll | 2 +- llvm/test/CodeGen/X86/avx512-scalar_mask.ll | 2 +- llvm/test/CodeGen/X86/avx512-shift.ll | 2 +- llvm/test/CodeGen/X86/avx512-vselect-crash.ll | 2 +- llvm/test/CodeGen/X86/avx512-vselect.ll | 2 +- llvm/test/CodeGen/X86/avx512bw-intrinsics-canonical.ll | 4 ++-- llvm/test/CodeGen/X86/avx512bw-mov.ll | 2 +- 
.../CodeGen/X86/avx512bwvl-intrinsics-canonical.ll | 2 +- llvm/test/CodeGen/X86/avx512bwvl-mov.ll | 2 +- llvm/test/CodeGen/X86/avx512cfma-intrinsics.ll | 2 +- llvm/test/CodeGen/X86/avx512cfmul-intrinsics.ll | 2 +- llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll | 2 +- llvm/test/CodeGen/X86/avx512vl-arith.ll | 4 ++-- llvm/test/CodeGen/X86/avx512vl-logic.ll | 2 +- llvm/test/CodeGen/X86/avx512vl-mov.ll | 2 +- llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll | 2 +- llvm/test/CodeGen/X86/combine-select.ll | 2 +- llvm/test/CodeGen/X86/cpus-intel.ll | 3 --- llvm/test/CodeGen/X86/i1narrowfail.ll | 2 +- llvm/test/CodeGen/X86/memcpy.ll | 2 +- llvm/test/CodeGen/X86/memset-zero.ll | 2 +- llvm/test/CodeGen/X86/pku.ll | 4 ++-- llvm/test/CodeGen/X86/pr32451.ll | 2 +- llvm/test/CodeGen/X86/pr34139.ll | 2 +- llvm/test/CodeGen/X86/recip-fastmath.ll | 2 +- llvm/test/CodeGen/X86/recip-fastmath2.ll | 2 +- llvm/test/CodeGen/X86/setcc-lowering.ll | 2 +- llvm/test/CodeGen/X86/slow-pmulld.ll | 4 ++-- llvm/test/CodeGen/X86/slow-unaligned-mem.ll | 2 +- llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll | 2 +- llvm/test/CodeGen/X86/vselect-minmax.ll | 2 +- llvm/test/CodeGen/X86/x86-interrupt_cc.ll | 4 ++-- llvm/test/CodeGen/X86/xaluo.ll | 2 +- llvm/test/CodeGen/X86/xmulo.ll | 2 +- llvm/test/MC/Disassembler/X86/avx-512.txt | 2 +- llvm/test/MC/X86/x86_long_nop.s | 2 +- .../Transforms/LoopVectorize/X86/gather_scatter.ll | 4 ++-- .../Transforms/LoopVectorize/X86/masked_load_store.ll | 2 +- .../SLPVectorizer/X86/alternate-calls-inseltpoison.ll | 2 +- .../Transforms/SLPVectorizer/X86/alternate-calls.ll | 2 +- .../SLPVectorizer/X86/alternate-cast-inseltpoison.ll | 2 +- .../Transforms/SLPVectorizer/X86/alternate-cast.ll | 2 +- .../SLPVectorizer/X86/alternate-fp-inseltpoison.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll | 2 +- .../SLPVectorizer/X86/alternate-int-inseltpoison.ll | 2 +- .../test/Transforms/SLPVectorizer/X86/alternate-int.ll | 2 +- 
llvm/test/Transforms/SLPVectorizer/X86/arith-abs.ll | 2 +- .../Transforms/SLPVectorizer/X86/arith-add-saddo.ll | 2 +- .../Transforms/SLPVectorizer/X86/arith-add-ssat.ll | 2 +- .../Transforms/SLPVectorizer/X86/arith-add-uaddo.ll | 2 +- .../Transforms/SLPVectorizer/X86/arith-add-usat.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll | 2 +- .../Transforms/SLPVectorizer/X86/arith-fshl-rot.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/arith-fshl.ll | 2 +- .../Transforms/SLPVectorizer/X86/arith-fshr-rot.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/arith-fshr.ll | 2 +- .../Transforms/SLPVectorizer/X86/arith-mul-smulo.ll | 2 +- .../Transforms/SLPVectorizer/X86/arith-mul-umulo.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll | 2 +- .../Transforms/SLPVectorizer/X86/arith-sub-ssat.ll | 2 +- .../Transforms/SLPVectorizer/X86/arith-sub-ssubo.ll | 2 +- .../Transforms/SLPVectorizer/X86/arith-sub-usat.ll | 2 +- .../Transforms/SLPVectorizer/X86/arith-sub-usubo.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll | 2 +- .../Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/hadd.ll | 2 +- .../Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/hsub.ll | 2 +- .../Transforms/SLPVectorizer/X86/sext-inseltpoison.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/sext.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll | 2 +- .../Transforms/SLPVectorizer/X86/zext-inseltpoison.ll | 2 
+- llvm/test/Transforms/SLPVectorizer/X86/zext.ll | 2 +- llvm/test/tools/llvm-mca/X86/cpus.s | 2 +- .../test/tools/llvm-mca/X86/register-file-statistics.s | 2 +- llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s | 2 +- 103 files changed, 119 insertions(+), 122 deletions(-) diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll index 897344d622d0f..81b37cfa977c2 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll @@ -5,7 +5,7 @@ ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx2 -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,AVX2 ; ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skylake -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,SKL -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,KNL +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,KNL ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,SKX define i32 @masked_load() { diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll index 5f22b2e39f947..49a83d6fe1e26 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -5,7 +5,7 @@ ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx2 -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,AVX2 ; ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skylake -passes="print" 2>&1 -disable-output | FileCheck %s 
--check-prefixes=AVX,SKL -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,KNL +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,KNL ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,SKX define i32 @masked_load() { diff --git a/llvm/test/Analysis/CostModel/X86/mul32.ll b/llvm/test/Analysis/CostModel/X86/mul32.ll index d50fc41e7b049..4db2167a6dad5 100644 --- a/llvm/test/Analysis/CostModel/X86/mul32.ll +++ b/llvm/test/Analysis/CostModel/X86/mul32.ll @@ -11,7 +11,7 @@ ; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM ; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX512 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/llvm/test/Analysis/CostModel/X86/mul64.ll b/llvm/test/Analysis/CostModel/X86/mul64.ll index 718972093e8e5..e598974d993bb 100644 --- a/llvm/test/Analysis/CostModel/X86/mul64.ll +++ b/llvm/test/Analysis/CostModel/X86/mul64.ll @@ -11,7 +11,7 @@ ; RUN: opt < %s 
-passes="print" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM ; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1 -; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -passes="print" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX512,AVX512F ; ; mul vXi8 -> mXi64 diff --git a/llvm/test/CodeGen/X86/all-ones-vector.ll b/llvm/test/CodeGen/X86/all-ones-vector.ll index d624f6c13e367..ee9df562d1f2f 100644 --- a/llvm/test/CodeGen/X86/all-ones-vector.ll +++ b/llvm/test/CodeGen/X86/all-ones-vector.ll @@ -2,12 +2,12 @@ ; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE ; RUN: llc < %s -mtriple=i386-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=i386-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX256,AVX2 -; RUN: llc < %s -mtriple=i386-unknown -mcpu=knl | FileCheck %s --check-prefixes=AVX,AVX256,AVX512 +; RUN: llc < %s -mtriple=i386-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX,AVX256,AVX512 ; RUN: llc < %s -mtriple=i386-unknown -mcpu=skx | FileCheck %s --check-prefixes=AVX,AVX256,AVX512 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX256,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=knl | FileCheck %s 
--check-prefixes=AVX,AVX256,AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX,AVX256,AVX512 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=skx | FileCheck %s --check-prefixes=AVX,AVX256,AVX512 define <16 x i8> @allones_v16i8() nounwind { diff --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll index d5c46485068a6..ff41b451ac347 100644 --- a/llvm/test/CodeGen/X86/atomic-idempotent.ll +++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SSE2 ; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=slm -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM ; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=goldmont -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM -; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=knl -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM +; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM ; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=atom -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-ATOM ; On x86, an atomic rmw operation that does not modify the value in memory diff --git a/llvm/test/CodeGen/X86/avx-isa-check.ll b/llvm/test/CodeGen/X86/avx-isa-check.ll index c82a37781ab05..e6b17d64f5180 100644 --- a/llvm/test/CodeGen/X86/avx-isa-check.ll +++ b/llvm/test/CodeGen/X86/avx-isa-check.ll @@ -2,10 +2,10 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=corei7-avx -o /dev/null ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2 -o /dev/null -; RUN: llc < %s -mtriple=x86_64-apple-darwin 
-show-mc-encoding -mcpu=knl -o /dev/null -; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -o /dev/null -; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512bw -o /dev/null -; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -mattr=+avx512bw -o /dev/null +; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -o /dev/null +; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512vl -o /dev/null +; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw -o /dev/null +; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512vl -mattr=+avx512bw -o /dev/null ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx -o /dev/null define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { diff --git a/llvm/test/CodeGen/X86/avx512-bugfix-23634.ll b/llvm/test/CodeGen/X86/avx512-bugfix-23634.ll index ee20747e23dcc..5eb0f80f42264 100644 --- a/llvm/test/CodeGen/X86/avx512-bugfix-23634.ll +++ b/llvm/test/CodeGen/X86/avx512-bugfix-23634.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll b/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll index 60390958b3e47..d672320d89b6c 100644 --- 
a/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll +++ b/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s declare void @Print__512(<16 x i32>) #0 diff --git a/llvm/test/CodeGen/X86/avx512-build-vector.ll b/llvm/test/CodeGen/X86/avx512-build-vector.ll index b001ebf4d19b7..60a035ccbbfa5 100644 --- a/llvm/test/CodeGen/X86/avx512-build-vector.ll +++ b/llvm/test/CodeGen/X86/avx512-build-vector.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s define <16 x i32> @test2(<16 x i32> %x) { ; CHECK-LABEL: test2: diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll index b39b089faa2a5..6efd692762db8 100644 --- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll +++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=ALL_X64 --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefix=ALL_X64 --check-prefix=KNL ; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=skx | FileCheck %s --check-prefix=ALL_X64 --check-prefix=SKX -; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=KNL_X32 +; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s 
--check-prefix=KNL_X32 ; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=skx -fast-isel | FileCheck %s --check-prefix=FASTISEL define <16 x i1> @test1() { @@ -681,12 +681,12 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: pushq %rbx ; KNL-NEXT: xorl %r10d, %r10d ; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp) -; KNL-NEXT: movl $65535, %eax ## imm = 0xFFFF +; KNL-NEXT: movl $65535, %ebx ## imm = 0xFFFF ; KNL-NEXT: movl $0, %r11d -; KNL-NEXT: cmovnel %eax, %r11d +; KNL-NEXT: cmovnel %ebx, %r11d ; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp) -; KNL-NEXT: cmovnel %eax, %r10d ; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: cmovnel %ebx, %r10d ; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi ; KNL-NEXT: andl $1, %edi ; KNL-NEXT: kmovw %edi, %k0 diff --git a/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll b/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll index ae3777453c68d..d323db2f96d97 100644 --- a/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll +++ b/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=CHECK,KNL +;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=CHECK,KNL ;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=CHECK,SKX define i32 @hadd_16(<16 x i32> %x225) { diff --git a/llvm/test/CodeGen/X86/avx512-inc-dec.ll b/llvm/test/CodeGen/X86/avx512-inc-dec.ll index beef481315d75..803deb9bc4947 100644 --- a/llvm/test/CodeGen/X86/avx512-inc-dec.ll +++ b/llvm/test/CodeGen/X86/avx512-inc-dec.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s define 
i32 @test(i32 %a, i32 %b) { ; CHECK-LABEL: test: diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll index 25d182afd66e7..24985ad6eb427 100644 --- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll +++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=knl | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s -check-prefix=X32 ; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=skx | FileCheck %s -check-prefix=X32 -; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=knl | FileCheck %s -check-prefix=WIN32 +; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s -check-prefix=WIN32 ; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=skx | FileCheck %s -check-prefix=WIN32 -; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=knl | FileCheck %s -check-prefixes=WIN64,WIN64-KNL +; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s -check-prefixes=WIN64,WIN64-KNL ; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=skx | FileCheck %s -check-prefixes=WIN64,WIN64-SKX -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -check-prefixes=X64,X64-KNL +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s -check-prefixes=X64,X64-KNL ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s -check-prefixes=X64,X64-SKX declare <16 x float> @func_float16_ptr(<16 x float>, ptr) diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll index e53e194ba05c2..8267eef1bcc0c 100644 --- a/llvm/test/CodeGen/X86/avx512-logic.ll +++ b/llvm/test/CodeGen/X86/avx512-logic.ll @@ -1,5 +1,5 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefix=ALL --check-prefix=KNL ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX diff --git a/llvm/test/CodeGen/X86/avx512-memfold.ll b/llvm/test/CodeGen/X86/avx512-memfold.ll index 906687569529d..b874fe7e36a19 100644 --- a/llvm/test/CodeGen/X86/avx512-memfold.ll +++ b/llvm/test/CodeGen/X86/avx512-memfold.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s define i8 @test_int_x86_avx512_mask_cmp_ss(<4 x float> %a, ptr %b, i8 %mask) { ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss: diff --git a/llvm/test/CodeGen/X86/avx512-mov.ll b/llvm/test/CodeGen/X86/avx512-mov.ll index 88682cea75466..e463d032294c4 100644 --- a/llvm/test/CodeGen/X86/avx512-mov.ll +++ b/llvm/test/CodeGen/X86/avx512-mov.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper --show-mc-encoding| FileCheck %s define i32 @test1(float %x) { ; CHECK-LABEL: test1: diff --git a/llvm/test/CodeGen/X86/avx512-rotate.ll b/llvm/test/CodeGen/X86/avx512-rotate.ll index fcf1a5c3e13c2..cd47d4007c4d2 100644 --- a/llvm/test/CodeGen/X86/avx512-rotate.ll +++ b/llvm/test/CodeGen/X86/avx512-rotate.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefix=CHECK --check-prefix=KNL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) diff --git a/llvm/test/CodeGen/X86/avx512-scalar.ll b/llvm/test/CodeGen/X86/avx512-scalar.ll index 200d36d116770..7a5bab932af19 100644 --- a/llvm/test/CodeGen/X86/avx512-scalar.ll +++ b/llvm/test/CodeGen/X86/avx512-scalar.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-KNL +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper --show-mc-encoding | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-KNL ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-SKX ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx --show-mc-encoding | FileCheck %s --check-prefix=AVX diff --git a/llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll b/llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll index 9767237c4818e..e4084a867f3a5 100644 --- a/llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK 
--check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefix=CHECK --check-prefix=KNL define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) { diff --git a/llvm/test/CodeGen/X86/avx512-scalar_mask.ll b/llvm/test/CodeGen/X86/avx512-scalar_mask.ll index 9e9fc57cf7b67..88461a91e7929 100644 --- a/llvm/test/CodeGen/X86/avx512-scalar_mask.ll +++ b/llvm/test/CodeGen/X86/avx512-scalar_mask.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) diff --git a/llvm/test/CodeGen/X86/avx512-shift.ll b/llvm/test/CodeGen/X86/avx512-shift.ll index 7a337c75d0d0c..82a66147e4ef5 100644 --- a/llvm/test/CodeGen/X86/avx512-shift.ll +++ b/llvm/test/CodeGen/X86/avx512-shift.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=CHECK,KNL +;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=CHECK,KNL ;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=CHECK,SKX define <16 x i32> @ashr_16_i32(<16 x i32> %a) { diff --git a/llvm/test/CodeGen/X86/avx512-vselect-crash.ll b/llvm/test/CodeGen/X86/avx512-vselect-crash.ll index 31ccf867f7aaf..e6790b51fa738 100644 --- a/llvm/test/CodeGen/X86/avx512-vselect-crash.ll +++ b/llvm/test/CodeGen/X86/avx512-vselect-crash.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated 
by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s define <16 x i32> @test() { ; CHECK-LABEL: test: diff --git a/llvm/test/CodeGen/X86/avx512-vselect.ll b/llvm/test/CodeGen/X86/avx512-vselect.ll index c402e8d7b7714..e272190593c79 100644 --- a/llvm/test/CodeGen/X86/avx512-vselect.ll +++ b/llvm/test/CodeGen/X86/avx512-vselect.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mcpu=skx | FileCheck %s --check-prefixes=CHECK,CHECK-SKX -; RUN: llc < %s -mcpu=knl | FileCheck %s --check-prefixes=CHECK,CHECK-KNL +; RUN: llc < %s -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=CHECK,CHECK-KNL target triple = "x86_64-unknown-unknown" diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-canonical.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-canonical.ll index 21ae182acc6b0..98b8fc4e5f02a 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-canonical.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-canonical.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW -; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512F-32 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512F-32 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512bw-builtins.c diff --git 
a/llvm/test/CodeGen/X86/avx512bw-mov.ll b/llvm/test/CodeGen/X86/avx512bw-mov.ll index 7e2f3620863d7..82cee05acb5f1 100644 --- a/llvm/test/CodeGen/X86/avx512bw-mov.ll +++ b/llvm/test/CodeGen/X86/avx512bw-mov.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw | FileCheck %s define <64 x i8> @test1(ptr %addr) { ; CHECK-LABEL: test1: diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-canonical.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-canonical.ll index e931b5206e423..d8ea4e7af967b 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-canonical.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-canonical.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlbw-builtins.c diff --git a/llvm/test/CodeGen/X86/avx512bwvl-mov.ll b/llvm/test/CodeGen/X86/avx512bwvl-mov.ll index 71b26588833de..e0dc53f9d38d1 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-mov.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-mov.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| 
FileCheck %s define <32 x i8> @test_256_1(ptr %addr) { ; CHECK-LABEL: test_256_1: diff --git a/llvm/test/CodeGen/X86/avx512cfma-intrinsics.ll b/llvm/test/CodeGen/X86/avx512cfma-intrinsics.ll index 8d9a927818ce9..2c0fef7f589ba 100644 --- a/llvm/test/CodeGen/X86/avx512cfma-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512cfma-intrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s declare <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float>, <4 x float>, <4 x float>, i8) declare <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.cph.128(<4 x float>, <4 x float>, <4 x float>, i8) diff --git a/llvm/test/CodeGen/X86/avx512cfmul-intrinsics.ll b/llvm/test/CodeGen/X86/avx512cfmul-intrinsics.ll index e37c649a4786d..c745e7693ac8e 100644 --- a/llvm/test/CodeGen/X86/avx512cfmul-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512cfmul-intrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s declare <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float>, <4 x float>, <4 x float>, i8) diff --git a/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll b/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll index 43e085f37ff67..6225ce71b73e9 100644 --- a/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll @@ -1,5 +1,5 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s declare <4 x float> @llvm.x86.avx512fp16.mask.vfmul.csh(<4 x float>, <4 x float>, <4 x float>, i8, i32) declare <4 x float> @llvm.x86.avx512fp16.maskz.vfmul.csh(<4 x float>, <4 x float>, <4 x float>, i8, i32) diff --git a/llvm/test/CodeGen/X86/avx512vl-arith.ll b/llvm/test/CodeGen/X86/avx512vl-arith.ll index 1006c5625e26c..5fdb9c7f05e31 100644 --- a/llvm/test/CodeGen/X86/avx512vl-arith.ll +++ b/llvm/test/CodeGen/X86/avx512vl-arith.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl,-evex512 --show-mc-encoding| FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512vl --show-mc-encoding| FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512vl,-evex512 --show-mc-encoding| FileCheck %s ; 256-bit diff --git a/llvm/test/CodeGen/X86/avx512vl-logic.ll b/llvm/test/CodeGen/X86/avx512vl-logic.ll index 58621967e2aca..531cc64faf0ec 100644 --- a/llvm/test/CodeGen/X86/avx512vl-logic.ll +++ b/llvm/test/CodeGen/X86/avx512vl-logic.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell 
-mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; 256-bit diff --git a/llvm/test/CodeGen/X86/avx512vl-mov.ll b/llvm/test/CodeGen/X86/avx512vl-mov.ll index a86c6a726f016..15da395570fda 100644 --- a/llvm/test/CodeGen/X86/avx512vl-mov.ll +++ b/llvm/test/CodeGen/X86/avx512vl-mov.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512vl --show-mc-encoding| FileCheck %s define <8 x i32> @test_256_1(ptr %addr) { ; CHECK-LABEL: test_256_1: diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll index 5b09e45b6fcf1..3f1087b6f8479 100644 --- a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=VLX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=NoVLX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefix=NoVLX define <4 x i64> @test256_1(<4 x i64> %x, <4 x i64> %y) nounwind { ; VLX-LABEL: test256_1: diff --git a/llvm/test/CodeGen/X86/combine-select.ll b/llvm/test/CodeGen/X86/combine-select.ll index 7cbb9831ba9ad..9410088dd92be 100644 --- a/llvm/test/CodeGen/X86/combine-select.ll +++ b/llvm/test/CodeGen/X86/combine-select.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < 
%s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s define <4 x float> @select_mask_add_ss(<4 x float> %w, i8 zeroext %u, <4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: select_mask_add_ss: diff --git a/llvm/test/CodeGen/X86/cpus-intel.ll b/llvm/test/CodeGen/X86/cpus-intel.ll index 5e4d09e081fec..1375ae4585640 100644 --- a/llvm/test/CodeGen/X86/cpus-intel.ll +++ b/llvm/test/CodeGen/X86/cpus-intel.ll @@ -88,9 +88,6 @@ ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=goldmont-plus 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=goldmont_plus 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=tremont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty -; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty -; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=mic_avx512 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty -; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knm 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=raptorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=meteorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=sierraforest 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty diff --git a/llvm/test/CodeGen/X86/i1narrowfail.ll b/llvm/test/CodeGen/X86/i1narrowfail.ll index 0bd091d4a412b..1b4df8c37cd06 100644 --- a/llvm/test/CodeGen/X86/i1narrowfail.ll 
+++ b/llvm/test/CodeGen/X86/i1narrowfail.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s define void @foo(ptr %ptr) { ; CHECK-LABEL: foo: diff --git a/llvm/test/CodeGen/X86/memcpy.ll b/llvm/test/CodeGen/X86/memcpy.ll index 6ec9b20163051..6b79d7b582c34 100644 --- a/llvm/test/CodeGen/X86/memcpy.ll +++ b/llvm/test/CodeGen/X86/memcpy.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s -check-prefix=LINUX ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake | FileCheck %s -check-prefix=LINUX-SKL ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx | FileCheck %s -check-prefix=LINUX-SKX -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl | FileCheck %s -check-prefix=LINUX-KNL +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s -check-prefix=LINUX-KNL ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512bw | FileCheck %s -check-prefix=LINUX-AVX512BW declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind diff --git a/llvm/test/CodeGen/X86/memset-zero.ll b/llvm/test/CodeGen/X86/memset-zero.ll index 8c33a22a57e99..b17f06424481a 100644 --- a/llvm/test/CodeGen/X86/memset-zero.ll +++ b/llvm/test/CodeGen/X86/memset-zero.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux -mcpu=nehalem | FileCheck %s --check-prefix=NEHALEM ; RUN: llc < %s -mtriple=x86_64-unknown-linux -mcpu=sandybridge | FileCheck %s --check-prefix=SANDYBRIDGE ; RUN: llc < %s -mtriple=x86_64-unknown-linux -mcpu=skylake | FileCheck %s --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-linux -mcpu=knl | FileCheck %s --check-prefix=KNL +; RUN: llc < %s 
-mtriple=x86_64-unknown-linux -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefix=KNL declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind diff --git a/llvm/test/CodeGen/X86/pku.ll b/llvm/test/CodeGen/X86/pku.ll index b6b2f98e29996..e6d93822412f8 100644 --- a/llvm/test/CodeGen/X86/pku.ll +++ b/llvm/test/CodeGen/X86/pku.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=knl --show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X86 -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper --show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper --show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X64 declare i32 @llvm.x86.rdpkru() declare void @llvm.x86.wrpkru(i32) diff --git a/llvm/test/CodeGen/X86/pr32451.ll b/llvm/test/CodeGen/X86/pr32451.ll index 0abc87f832ee8..cec2a9a43b48d 100644 --- a/llvm/test/CodeGen/X86/pr32451.ll +++ b/llvm/test/CodeGen/X86/pr32451.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -O0 -mcpu=knl | FileCheck %s +; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -O0 -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s ; ModuleID = 'convert' source_filename = "convert" diff --git a/llvm/test/CodeGen/X86/pr34139.ll b/llvm/test/CodeGen/X86/pr34139.ll index 93427e2e6cce2..472a6f166615d 100644 --- a/llvm/test/CodeGen/X86/pr34139.ll +++ b/llvm/test/CodeGen/X86/pr34139.ll @@ -1,5 +1,5 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s define void @f_f(ptr %ptr) { ; CHECK-LABEL: f_f: diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll index 7e9bbc5556424..98be0fba82010 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath.ll @@ -7,7 +7,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell | FileCheck %s --check-prefixes=AVX,HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=-fma | FileCheck %s --check-prefixes=AVX,HASWELL-NO-FMA -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=AVX,AVX512,KNL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX,AVX512,KNL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=AVX,AVX512,SKX ; If the target's divss/divps instructions are substantially diff --git a/llvm/test/CodeGen/X86/recip-fastmath2.ll b/llvm/test/CodeGen/X86/recip-fastmath2.ll index 2a5e46bba2c00..202cbd144ed04 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath2.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath2.ll @@ -7,7 +7,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell | FileCheck %s --check-prefixes=AVX,HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=-fma | FileCheck %s --check-prefixes=AVX,HASWELL-NO-FMA -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck 
%s --check-prefixes=AVX,AVX512,KNL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX,AVX512,KNL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=AVX,AVX512,SKX ; It's the extra tests coverage for recip as discussed on D26855. diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll index 705e48ca4c9c9..c315df214e359 100644 --- a/llvm/test/CodeGen/X86/setcc-lowering.ll +++ b/llvm/test/CodeGen/X86/setcc-lowering.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX -; RUN: llc -mtriple=i386-unknown-linux-gnu -mcpu=knl < %s | FileCheck %s --check-prefix=KNL-32 +; RUN: llc -mtriple=i386-unknown-linux-gnu -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper < %s | FileCheck %s --check-prefix=KNL-32 ; Verify that we don't crash during codegen due to a wrong lowering diff --git a/llvm/test/CodeGen/X86/slow-pmulld.ll b/llvm/test/CodeGen/X86/slow-pmulld.ll index 8e330c3bfc676..a2785e170436d 100644 --- a/llvm/test/CodeGen/X86/slow-pmulld.ll +++ b/llvm/test/CodeGen/X86/slow-pmulld.ll @@ -13,8 +13,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX2,AVX-64,AVX512-64,AVX512DQ-64 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX2,AVX-32,AVX512-32,AVX512BW-32 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX2,AVX-64,AVX512-64,AVX512BW-64 -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=AVX2,AVX-32,AVX512-32,KNL-32 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=AVX2,AVX-64,AVX512-64,KNL-64 +; RUN: llc < %s 
-mtriple=i386-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX2,AVX-32,AVX512-32,KNL-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX2,AVX-64,AVX512-64,KNL-64 ; Make sure that the slow-pmulld feature can be used without SSE4.1. ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont -mattr=-sse4.1 diff --git a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll index d74d195439bda..e06fc2fae5611 100644 --- a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll +++ b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll @@ -21,7 +21,7 @@ ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=ivybridge 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX128 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256 -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512 +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=skylake-avx512 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256 ; AMD chips with slow unaligned memory accesses diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll index 4668d7b6870ef..372da59a5d8df 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefixes=ALL,KNL %s +; RUN: llc < %s 
-mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck --check-prefixes=ALL,KNL %s ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefixes=ALL,SKX %s target triple = "x86_64-unknown-unknown" diff --git a/llvm/test/CodeGen/X86/vselect-minmax.ll b/llvm/test/CodeGen/X86/vselect-minmax.ll index cb0542ca7cea8..4d553cc04f7a2 100644 --- a/llvm/test/CodeGen/X86/vselect-minmax.ll +++ b/llvm/test/CodeGen/X86/vselect-minmax.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW define <16 x i8> @test1(<16 x i8> %a, <16 x i8> %b) { diff --git a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll index cf8b7096816af..fdd2f824f2ff4 100644 --- a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll +++ b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx -show-mc-encoding -mcpu=knl < %s | FileCheck %s -check-prefix=CHECK64-KNL +; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx -show-mc-encoding -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper < %s | FileCheck %s -check-prefix=CHECK64-KNL ; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx 
-show-mc-encoding -mcpu=skx < %s | FileCheck %s -check-prefix=CHECK64-SKX -; RUN: llc -verify-machineinstrs -mtriple=i386-apple-macosx -show-mc-encoding -mcpu=knl < %s | FileCheck %s -check-prefix=CHECK32-KNL +; RUN: llc -verify-machineinstrs -mtriple=i386-apple-macosx -show-mc-encoding -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper < %s | FileCheck %s -check-prefix=CHECK32-KNL ; RUN: llc -verify-machineinstrs -mtriple=i386-apple-macosx -show-mc-encoding -mcpu=skx < %s | FileCheck %s -check-prefix=CHECK32-SKX ; Make sure we spill the high numbered zmm registers and K registers with the right encoding. diff --git a/llvm/test/CodeGen/X86/xaluo.ll b/llvm/test/CodeGen/X86/xaluo.ll index 5796e485f6314..e06f533293652 100644 --- a/llvm/test/CodeGen/X86/xaluo.ll +++ b/llvm/test/CodeGen/X86/xaluo.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s --check-prefixes=CHECK,SDAG,GENERIC ; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefixes=CHECK,FAST -; RUN: llc -mtriple=x86_64-darwin-unknown -mcpu=knl < %s | FileCheck %s --check-prefixes=CHECK,SDAG,KNL +; RUN: llc -mtriple=x86_64-darwin-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper < %s | FileCheck %s --check-prefixes=CHECK,SDAG,KNL ; ; Get the actual value of the overflow bit. 
diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll index 6eb34b4e773e8..b605307d39315 100644 --- a/llvm/test/CodeGen/X86/xmulo.ll +++ b/llvm/test/CodeGen/X86/xmulo.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG ; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefixes=CHECK,LINUX,FAST -; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -mcpu=knl < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG +; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG ; RUN: llc -disable-peephole -mtriple=x86_64-pc-win32 < %s | FileCheck %s --check-prefixes=CHECK,WIN64 ; RUN: llc -disable-peephole -mtriple=i386-pc-win32 < %s | FileCheck %s --check-prefix=WIN32 diff --git a/llvm/test/MC/Disassembler/X86/avx-512.txt b/llvm/test/MC/Disassembler/X86/avx-512.txt index 7c6f9d79ebd9b..7ebce9e391757 100644 --- a/llvm/test/MC/Disassembler/X86/avx-512.txt +++ b/llvm/test/MC/Disassembler/X86/avx-512.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 -mcpu=knl | FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s # RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 -mcpu=skx | FileCheck --check-prefix=CHECK-SKX %s # CHECK: vpbroadcastd %xmm18, %zmm28 {%k7} {z} diff --git a/llvm/test/MC/X86/x86_long_nop.s b/llvm/test/MC/X86/x86_long_nop.s index 6136c3db9a3da..e427f730bf874 100644 --- a/llvm/test/MC/X86/x86_long_nop.s +++ b/llvm/test/MC/X86/x86_long_nop.s @@ -27,7 +27,7 @@ # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=broadwell %s | llvm-objdump -d 
--no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skylake %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skx %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s -# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=knl %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=knm %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s # Ensure alignment directives also emit sequences of 10, 11 and 15-byte NOPs on processors diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index e9541c1ee035f..8d81c59a9b0ef 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=loop-vectorize,simplifycfg -mcpu=knl -S | FileCheck %s -check-prefix=AVX512 -; RUN: opt < %s -passes=loop-vectorize,simplifycfg -mcpu=knl -force-vector-width=2 -force-target-max-vector-interleave=1 -S | FileCheck %s -check-prefix=FVW2 +; RUN: opt < %s -passes=loop-vectorize,simplifycfg -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -S | FileCheck %s -check-prefix=AVX512 +; RUN: opt < %s -passes=loop-vectorize,simplifycfg -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -force-vector-width=2 -force-target-max-vector-interleave=1 -S | FileCheck %s -check-prefix=FVW2 ; With a 
force-vector-width, it is sometimes more profitable to generate ; scalarized and predicated stores instead of masked scatter. Disable diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index 5cc4d43ec2e49..b48965e366d9d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=loop-vectorize -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX1 ; RUN: opt < %s -passes=loop-vectorize -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX2 -; RUN: opt < %s -passes=loop-vectorize -mcpu=knl -S | FileCheck %s -check-prefix=AVX512 +; RUN: opt < %s -passes=loop-vectorize -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -S | FileCheck %s -check-prefix=AVX512 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc_linux" diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll index 6c21cc1cfc5be..c216d57c5317c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s 
-mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 define <8 x float> @ceil_floor(<8 x float> %a) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll index bc5bcee361168..18363fe388725 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2 define <8 x float> @ceil_floor(<8 x float> %a) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll index e24c52ba81ddf..bc0f84b865ac8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s ; RUN: opt < 
%s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s define <8 x float> @sitofp_uitofp(<8 x i32> %a) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll index 0f8751a6da7f5..03e31913872e8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s define <8 x float> @sitofp_uitofp(<8 x i32> %a) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll index 5a1de4f3e3d7f..9aa273d793510 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll index 046ed781f4c8d..be3b86cbdd9d8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 +; RUN: opt < 
%s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll index 8e878f3f8b80f..8aeead6a7bea0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512 define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll index 0b17e19e4fadd..a2620181e3c22 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll @@ -3,7 +3,7 @@ ; RUN: 
opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512 define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-abs.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-abs.ll index c37f260fad015..0fbef6ccfe3a1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-abs.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-abs.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s 
-mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-saddo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-saddo.ll index 8d7dd9b9621c8..57e5a5b8da287 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-saddo.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-saddo.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll index 24c5fcb068086..61a4810a0bcf8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s 
-mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-uaddo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-uaddo.ll index fc67cec60f177..fbcb7f9c39463 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-uaddo.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-uaddo.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll index fab022d691c07..a0d3a3e92392e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer 
-S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll index dafed43e6e71c..b84d2a56a78e9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll @@ -5,7 +5,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=-prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll index 33fd3e6dc0e09..eabf866f57624 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll @@ -5,7 +5,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=-prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll index e4c76daddb02e..57222f791f72e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s 
--check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX256BW diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll index 7f986c74f207f..ec970e3761657 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll @@ -5,7 +5,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX256 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 @a64 = common global [8 x i64] zeroinitializer, align 64 @b64 = common global [8 x i64] zeroinitializer, align 64 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl.ll index 5153dc34e7a4f..d5b6a27f115a0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl.ll @@ -5,7 +5,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s 
--check-prefixes=AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX256 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 @a64 = common global [8 x i64] zeroinitializer, align 64 @b64 = common global [8 x i64] zeroinitializer, align 64 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll index 52c6f14f28e18..5db1290620778 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll @@ -5,7 +5,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX256 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 @a64 = common global [8 x i64] zeroinitializer, align 64 @b64 = common global [8 x i64] zeroinitializer, align 64 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr.ll index b456742337abd..4edb6742f04ce 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr.ll @@ -5,7 +5,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX256 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 @a64 = common global [8 x i64] zeroinitializer, align 64 @b64 = common global [8 x i64] zeroinitializer, align 64 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-smulo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-smulo.ll index 72a3ddd0bb747..6cdec59304394 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-smulo.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-smulo.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-umulo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-umulo.ll index 4126f06e8ca81..e2f62663f7718 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-umulo.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-umulo.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll index 94976a8cdee25..3678b0389f659 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll @@ -5,7 +5,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -mattr=-prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX256 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX128 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=-prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX256 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell 
-mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX256 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll index c63b672f4187c..34470ecca3f0f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll index 826f97f2a2d89..eddcfd0e22d6e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE ; RUN: opt < %s 
-mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll index afaab8b8ca642..7cf590955c991 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssubo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssubo.ll index d628dddd16cb1..1c265a057f118 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssubo.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssubo.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll index 3510863c88930..ffdb05e5e4383 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s 
--check-prefixes=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usubo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usubo.ll index 11a68a5dfbcca..ccf9a4651dfca 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usubo.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usubo.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll index be54c1e04ca39..5028999d5f25f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll @@ -5,7 +5,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=-prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 
-mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll index 3a187930055f0..0f9b07e03b90f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll index 15119a9628067..7102e6de4a083 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll index 4a9f717918a02..21a82cf169d11 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell 
-mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll index 31e3e6aa0a833..0efa6e021d001 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll index 40b6a8c32f5d0..d14facddab992 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s 
--check-prefixes=CHECK,AVX,AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll index 09113323d3ab7..3a8d35d4d09eb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll index 5ae0ad932fddd..71c6586857633 100644 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll index 7d38aeb0c3635..a5751c31780db 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll b/llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll index 51798deae694a..806ca1c2efc5e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver4 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=XOP diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll b/llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll index 7583561bbecf9..df597b18646c1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl 
-passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver4 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=XOP diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll b/llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll index 5ec327c131fb7..dc8f37ea21118 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver4 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=XOP diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll index d1f6c41e5c30e..ea98d3cd0917d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll index 829e4bab20ffa..b881d489daf9d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll @@ -3,7 +3,7 @@ ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw 
-passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; diff --git a/llvm/test/tools/llvm-mca/X86/cpus.s b/llvm/test/tools/llvm-mca/X86/cpus.s index 38e0365dc80eb..9db78a75dc2d5 100644 --- a/llvm/test/tools/llvm-mca/X86/cpus.s +++ b/llvm/test/tools/llvm-mca/X86/cpus.s @@ -10,7 +10,7 @@ # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,IVYBRIDGE %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,HASWELL %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,BROADWELL %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=knl -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,KNL %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,KNL %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,SKX %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,SKX-AVX512 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-client -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,ICX %s diff --git a/llvm/test/tools/llvm-mca/X86/register-file-statistics.s b/llvm/test/tools/llvm-mca/X86/register-file-statistics.s index fd90a6ec59977..3613b29cf6303 100644 --- a/llvm/test/tools/llvm-mca/X86/register-file-statistics.s +++ b/llvm/test/tools/llvm-mca/X86/register-file-statistics.s @@ -11,7 +11,7 @@ # RUN: llvm-mca %s 
-mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=knl -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-client -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s diff --git a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s index 2d5c0483de7df..1f70a2628f9f0 100644 --- a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s +++ b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s @@ -10,7 +10,7 @@ # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,IVB %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell 
-iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,HSW %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,BDW %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=knl -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,KNL %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,KNL %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,SKX %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,SKX-AVX512 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-client -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ICX %s From edd636d923b35aa424023d48896ee89740971d54 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Thu, 21 Dec 2023 17:07:12 +0800 Subject: [PATCH 4/7] [X86][BE] Workaround special tests to work. 
--- llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll | 2 +- llvm/test/CodeGen/X86/recip-fastmath2.ll | 2 +- llvm/test/CodeGen/X86/slow-pmulld.ll | 4 ++-- llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll | 2 +- llvm/test/CodeGen/X86/xaluo.ll | 2 +- llvm/test/MC/X86/x86_long_nop.s | 2 -- .../LoadStoreVectorizer/X86/load-width-inseltpoison.ll | 4 ++-- .../Transforms/LoadStoreVectorizer/X86/load-width.ll | 4 ++-- .../test/Transforms/LoopVectorize/X86/scatter_crash.ll | 2 +- llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll | 2 +- llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s | 10 ---------- 11 files changed, 12 insertions(+), 24 deletions(-) diff --git a/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll b/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll index bb86f307afa81..7676a65b735e0 100644 --- a/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll +++ b/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll @@ -48,5 +48,5 @@ entry: ; Function Attrs: nounwind readnone declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32) #1 -attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" 
"target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/X86/recip-fastmath2.ll b/llvm/test/CodeGen/X86/recip-fastmath2.ll index 202cbd144ed04..cc5c2b2121622 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath2.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath2.ll @@ -7,7 +7,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell | FileCheck %s --check-prefixes=AVX,HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=-fma | FileCheck %s --check-prefixes=AVX,HASWELL-NO-FMA -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX,AVX512,KNL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX,AVX512,KNL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=AVX,AVX512,SKX ; It's the extra tests coverage for recip as discussed on D26855. 
diff --git a/llvm/test/CodeGen/X86/slow-pmulld.ll b/llvm/test/CodeGen/X86/slow-pmulld.ll index a2785e170436d..2e1e4f1696215 100644 --- a/llvm/test/CodeGen/X86/slow-pmulld.ll +++ b/llvm/test/CodeGen/X86/slow-pmulld.ll @@ -13,8 +13,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX2,AVX-64,AVX512-64,AVX512DQ-64 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX2,AVX-32,AVX512-32,AVX512BW-32 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX2,AVX-64,AVX512-64,AVX512BW-64 -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX2,AVX-32,AVX512-32,KNL-32 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX2,AVX-64,AVX512-64,KNL-64 +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper,+slow-pmaddwd | FileCheck %s --check-prefixes=AVX2,AVX-32,AVX512-32,KNL-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper,+slow-pmaddwd | FileCheck %s --check-prefixes=AVX2,AVX-64,AVX512-64,KNL-64 ; Make sure that the slow-pmulld feature can be used without SSE4.1. 
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont -mattr=-sse4.1 diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll index 372da59a5d8df..56f4580826513 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck --check-prefixes=ALL,KNL %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=haswell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper,-fast-variable-perlane-shuffle | FileCheck --check-prefixes=ALL,KNL %s ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefixes=ALL,SKX %s target triple = "x86_64-unknown-unknown" diff --git a/llvm/test/CodeGen/X86/xaluo.ll b/llvm/test/CodeGen/X86/xaluo.ll index e06f533293652..bfac979519ee5 100644 --- a/llvm/test/CodeGen/X86/xaluo.ll +++ b/llvm/test/CodeGen/X86/xaluo.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s --check-prefixes=CHECK,SDAG,GENERIC ; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefixes=CHECK,FAST -; RUN: llc -mtriple=x86_64-darwin-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper < %s | FileCheck %s --check-prefixes=CHECK,SDAG,KNL +; RUN: llc -mtriple=x86_64-darwin-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper,+slow-incdec < %s | FileCheck %s --check-prefixes=CHECK,SDAG,KNL ; ; Get the actual value of the overflow bit. 
diff --git a/llvm/test/MC/X86/x86_long_nop.s b/llvm/test/MC/X86/x86_long_nop.s index e427f730bf874..c0219aca787b3 100644 --- a/llvm/test/MC/X86/x86_long_nop.s +++ b/llvm/test/MC/X86/x86_long_nop.s @@ -27,8 +27,6 @@ # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=broadwell %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skylake %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skx %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s -# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s -# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=knm %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s # Ensure alignment directives also emit sequences of 10, 11 and 15-byte NOPs on processors # capable of using long NOPs. 
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width-inseltpoison.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width-inseltpoison.ll index a38aacfc3ce0e..2728ae15befaf 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width-inseltpoison.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width-inseltpoison.ll @@ -1,7 +1,7 @@ ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s -; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s +; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s -; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s +; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s define <8 x double> @loadwidth_insert_extract(ptr %ptr) { %b = getelementptr <2 x double>, ptr %ptr, i32 1 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll index f225762d43801..6d7ccd370fcea 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll @@ -1,7 +1,7 @@ ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s -; RUN: opt 
-mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s +; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s -; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s +; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s define <8 x double> @loadwidth_insert_extract(ptr %ptr) { %b = getelementptr <2 x double>, ptr %ptr, i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index bf2b9e2aef85a..4839e3edf7b4d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -111,4 +111,4 @@ for.body: ; preds = %for.body.preheader, br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit99 } -attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { 
norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll index b8c551c7b771d..02bf77a5e103d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll @@ -26,5 +26,5 @@ entry: unreachable } -attributes #0 = { noreturn readonly uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { noreturn readonly uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git 
a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s index 1f70a2628f9f0..7f9a501b57a15 100644 --- a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s +++ b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s @@ -10,7 +10,6 @@ # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,IVB %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,HSW %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,BDW %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,KNL %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,SKX %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,SKX-AVX512 %s # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-client -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ICX %s @@ -68,12 +67,6 @@ xor %eax, %ebx # IVB-NEXT: [3] Maximum number of used buffer entries. # IVB-NEXT: [4] Total number of buffer entries. -# KNL: Scheduler's queue usage: -# KNL-NEXT: [1] Resource name. -# KNL-NEXT: [2] Average number of used buffer entries. -# KNL-NEXT: [3] Maximum number of used buffer entries. -# KNL-NEXT: [4] Total number of buffer entries. 
- # SKX: Scheduler's queue usage: # SKX-NEXT: [1] Resource name. # SKX-NEXT: [2] Average number of used buffer entries. @@ -145,9 +138,6 @@ xor %eax, %ebx # IVB: [1] [2] [3] [4] # IVB-NEXT: SBPortAny 0 1 54 -# KNL: [1] [2] [3] [4] -# KNL-NEXT: HWPortAny 0 1 60 - # SKX: [1] [2] [3] [4] # SKX-NEXT: SKLPortAny 0 1 60 From 6470ba478914b5e6fbec1857848c957ccf2816e1 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Tue, 26 Dec 2023 09:51:43 +0800 Subject: [PATCH 5/7] Remove AVX512ER intrinsic supports. --- clang/include/clang/Basic/BuiltinsX86.def | 12 - clang/include/clang/Driver/Options.td | 2 - clang/lib/Basic/Targets/X86.cpp | 6 - clang/lib/Basic/Targets/X86.h | 1 - clang/lib/Headers/CMakeLists.txt | 1 - clang/lib/Headers/avx512erintrin.h | 271 ------------- clang/lib/Headers/cpuid.h | 1 - clang/lib/Headers/immintrin.h | 5 - clang/lib/Sema/SemaChecking.cpp | 10 - clang/test/CodeGen/X86/avx512er-builtins.c | 347 ---------------- clang/test/CodeGen/function-target-features.c | 4 +- clang/test/CodeGen/target-builtin-noerror.c | 1 - clang/test/Driver/x86-target-features.c | 5 - clang/test/Preprocessor/x86_target_features.c | 18 - llvm/include/llvm/IR/IntrinsicsX86.td | 52 --- llvm/lib/Target/X86/X86.td | 3 - llvm/lib/Target/X86/X86InstrAVX512.td | 78 +++- llvm/lib/Target/X86/X86InstrPredicates.td | 1 - llvm/lib/Target/X86/X86IntrinsicsInfo.h | 10 - llvm/lib/TargetParser/Host.cpp | 5 - llvm/test/CodeGen/X86/avx512er-intrinsics.ll | 306 -------------- llvm/test/CodeGen/X86/crc32-target-feature.ll | 4 +- llvm/test/CodeGen/X86/unfoldMemoryOperand.mir | 2 +- .../LoopStrengthReduce/X86/pr40514.ll | 2 +- .../Transforms/LoopVectorize/X86/pr23997.ll | 2 +- .../Transforms/LoopVectorize/X86/pr54634.ll | 2 +- .../llvm-mca/X86/Generic/resources-avx512er.s | 373 ------------------ .../gn/secondary/clang/lib/Headers/BUILD.gn | 1 - 28 files changed, 71 insertions(+), 1454 deletions(-) delete mode 100644 clang/lib/Headers/avx512erintrin.h delete mode 100644 
clang/test/CodeGen/X86/avx512er-builtins.c delete mode 100644 llvm/test/CodeGen/X86/avx512er-intrinsics.ll delete mode 100644 llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 60b752ad48548..00a69b16a5128 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -839,23 +839,11 @@ TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512") TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512") -TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512") -TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512") - TARGET_BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512") TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512") -TARGET_BUILTIN(__builtin_ia32_rcp28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rcp28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512") -TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512") -TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512") -TARGET_BUILTIN(__builtin_ia32_exp2ps_mask, 
"V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512") - TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512") TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512") TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f,evex512") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 2b93ddf033499..a2f26b9ca4c35 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5865,8 +5865,6 @@ def mavx512cd : Flag<["-"], "mavx512cd">, Group; def mno_avx512cd : Flag<["-"], "mno-avx512cd">, Group; def mavx512dq : Flag<["-"], "mavx512dq">, Group; def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group; -def mavx512er : Flag<["-"], "mavx512er">, Group; -def mno_avx512er : Flag<["-"], "mno-avx512er">, Group; def mavx512fp16 : Flag<["-"], "mavx512fp16">, Group; def mno_avx512fp16 : Flag<["-"], "mno-avx512fp16">, Group; def mavx512ifma : Flag<["-"], "mavx512ifma">, Group; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 2483bc8b08f01..8b0e021a48842 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -293,8 +293,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, HasAVX512VNNI = true; } else if (Feature == "+avx512bf16") { HasAVX512BF16 = true; - } else if (Feature == "+avx512er") { - HasAVX512ER = true; } else if (Feature == "+avx512fp16") { HasAVX512FP16 = true; HasLegalHalfType = true; @@ -813,8 +811,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__AVX512VNNI__"); if (HasAVX512BF16) Builder.defineMacro("__AVX512BF16__"); - if (HasAVX512ER) - Builder.defineMacro("__AVX512ER__"); if (HasAVX512FP16) Builder.defineMacro("__AVX512FP16__"); if (HasAVX512PF) @@ -1052,7 +1048,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { 
.Case("avx512vpopcntdq", true) .Case("avx512vnni", true) .Case("avx512bf16", true) - .Case("avx512er", true) .Case("avx512fp16", true) .Case("avx512pf", true) .Case("avx512dq", true) @@ -1168,7 +1163,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ) .Case("avx512vnni", HasAVX512VNNI) .Case("avx512bf16", HasAVX512BF16) - .Case("avx512er", HasAVX512ER) .Case("avx512fp16", HasAVX512FP16) .Case("avx512pf", HasAVX512PF) .Case("avx512dq", HasAVX512DQ) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 0ab1c10833db2..a890348073e88 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -103,7 +103,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasAVX512VNNI = false; bool HasAVX512FP16 = false; bool HasAVX512BF16 = false; - bool HasAVX512ER = false; bool HasAVX512PF = false; bool HasAVX512DQ = false; bool HasAVX512BITALG = false; diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 735e4e4e3be89..09c62d14085df 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -151,7 +151,6 @@ set(x86_files avx512bwintrin.h avx512cdintrin.h avx512dqintrin.h - avx512erintrin.h avx512fintrin.h avx512fp16intrin.h avx512ifmaintrin.h diff --git a/clang/lib/Headers/avx512erintrin.h b/clang/lib/Headers/avx512erintrin.h deleted file mode 100644 index 1c5a2d2d208ff..0000000000000 --- a/clang/lib/Headers/avx512erintrin.h +++ /dev/null @@ -1,271 +0,0 @@ -/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------=== - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. 
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===-----------------------------------------------------------------------=== - */ -#ifndef __IMMINTRIN_H -#error "Never use directly; include instead." -#endif - -#ifndef __AVX512ERINTRIN_H -#define __AVX512ERINTRIN_H - -/* exp2a23 */ -#define _mm512_exp2a23_round_pd(A, R) \ - ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R))) - -#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \ - ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(S), (__mmask8)(M), \ - (int)(R))) - -#define _mm512_maskz_exp2a23_round_pd(M, A, R) \ - ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(M), (int)(R))) - -#define _mm512_exp2a23_pd(A) \ - _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_mask_exp2a23_pd(S, M, A) \ - _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_maskz_exp2a23_pd(M, A) \ - _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_exp2a23_round_ps(A, R) \ - ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R))) - -#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \ - ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(S), (__mmask16)(M), \ - (int)(R))) - -#define _mm512_maskz_exp2a23_round_ps(M, A, R) \ - ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(M), (int)(R))) - -#define _mm512_exp2a23_ps(A) \ - _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_mask_exp2a23_ps(S, M, A) \ - _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_maskz_exp2a23_ps(M, A) \ - _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) - -/* rsqrt28 */ -#define 
_mm512_rsqrt28_round_pd(A, R) \ - ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R))) - -#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \ - ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(S), (__mmask8)(M), \ - (int)(R))) - -#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \ - ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(M), (int)(R))) - -#define _mm512_rsqrt28_pd(A) \ - _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_mask_rsqrt28_pd(S, M, A) \ - _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_maskz_rsqrt28_pd(M, A) \ - _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_rsqrt28_round_ps(A, R) \ - ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R))) - -#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \ - ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(S), (__mmask16)(M), \ - (int)(R))) - -#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \ - ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(M), (int)(R))) - -#define _mm512_rsqrt28_ps(A) \ - _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_mask_rsqrt28_ps(S, M, A) \ - _mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION) - -#define _mm512_maskz_rsqrt28_ps(M, A) \ - _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm_rsqrt28_round_ss(A, B, R) \ - ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R))) - -#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \ - ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ - 
(__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(S), \ - (__mmask8)(M), (int)(R))) - -#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \ - ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(M), (int)(R))) - -#define _mm_rsqrt28_ss(A, B) \ - _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) - -#define _mm_mask_rsqrt28_ss(S, M, A, B) \ - _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) - -#define _mm_maskz_rsqrt28_ss(M, A, B) \ - _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) - -#define _mm_rsqrt28_round_sd(A, B, R) \ - ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R))) - -#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \ - ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(S), \ - (__mmask8)(M), (int)(R))) - -#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \ - ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(M), (int)(R))) - -#define _mm_rsqrt28_sd(A, B) \ - _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) - -#define _mm_mask_rsqrt28_sd(S, M, A, B) \ - _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) - -#define _mm_maskz_rsqrt28_sd(M, A, B) \ - _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) - -/* rcp28 */ -#define _mm512_rcp28_round_pd(A, R) \ - ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)-1, (int)(R))) - -#define _mm512_mask_rcp28_round_pd(S, M, A, R) \ - ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(S), (__mmask8)(M), \ - (int)(R))) - -#define _mm512_maskz_rcp28_round_pd(M, A, R) \ - 
((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(M), (int)(R))) - -#define _mm512_rcp28_pd(A) \ - _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_mask_rcp28_pd(S, M, A) \ - _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_maskz_rcp28_pd(M, A) \ - _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_rcp28_round_ps(A, R) \ - ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)-1, (int)(R))) - -#define _mm512_mask_rcp28_round_ps(S, M, A, R) \ - ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(S), (__mmask16)(M), \ - (int)(R))) - -#define _mm512_maskz_rcp28_round_ps(M, A, R) \ - ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(M), (int)(R))) - -#define _mm512_rcp28_ps(A) \ - _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_mask_rcp28_ps(S, M, A) \ - _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm512_maskz_rcp28_ps(M, A) \ - _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) - -#define _mm_rcp28_round_ss(A, B, R) \ - ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1, (int)(R))) - -#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \ - ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)(__m128)(S), \ - (__mmask8)(M), (int)(R))) - -#define _mm_maskz_rcp28_round_ss(M, A, B, R) \ - ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(M), (int)(R))) - -#define _mm_rcp28_ss(A, B) \ - _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) - -#define _mm_mask_rcp28_ss(S, M, A, B) \ - 
_mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) - -#define _mm_maskz_rcp28_ss(M, A, B) \ - _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) - -#define _mm_rcp28_round_sd(A, B, R) \ - ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1, (int)(R))) - -#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \ - ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(S), \ - (__mmask8)(M), (int)(R))) - -#define _mm_maskz_rcp28_round_sd(M, A, B, R) \ - ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(M), (int)(R))) - -#define _mm_rcp28_sd(A, B) \ - _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) - -#define _mm_mask_rcp28_sd(S, M, A, B) \ - _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) - -#define _mm_maskz_rcp28_sd(M, A, B) \ - _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) - -#endif /* __AVX512ERINTRIN_H */ diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h index 1ad6853a97c9d..b09ca3585d606 100644 --- a/clang/lib/Headers/cpuid.h +++ b/clang/lib/Headers/cpuid.h @@ -160,7 +160,6 @@ #define bit_CLFLUSHOPT 0x00800000 #define bit_CLWB 0x01000000 #define bit_AVX512PF 0x04000000 -#define bit_AVX512ER 0x08000000 #define bit_AVX512CD 0x10000000 #define bit_SHA 0x20000000 #define bit_AVX512BW 0x40000000 diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index 0149a1cdea633..876392e9a5daf 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -174,11 +174,6 @@ #include #endif -#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ - defined(__AVX512ER__) -#include -#endif - #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512IFMA__) #include diff 
--git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 66dac99b8d992..e14bb9b1287b1 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -6300,15 +6300,9 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_vcvttph2udq512_mask: case X86::BI__builtin_ia32_vcvttph2qq512_mask: case X86::BI__builtin_ia32_vcvttph2uqq512_mask: - case X86::BI__builtin_ia32_exp2pd_mask: - case X86::BI__builtin_ia32_exp2ps_mask: case X86::BI__builtin_ia32_getexppd512_mask: case X86::BI__builtin_ia32_getexpps512_mask: case X86::BI__builtin_ia32_getexpph512_mask: - case X86::BI__builtin_ia32_rcp28pd_mask: - case X86::BI__builtin_ia32_rcp28ps_mask: - case X86::BI__builtin_ia32_rsqrt28pd_mask: - case X86::BI__builtin_ia32_rsqrt28ps_mask: case X86::BI__builtin_ia32_vcomisd: case X86::BI__builtin_ia32_vcomiss: case X86::BI__builtin_ia32_vcomish: @@ -6335,16 +6329,12 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_minsd_round_mask: case X86::BI__builtin_ia32_minss_round_mask: case X86::BI__builtin_ia32_minsh_round_mask: - case X86::BI__builtin_ia32_rcp28sd_round_mask: - case X86::BI__builtin_ia32_rcp28ss_round_mask: case X86::BI__builtin_ia32_reducepd512_mask: case X86::BI__builtin_ia32_reduceps512_mask: case X86::BI__builtin_ia32_reduceph512_mask: case X86::BI__builtin_ia32_rndscalepd_mask: case X86::BI__builtin_ia32_rndscaleps_mask: case X86::BI__builtin_ia32_rndscaleph_mask: - case X86::BI__builtin_ia32_rsqrt28sd_round_mask: - case X86::BI__builtin_ia32_rsqrt28ss_round_mask: ArgNum = 4; break; case X86::BI__builtin_ia32_fixupimmpd512_mask: diff --git a/clang/test/CodeGen/X86/avx512er-builtins.c b/clang/test/CodeGen/X86/avx512er-builtins.c deleted file mode 100644 index ee31236a3c01a..0000000000000 --- a/clang/test/CodeGen/X86/avx512er-builtins.c +++ /dev/null @@ -1,347 +0,0 @@ -// RUN: %clang_cc1 
-flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512er -emit-llvm -o - -Wall -Werror | FileCheck %s - - -#include - -__m512d test_mm512_rsqrt28_round_pd(__m512d a) { - // CHECK-LABEL: @test_mm512_rsqrt28_round_pd - // CHECK: @llvm.x86.avx512.rsqrt28.pd - return _mm512_rsqrt28_round_pd(a, _MM_FROUND_NO_EXC); -} - -__m512d test_mm512_mask_rsqrt28_round_pd(__m512d s, __mmask8 m, __m512d a) { - // CHECK-LABEL: @test_mm512_mask_rsqrt28_round_pd - // CHECK: @llvm.x86.avx512.rsqrt28.pd - return _mm512_mask_rsqrt28_round_pd(s, m, a, _MM_FROUND_NO_EXC); -} - -__m512d test_mm512_maskz_rsqrt28_round_pd(__mmask8 m, __m512d a) { - // CHECK-LABEL: @test_mm512_maskz_rsqrt28_round_pd - // CHECK: @llvm.x86.avx512.rsqrt28.pd - return _mm512_maskz_rsqrt28_round_pd(m, a, _MM_FROUND_NO_EXC); -} - -__m512d test_mm512_rsqrt28_pd(__m512d a) { - // CHECK-LABEL: @test_mm512_rsqrt28_pd - // CHECK: @llvm.x86.avx512.rsqrt28.pd - return _mm512_rsqrt28_pd(a); -} - -__m512d test_mm512_mask_rsqrt28_pd(__m512d s, __mmask8 m, __m512d a) { - // CHECK-LABEL: @test_mm512_mask_rsqrt28_pd - // CHECK: @llvm.x86.avx512.rsqrt28.pd - return _mm512_mask_rsqrt28_pd(s, m, a); -} - -__m512d test_mm512_maskz_rsqrt28_pd(__mmask8 m, __m512d a) { - // CHECK-LABEL: @test_mm512_maskz_rsqrt28_pd - // CHECK: @llvm.x86.avx512.rsqrt28.pd - return _mm512_maskz_rsqrt28_pd(m, a); -} - -__m512 test_mm512_rsqrt28_round_ps(__m512 a) { - // CHECK-LABEL: @test_mm512_rsqrt28_round_ps - // CHECK: @llvm.x86.avx512.rsqrt28.ps - return _mm512_rsqrt28_round_ps(a, _MM_FROUND_NO_EXC); -} - -__m512 test_mm512_mask_rsqrt28_round_ps(__m512 s, __mmask16 m, __m512 a) { - // CHECK-LABEL: @test_mm512_mask_rsqrt28_round_ps - // CHECK: @llvm.x86.avx512.rsqrt28.ps - return _mm512_mask_rsqrt28_round_ps(s, m, a, _MM_FROUND_NO_EXC); -} - -__m512 test_mm512_maskz_rsqrt28_round_ps(__mmask16 m, __m512 a) { - // CHECK-LABEL: @test_mm512_maskz_rsqrt28_round_ps - // CHECK: 
@llvm.x86.avx512.rsqrt28.ps - return _mm512_maskz_rsqrt28_round_ps(m, a, _MM_FROUND_NO_EXC); -} - -__m512 test_mm512_rsqrt28_ps(__m512 a) { - // CHECK-LABEL: @test_mm512_rsqrt28_ps - // CHECK: @llvm.x86.avx512.rsqrt28.ps - return _mm512_rsqrt28_ps(a); -} - -__m512 test_mm512_mask_rsqrt28_ps(__m512 s, __mmask16 m, __m512 a) { - // CHECK-LABEL: @test_mm512_mask_rsqrt28_ps - // CHECK: @llvm.x86.avx512.rsqrt28.ps - return _mm512_mask_rsqrt28_ps(s, m, a); -} - -__m512 test_mm512_maskz_rsqrt28_ps(__mmask16 m, __m512 a) { - // CHECK-LABEL: @test_mm512_maskz_rsqrt28_ps - // CHECK: @llvm.x86.avx512.rsqrt28.ps - return _mm512_maskz_rsqrt28_ps(m, a); -} - -__m128 test_mm_rsqrt28_round_ss(__m128 a, __m128 b) { - // CHECK-LABEL: @test_mm_rsqrt28_round_ss - // CHECK: @llvm.x86.avx512.rsqrt28.ss - return _mm_rsqrt28_round_ss(a, b, _MM_FROUND_NO_EXC); -} - -__m128 test_mm_mask_rsqrt28_round_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) { - // CHECK-LABEL: @test_mm_mask_rsqrt28_round_ss - // CHECK: @llvm.x86.avx512.rsqrt28.ss - return _mm_mask_rsqrt28_round_ss(s, m, a, b, _MM_FROUND_NO_EXC); -} - -__m128 test_mm_maskz_rsqrt28_round_ss(__mmask16 m, __m128 a, __m128 b) { - // CHECK-LABEL: @test_mm_maskz_rsqrt28_round_ss - // CHECK: @llvm.x86.avx512.rsqrt28.ss - return _mm_maskz_rsqrt28_round_ss(m, a, b, _MM_FROUND_NO_EXC); -} - -__m128 test_mm_rsqrt28_ss(__m128 a, __m128 b) { - // CHECK-LABEL: @test_mm_rsqrt28_ss - // CHECK: @llvm.x86.avx512.rsqrt28.ss - return _mm_rsqrt28_ss(a, b); -} - -__m128 test_mm_mask_rsqrt28_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) { - // CHECK-LABEL: @test_mm_mask_rsqrt28_ss - // CHECK: @llvm.x86.avx512.rsqrt28.ss - return _mm_mask_rsqrt28_ss(s, m, a, b); -} - -__m128 test_mm_maskz_rsqrt28_ss(__mmask16 m, __m128 a, __m128 b) { - // CHECK-LABEL: @test_mm_maskz_rsqrt28_ss - // CHECK: @llvm.x86.avx512.rsqrt28.ss - return _mm_maskz_rsqrt28_ss(m, a, b); -} - -__m128d test_mm_rsqrt28_round_sd(__m128d a, __m128d b) { - // CHECK-LABEL: @test_mm_rsqrt28_round_sd 
- // CHECK: @llvm.x86.avx512.rsqrt28.sd - return _mm_rsqrt28_round_sd(a, b, _MM_FROUND_NO_EXC); -} - -__m128d test_mm_mask_rsqrt28_round_sd(__m128d s, __mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: @test_mm_mask_rsqrt28_round_sd - // CHECK: @llvm.x86.avx512.rsqrt28.sd - return _mm_mask_rsqrt28_round_sd(s, m, a, b, _MM_FROUND_NO_EXC); -} - -__m128d test_mm_maskz_rsqrt28_round_sd(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: @test_mm_maskz_rsqrt28_round_sd - // CHECK: @llvm.x86.avx512.rsqrt28.sd - return _mm_maskz_rsqrt28_round_sd(m, a, b, _MM_FROUND_NO_EXC); -} - -__m512d test_mm512_rcp28_round_pd(__m512d a) { - // CHECK-LABEL: @test_mm512_rcp28_round_pd - // CHECK: @llvm.x86.avx512.rcp28.pd - return _mm512_rcp28_round_pd(a, _MM_FROUND_NO_EXC); -} - -__m512d test_mm512_mask_rcp28_round_pd(__m512d s, __mmask8 m, __m512d a) { - // CHECK-LABEL: @test_mm512_mask_rcp28_round_pd - // CHECK: @llvm.x86.avx512.rcp28.pd - return _mm512_mask_rcp28_round_pd(s, m, a, _MM_FROUND_NO_EXC); -} - -__m512d test_mm512_maskz_rcp28_round_pd(__mmask8 m, __m512d a) { - // CHECK-LABEL: @test_mm512_maskz_rcp28_round_pd - // CHECK: @llvm.x86.avx512.rcp28.pd - return _mm512_maskz_rcp28_round_pd(m, a, _MM_FROUND_NO_EXC); -} - -__m512d test_mm512_rcp28_pd(__m512d a) { - // CHECK-LABEL: @test_mm512_rcp28_pd - // CHECK: @llvm.x86.avx512.rcp28.pd - return _mm512_rcp28_pd(a); -} - -__m512d test_mm512_mask_rcp28_pd(__m512d s, __mmask8 m, __m512d a) { - // CHECK-LABEL: @test_mm512_mask_rcp28_pd - // CHECK: @llvm.x86.avx512.rcp28.pd - return _mm512_mask_rcp28_pd(s, m, a); -} - -__m512d test_mm512_maskz_rcp28_pd(__mmask8 m, __m512d a) { - // CHECK-LABEL: @test_mm512_maskz_rcp28_pd - // CHECK: @llvm.x86.avx512.rcp28.pd - return _mm512_maskz_rcp28_pd(m, a); -} - -__m512 test_mm512_rcp28_round_ps(__m512 a) { - // CHECK-LABEL: @test_mm512_rcp28_round_ps - // CHECK: @llvm.x86.avx512.rcp28.ps - return _mm512_rcp28_round_ps(a, _MM_FROUND_NO_EXC); -} - -__m512 
test_mm512_mask_rcp28_round_ps(__m512 s, __mmask16 m, __m512 a) { - // CHECK-LABEL: @test_mm512_mask_rcp28_round_ps - // CHECK: @llvm.x86.avx512.rcp28.ps - return _mm512_mask_rcp28_round_ps(s, m, a, _MM_FROUND_NO_EXC); -} - -__m512 test_mm512_maskz_rcp28_round_ps(__mmask16 m, __m512 a) { - // CHECK-LABEL: @test_mm512_maskz_rcp28_round_ps - // CHECK: @llvm.x86.avx512.rcp28.ps - return _mm512_maskz_rcp28_round_ps(m, a, _MM_FROUND_NO_EXC); -} - -__m512 test_mm512_rcp28_ps(__m512 a) { - // CHECK-LABEL: @test_mm512_rcp28_ps - // CHECK: @llvm.x86.avx512.rcp28.ps - return _mm512_rcp28_ps(a); -} - -__m512 test_mm512_mask_rcp28_ps(__m512 s, __mmask16 m, __m512 a) { - // CHECK-LABEL: @test_mm512_mask_rcp28_ps - // CHECK: @llvm.x86.avx512.rcp28.ps - return _mm512_mask_rcp28_ps(s, m, a); -} - -__m512 test_mm512_maskz_rcp28_ps(__mmask16 m, __m512 a) { - // CHECK-LABEL: @test_mm512_maskz_rcp28_ps - // CHECK: @llvm.x86.avx512.rcp28.ps - return _mm512_maskz_rcp28_ps(m, a); -} - -__m128 test_mm_rcp28_round_ss(__m128 a, __m128 b) { - // CHECK-LABEL: @test_mm_rcp28_round_ss - // CHECK: @llvm.x86.avx512.rcp28.ss - return _mm_rcp28_round_ss(a, b, _MM_FROUND_NO_EXC); -} - -__m128 test_mm_mask_rcp28_round_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) { - // CHECK-LABEL: @test_mm_mask_rcp28_round_ss - // CHECK: @llvm.x86.avx512.rcp28.ss - return _mm_mask_rcp28_round_ss(s, m, a, b, _MM_FROUND_NO_EXC); -} - -__m128 test_mm_maskz_rcp28_round_ss(__mmask16 m, __m128 a, __m128 b) { - // CHECK-LABEL: @test_mm_maskz_rcp28_round_ss - // CHECK: @llvm.x86.avx512.rcp28.ss - return _mm_maskz_rcp28_round_ss(m, a, b, _MM_FROUND_NO_EXC); -} - -__m128 test_mm_rcp28_ss(__m128 a, __m128 b) { - // CHECK-LABEL: @test_mm_rcp28_ss - // CHECK: @llvm.x86.avx512.rcp28.ss - return _mm_rcp28_ss(a, b); -} - -__m128 test_mm_mask_rcp28_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) { - // CHECK-LABEL: @test_mm_mask_rcp28_ss - // CHECK: @llvm.x86.avx512.rcp28.ss - return _mm_mask_rcp28_ss(s, m, a, b); -} - -__m128 
test_mm_maskz_rcp28_ss(__mmask16 m, __m128 a, __m128 b) { - // CHECK-LABEL: @test_mm_maskz_rcp28_ss - // CHECK: @llvm.x86.avx512.rcp28.ss - return _mm_maskz_rcp28_ss(m, a, b); -} - -__m128d test_mm_rcp28_round_sd(__m128d a, __m128d b) { - // CHECK-LABEL: @test_mm_rcp28_round_sd - // CHECK: @llvm.x86.avx512.rcp28.sd - return _mm_rcp28_round_sd(a, b, _MM_FROUND_NO_EXC); -} - -__m128d test_mm_mask_rcp28_round_sd(__m128d s, __mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: @test_mm_mask_rcp28_round_sd - // CHECK: @llvm.x86.avx512.rcp28.sd - return _mm_mask_rcp28_round_sd(s, m, a, b, _MM_FROUND_NO_EXC); -} - -__m128d test_mm_maskz_rcp28_round_sd(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: @test_mm_maskz_rcp28_round_sd - // CHECK: @llvm.x86.avx512.rcp28.sd - return _mm_maskz_rcp28_round_sd(m, a, b, _MM_FROUND_NO_EXC); -} - -__m128d test_mm_rcp28_sd(__m128d a, __m128d b) { - // CHECK-LABEL: @test_mm_rcp28_sd - // CHECK: @llvm.x86.avx512.rcp28.sd - return _mm_rcp28_sd(a, b); -} - -__m128d test_mm_mask_rcp28_sd(__m128d s, __mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: @test_mm_mask_rcp28_sd - // CHECK: @llvm.x86.avx512.rcp28.sd - return _mm_mask_rcp28_sd(s, m, a, b); -} - -__m128d test_mm_maskz_rcp28_sd(__mmask8 m, __m128d a, __m128d b) { - // CHECK-LABEL: @test_mm_maskz_rcp28_sd - // CHECK: @llvm.x86.avx512.rcp28.sd - return _mm_maskz_rcp28_sd(m, a, b); -} - -__m512d test_mm512_exp2a23_round_pd(__m512d a) { - // CHECK-LABEL: @test_mm512_exp2a23_round_pd - // CHECK: @llvm.x86.avx512.exp2.pd - return _mm512_exp2a23_round_pd(a, _MM_FROUND_NO_EXC); -} - -__m512d test_mm512_mask_exp2a23_round_pd(__m512d s, __mmask8 m, __m512d a) { - // CHECK-LABEL: @test_mm512_mask_exp2a23_round_pd - // CHECK: @llvm.x86.avx512.exp2.pd - return _mm512_mask_exp2a23_round_pd(s, m, a, _MM_FROUND_NO_EXC); -} - -__m512d test_mm512_maskz_exp2a23_round_pd(__mmask8 m, __m512d a) { - // CHECK-LABEL: @test_mm512_maskz_exp2a23_round_pd - // CHECK: @llvm.x86.avx512.exp2.pd - return 
_mm512_maskz_exp2a23_round_pd(m, a, _MM_FROUND_NO_EXC); -} - -__m512d test_mm512_exp2a23_pd(__m512d a) { - // CHECK-LABEL: @test_mm512_exp2a23_pd - // CHECK: @llvm.x86.avx512.exp2.pd - return _mm512_exp2a23_pd(a); -} - -__m512d test_mm512_mask_exp2a23_pd(__m512d s, __mmask8 m, __m512d a) { - // CHECK-LABEL: @test_mm512_mask_exp2a23_pd - // CHECK: @llvm.x86.avx512.exp2.pd - return _mm512_mask_exp2a23_pd(s, m, a); -} - -__m512d test_mm512_maskz_exp2a23_pd(__mmask8 m, __m512d a) { - // CHECK-LABEL: @test_mm512_maskz_exp2a23_pd - // CHECK: @llvm.x86.avx512.exp2.pd - return _mm512_maskz_exp2a23_pd(m, a); -} - -__m512 test_mm512_exp2a23_round_ps(__m512 a) { - // CHECK-LABEL: @test_mm512_exp2a23_round_ps - // CHECK: @llvm.x86.avx512.exp2.ps - return _mm512_exp2a23_round_ps(a, _MM_FROUND_NO_EXC); -} - -__m512 test_mm512_mask_exp2a23_round_ps(__m512 s, __mmask16 m, __m512 a) { - // CHECK-LABEL: @test_mm512_mask_exp2a23_round_ps - // CHECK: @llvm.x86.avx512.exp2.ps - return _mm512_mask_exp2a23_round_ps(s, m, a, _MM_FROUND_NO_EXC); -} - -__m512 test_mm512_maskz_exp2a23_round_ps(__mmask16 m, __m512 a) { - // CHECK-LABEL: @test_mm512_maskz_exp2a23_round_ps - // CHECK: @llvm.x86.avx512.exp2.ps - return _mm512_maskz_exp2a23_round_ps(m, a, _MM_FROUND_NO_EXC); -} - -__m512 test_mm512_exp2a23_ps(__m512 a) { - // CHECK-LABEL: @test_mm512_exp2a23_ps - // CHECK: @llvm.x86.avx512.exp2.ps - return _mm512_exp2a23_ps(a); -} - -__m512 test_mm512_mask_exp2a23_ps(__m512 s, __mmask16 m, __m512 a) { - // CHECK-LABEL: @test_mm512_mask_exp2a23_ps - // CHECK: @llvm.x86.avx512.exp2.ps - return _mm512_mask_exp2a23_ps(s, m, a); -} - -__m512 test_mm512_maskz_exp2a23_ps(__mmask16 m, __m512 a) { - // CHECK-LABEL: @test_mm512_maskz_exp2a23_ps - // CHECK: @llvm.x86.avx512.exp2.ps - return _mm512_maskz_exp2a23_ps(m, a); -} - diff --git a/clang/test/CodeGen/function-target-features.c b/clang/test/CodeGen/function-target-features.c index 0d8bfc7e4e44c..d6a73ff8224b6 100644 --- 
a/clang/test/CodeGen/function-target-features.c +++ b/clang/test/CodeGen/function-target-features.c @@ -4,7 +4,7 @@ // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | FileCheck %s -check-prefix=AVX-FEATURE // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | FileCheck %s -check-prefix=AVX-NO-CPU -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f -target-feature +avx512er | FileCheck %s -check-prefix=TWO-AVX +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f -target-feature +avx512bw | FileCheck %s -check-prefix=TWO-AVX // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-cpu corei7 | FileCheck %s -check-prefix=CORE-CPU // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-cpu corei7 -target-feature +avx | FileCheck %s -check-prefix=CORE-CPU-AND-FEATURES // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-cpu x86-64 | FileCheck %s -check-prefix=X86-64-CPU @@ -17,7 +17,7 @@ void foo(void) {} // AVX-FEATURE: "target-features"{{.*}}+avx // AVX-NO-CPU-NOT: target-cpu -// TWO-AVX: "target-features"={{.*}}+avx512er{{.*}}+avx512f +// TWO-AVX: "target-features"={{.*}}+avx512bw{{.*}}+avx512f // CORE-CPU: "target-cpu"="corei7" // CORE-CPU-AND-FEATURES: "target-cpu"="corei7" "target-features"={{.*}}+avx // X86-64-CPU: "target-cpu"="x86-64" diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c index 505f4a3e94565..9beea3bdef69a 100644 --- a/clang/test/CodeGen/target-builtin-noerror.c +++ b/clang/test/CodeGen/target-builtin-noerror.c @@ -68,7 +68,6 @@ void verifyfeaturestrings(void) { (void)__builtin_cpu_supports("avx512bw"); (void)__builtin_cpu_supports("avx512dq"); (void)__builtin_cpu_supports("avx512cd"); - (void)__builtin_cpu_supports("avx512er"); (void)__builtin_cpu_supports("avx512pf"); 
(void)__builtin_cpu_supports("avx512vbmi"); (void)__builtin_cpu_supports("avx512ifma"); diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c index 6fb9df96e2b33..942df9259e6a8 100644 --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -21,11 +21,6 @@ // SSE4-AES: "-target-feature" "+sse4.2" "-target-feature" "+aes" // NO-SSE4-AES: "-target-feature" "-sse4.1" "-target-feature" "-aes" -// RUN: %clang --target=i386 -march=i386 -mavx -mavx2 -mavx512f -mavx512cd -mavx512er -mavx512pf -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma %s -### 2>&1 | FileCheck -check-prefix=AVX %s -// RUN: %clang --target=i386 -march=i386 -mno-avx -mno-avx2 -mno-avx512f -mno-avx512cd -mno-avx512er -mno-avx512pf -mno-avx512dq -mno-avx512bw -mno-avx512vl -mno-avx512vbmi -mno-avx512vbmi2 -mno-avx512ifma %s -### 2>&1 | FileCheck -check-prefix=NO-AVX %s -// AVX: "-target-feature" "+avx" "-target-feature" "+avx2" "-target-feature" "+avx512f" "-target-feature" "+avx512cd" "-target-feature" "+avx512er" "-target-feature" "+avx512pf" "-target-feature" "+avx512dq" "-target-feature" "+avx512bw" "-target-feature" "+avx512vl" "-target-feature" "+avx512vbmi" "-target-feature" "+avx512vbmi2" "-target-feature" "+avx512ifma" -// NO-AVX: "-target-feature" "-avx" "-target-feature" "-avx2" "-target-feature" "-avx512f" "-target-feature" "-avx512cd" "-target-feature" "-avx512er" "-target-feature" "-avx512pf" "-target-feature" "-avx512dq" "-target-feature" "-avx512bw" "-target-feature" "-avx512vl" "-target-feature" "-avx512vbmi" "-target-feature" "-avx512vbmi2" "-target-feature" "-avx512ifma" - // RUN: %clang --target=i386 -march=i386 -mpclmul -mrdrnd -mfsgsbase -mbmi -mbmi2 %s -### 2>&1 | FileCheck -check-prefix=BMI %s // RUN: %clang --target=i386 -march=i386 -mno-pclmul -mno-rdrnd -mno-fsgsbase -mno-bmi -mno-bmi2 %s -### 2>&1 | FileCheck -check-prefix=NO-BMI %s // BMI: "-target-feature" "+pclmul" 
"-target-feature" "+rdrnd" "-target-feature" "+fsgsbase" "-target-feature" "+bmi" "-target-feature" "+bmi2" diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c index 8ef565ccbc85c..acba2a0a9bda2 100644 --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -90,22 +90,6 @@ // AVX512CD: #define __SSE__ 1 // AVX512CD: #define __SSSE3__ 1 -// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512er -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512ER %s - -// AVX512ER: #define __AVX2__ 1 -// AVX512ER: #define __AVX512ER__ 1 -// AVX512ER: #define __AVX512F__ 1 -// AVX512ER: #define __AVX__ 1 -// AVX512ER: #define __EVEX512__ 1 -// AVX512ER: #define __SSE2_MATH__ 1 -// AVX512ER: #define __SSE2__ 1 -// AVX512ER: #define __SSE3__ 1 -// AVX512ER: #define __SSE4_1__ 1 -// AVX512ER: #define __SSE4_2__ 1 -// AVX512ER: #define __SSE_MATH__ 1 -// AVX512ER: #define __SSE__ 1 -// AVX512ER: #define __SSSE3__ 1 - // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512pf -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512PF %s // AVX512PF: #define __AVX2__ 1 @@ -638,14 +622,12 @@ // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512f -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512cd -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s -// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512er -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s // NOEVEX512-NOT: #define __AVX512F__ 1 // NOEVEX512-NOT: #define __EVEX256__ 1 // NOEVEX512-NOT: #define __EVEX512__ 1 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512f -mno-evex512 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512NOEVEX512 %s 
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512cd -mno-evex512 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512NOEVEX512 %s -// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512er -mno-evex512 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512NOEVEX512 %s // AVX512NOEVEX512: #define __AVX512F__ 1 // AVX512NOEVEX512-NOT: #define __EVEX256__ 1 // AVX512NOEVEX512-NOT: #define __EVEX512__ 1 diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index fdc2b0fb7f80f..0fda7e66c06a7 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -3843,58 +3843,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". DefaultAttrsIntrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; - - def int_x86_avx512_rcp28_ps : ClangBuiltin<"__builtin_ia32_rcp28ps_mask">, - DefaultAttrsIntrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, - llvm_i32_ty], - [IntrNoMem, ImmArg>]>; - def int_x86_avx512_rcp28_pd : ClangBuiltin<"__builtin_ia32_rcp28pd_mask">, - DefaultAttrsIntrinsic<[llvm_v8f64_ty], - [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, - llvm_i32_ty], - [IntrNoMem, ImmArg>]>; - def int_x86_avx512_exp2_ps : ClangBuiltin<"__builtin_ia32_exp2ps_mask">, - DefaultAttrsIntrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, - llvm_i32_ty], - [IntrNoMem, ImmArg>]>; - def int_x86_avx512_exp2_pd : ClangBuiltin<"__builtin_ia32_exp2pd_mask">, - DefaultAttrsIntrinsic<[llvm_v8f64_ty], - [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, - llvm_i32_ty], - [IntrNoMem, ImmArg>]>; - - def int_x86_avx512_rcp28_ss : ClangBuiltin<"__builtin_ia32_rcp28ss_round_mask">, - DefaultAttrsIntrinsic<[llvm_v4f32_ty], - [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; - def int_x86_avx512_rcp28_sd : 
ClangBuiltin<"__builtin_ia32_rcp28sd_round_mask">, - DefaultAttrsIntrinsic<[llvm_v2f64_ty], - [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; - def int_x86_avx512_rsqrt28_ps : ClangBuiltin<"__builtin_ia32_rsqrt28ps_mask">, - DefaultAttrsIntrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, - llvm_i32_ty], - [IntrNoMem, ImmArg>]>; - def int_x86_avx512_rsqrt28_pd : ClangBuiltin<"__builtin_ia32_rsqrt28pd_mask">, - DefaultAttrsIntrinsic<[llvm_v8f64_ty], - [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, - llvm_i32_ty], - [IntrNoMem, ImmArg>]>; - def int_x86_avx512_rsqrt28_ss : ClangBuiltin<"__builtin_ia32_rsqrt28ss_round_mask">, - DefaultAttrsIntrinsic<[llvm_v4f32_ty], - [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; - def int_x86_avx512_rsqrt28_sd : ClangBuiltin<"__builtin_ia32_rsqrt28sd_round_mask">, - DefaultAttrsIntrinsic<[llvm_v2f64_ty], - [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; def int_x86_avx512_psad_bw_512 : ClangBuiltin<"__builtin_ia32_psadbw512">, DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty], [IntrNoMem, Commutative]>; diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index d13dc9a271e59..682b32e29cff5 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -124,9 +124,6 @@ def FeatureEVEX512 : SubtargetFeature<"evex512", "HasEVEX512", "true", def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512", "Enable AVX-512 instructions", [FeatureAVX2, FeatureFMA, FeatureF16C]>; -def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true", - "Enable AVX-512 Exponential and Reciprocal Instructions", - [FeatureAVX512]>; def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true", "Enable AVX-512 Conflict Detection Instructions", [FeatureAVX512]>; diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td 
b/llvm/lib/Target/X86/X86InstrAVX512.td index 7c3c1d5fe42b3..981cefc1b5563 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -9247,6 +9247,31 @@ multiclass avx512_fp28_s opc, string OpcodeStr,X86VectorVTInfo _, } } +multiclass avx512_fp28_s_ass opc, string OpcodeStr, X86VectorVTInfo _> { + let ExeDomain = _.ExeDomain, hasNoSchedulingInfo = 1 in { + defm r : AVX512_maskable_scalar, Sched<[WriteMove]>; + defm rb : AVX512_maskable_scalar, Sched<[WriteMove]>, EVEX_B; + defm m : AVX512_maskable_scalar, Sched<[WriteMove]>; + } +} + +multiclass avx512_eri_s_ass opc, string OpcodeStr> { + defm SSZ : avx512_fp28_s_ass, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V; + defm SDZ : avx512_fp28_s_ass, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8PD, EVEX_4V; +} + +defm VRCP28 : avx512_eri_s_ass<0xCB, "vrcp28">; +defm VRSQRT28 : avx512_eri_s_ass<0xCD, "vrsqrt28">; + multiclass avx512_eri_s opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeSAE, X86FoldableSchedWrite sched> { defm SSZ : avx512_fp28_s opc, string OpcodeStr, SDNode OpNode, EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV; } -let Predicates = [HasERI] in { - defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs, - SchedWriteFRcp.Scl>; - defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs, - SchedWriteFRsqrt.Scl>; -} - defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, SchedWriteFRnd.Scl>, avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, @@ -9307,6 +9325,43 @@ multiclass avx512_fp28_p_sae opc, string OpcodeStr, X86VectorVTInfo _, EVEX_B, Sched<[sched]>; } +multiclass avx512_fp28_p_ass opc, string OpcodeStr, X86VectorVTInfo _> { + let ExeDomain = _.ExeDomain, hasNoSchedulingInfo = 1 in { + defm r : AVX512_maskable, Sched<[WriteMove]>; + + defm m : AVX512_maskable, Sched<[WriteMove]>; + + defm mb : AVX512_maskable, Sched<[WriteMove]>, EVEX_B; + } +} +multiclass avx512_fp28_p_sae_ass opc, string OpcodeStr, 
X86VectorVTInfo _> { + let ExeDomain = _.ExeDomain, Uses = [MXCSR] in + defm rb : AVX512_maskable, Sched<[WriteMove]>, EVEX_B; +} + +multiclass avx512_eri_ass opc, string OpcodeStr> { + defm PSZ : avx512_fp28_p_ass, + avx512_fp28_p_sae_ass, + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp28_p_ass, + avx512_fp28_p_sae_ass, + T8PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>; +} + +defm VRSQRT28 : avx512_eri_ass<0xCC, "vrsqrt28">, EVEX; +defm VRCP28 : avx512_eri_ass<0xCA, "vrcp28">, EVEX; +defm VEXP2 : avx512_eri_ass<0xC8, "vexp2">, EVEX; + multiclass avx512_eri opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched> { defm PSZ : avx512_fp28_p, @@ -9349,14 +9404,7 @@ multiclass avx512_vgetexp_fp16 opc, string OpcodeStr, SDNode OpNode, EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>; } } -let Predicates = [HasERI] in { - defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE, - SchedWriteFRsqrt>, EVEX; - defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE, - SchedWriteFRcp>, EVEX; - defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE, - SchedWriteFAdd>, EVEX; -} + defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE, SchedWriteFRnd>, avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE, diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td index df4bc38aa0b56..6b89d2834a1da 100644 --- a/llvm/lib/Target/X86/X86InstrPredicates.td +++ b/llvm/lib/Target/X86/X86InstrPredicates.td @@ -44,7 +44,6 @@ def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">; def HasCDI : Predicate<"Subtarget->hasCDI()">; def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">; def HasPFI : Predicate<"Subtarget->hasPFI()">; -def HasERI : Predicate<"Subtarget->hasERI()">; def HasDQI : Predicate<"Subtarget->hasDQI()">; def NoDQI : Predicate<"!Subtarget->hasDQI()">; def HasBWI : Predicate<"Subtarget->hasBWI()">; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h 
b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 3bb2f07b5f1a1..9a0a4e8657035 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -454,8 +454,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_dbpsadbw_512, INTR_TYPE_3OP_IMM8, X86ISD::DBPSADBW, 0), X86_INTRINSIC_DATA(avx512_div_pd_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND), X86_INTRINSIC_DATA(avx512_div_ps_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND), - X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_SAE, X86ISD::EXP2, X86ISD::EXP2_SAE), - X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_SAE, X86ISD::EXP2, X86ISD::EXP2_SAE), X86_INTRINSIC_DATA(avx512_fpclass_pd_128, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0), X86_INTRINSIC_DATA(avx512_fpclass_pd_256, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0), X86_INTRINSIC_DATA(avx512_fpclass_pd_512, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0), @@ -908,10 +906,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0), X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0), X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0), - X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_SAE, X86ISD::RCP28, X86ISD::RCP28_SAE), - X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_SAE, X86ISD::RCP28, X86ISD::RCP28_SAE), - X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_SAE, X86ISD::RCP28S, X86ISD::RCP28S_SAE), - X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_SAE, X86ISD::RCP28S, X86ISD::RCP28S_SAE), X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), @@ -920,10 +914,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { 
X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0), X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0), X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0), - X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_SAE,X86ISD::RSQRT28, X86ISD::RSQRT28_SAE), - X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_SAE,X86ISD::RSQRT28, X86ISD::RSQRT28_SAE), - X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_SAE,X86ISD::RSQRT28S, X86ISD::RSQRT28S_SAE), - X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_SAE,X86ISD::RSQRT28S, X86ISD::RSQRT28S_SAE), X86_INTRINSIC_DATA(avx512_sitofp_round, INTR_TYPE_1OP, ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), X86_INTRINSIC_DATA(avx512_sqrt_pd_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND), X86_INTRINSIC_DATA(avx512_sqrt_ps_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND), diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 11c5000acc077..cae3ba1dffe48 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -975,8 +975,6 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, CPU = "cascadelake"; } else if (testFeature(X86::FEATURE_AVX512VL)) { CPU = "skylake-avx512"; - } else if (testFeature(X86::FEATURE_AVX512ER)) { - CPU = "knl"; } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) { if (testFeature(X86::FEATURE_SHA)) CPU = "goldmont"; @@ -1270,8 +1268,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, setFeature(X86::FEATURE_CLFLUSHOPT); if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512PF); - if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) - setFeature(X86::FEATURE_AVX512ER); if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512CD); if (HasLeaf7 && ((EBX >> 29) & 1)) @@ -1770,7 +1766,6 @@ bool sys::getHostCPUFeatures(StringMap &Features) { 
Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; - Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; diff --git a/llvm/test/CodeGen/X86/avx512er-intrinsics.ll b/llvm/test/CodeGen/X86/avx512er-intrinsics.ll deleted file mode 100644 index fa4025f76b57d..0000000000000 --- a/llvm/test/CodeGen/X86/avx512er-intrinsics.ll +++ /dev/null @@ -1,306 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512er --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512er --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 - -define <16 x float> @test_rsqrt28_ps(<16 x float> %a0) { -; CHECK-LABEL: test_rsqrt28_ps: -; CHECK: # %bb.0: -; CHECK-NEXT: vrsqrt28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcc,0xc0] -; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) - ret <16 x float> %res -} - -define <16 x float> @test1_rsqrt28_ps(<16 x float> %a0, <16 x float> %a1) { -; CHECK-LABEL: test1_rsqrt28_ps: -; CHECK: # %bb.0: -; CHECK-NEXT: movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00] -; CHECK-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; CHECK-NEXT: vrsqrt28ps {sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcc,0xc8] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] -; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> %a1, i16 6, i32 
8) - ret <16 x float> %res -} - -define <16 x float> @test2_rsqrt28_ps(<16 x float> %a0) { -; CHECK-LABEL: test2_rsqrt28_ps: -; CHECK: # %bb.0: -; CHECK-NEXT: movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00] -; CHECK-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; CHECK-NEXT: vrsqrt28ps %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcc,0xc0] -; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 4) - ret <16 x float> %res -} - -define <16 x float> @test3_rsqrt28_ps(<16 x float> %a0) { -; CHECK-LABEL: test3_rsqrt28_ps: -; CHECK: # %bb.0: -; CHECK-NEXT: movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00] -; CHECK-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; CHECK-NEXT: vrsqrt28ps %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcc,0xc0] -; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 6, i32 4) - ret <16 x float> %res -} - -define <16 x float> @test4_rsqrt28_ps(<16 x float> %a0) { -; CHECK-LABEL: test4_rsqrt28_ps: -; CHECK: # %bb.0: -; CHECK-NEXT: movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00] -; CHECK-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; CHECK-NEXT: vrsqrt28ps {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcc,0xc0] -; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 8) - ret <16 x float> %res -} - -declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone - -define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) { -; CHECK-LABEL: test_rcp28_ps_512: -; CHECK: # %bb.0: -; CHECK-NEXT: vrcp28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xca,0xc0] -; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <16 x float> 
@llvm.x86.avx512.rcp28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) - ret <16 x float> %res -} -declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone - -define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) { -; CHECK-LABEL: test_rcp28_pd_512: -; CHECK: # %bb.0: -; CHECK-NEXT: vrcp28pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xca,0xc0] -; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8) - ret <8 x double> %res -} -declare <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone - -define <16 x float> @test_exp2_ps_512(<16 x float> %a0) { -; CHECK-LABEL: test_exp2_ps_512: -; CHECK: # %bb.0: -; CHECK-NEXT: vexp2ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xc8,0xc0] -; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) - ret <16 x float> %res -} -declare <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone - -define <8 x double> @test_exp2_pd_512(<8 x double> %a0) { -; CHECK-LABEL: test_exp2_pd_512: -; CHECK: # %bb.0: -; CHECK-NEXT: vexp2pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xc8,0xc0] -; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8) - ret <8 x double> %res -} -declare <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone - -define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) { -; CHECK-LABEL: test_rsqrt28_ss: -; CHECK: # %bb.0: -; CHECK-NEXT: vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0] -; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <4 x float> 
@llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone - -define <4 x float> @test_rcp28_ss(<4 x float> %a0) { -; CHECK-LABEL: test_rcp28_ss: -; CHECK: # %bb.0: -; CHECK-NEXT: vrcp28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0] -; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} -declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone - -define <4 x float> @test_rcp28_ss_load(<4 x float> %a0, ptr %a1ptr) { -; X86-LABEL: test_rcp28_ss_load: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vrcp28ss (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcb,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_rcp28_ss_load: -; X64: # %bb.0: -; X64-NEXT: vrcp28ss (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcb,0x07] -; X64-NEXT: retq # encoding: [0xc3] - %a1 = load <4 x float>, ptr %a1ptr - %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} - -define <4 x float> @test_rsqrt28_ss_load(<4 x float> %a0, ptr %a1ptr) { -; X86-LABEL: test_rsqrt28_ss_load: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vrsqrt28ss (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcd,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_rsqrt28_ss_load: -; X64: # %bb.0: -; X64-NEXT: vrsqrt28ss (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcd,0x07] -; 
X64-NEXT: retq # encoding: [0xc3] - %a1 = load <4 x float>, ptr %a1ptr - %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4) ; <<4 x float>> [#uses=1] - ret <4 x float> %res -} - -define <4 x float> @test_rsqrt28_ss_maskz(<4 x float> %a0, i8 %mask) { -; X86-LABEL: test_rsqrt28_ss_maskz: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_rsqrt28_ss_maskz: -; X64: # %bb.0: -; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 %mask, i32 8) ; - ret <4 x float> %res -} - -define <4 x float> @test_rsqrt28_ss_mask(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 %mask) { -; X86-LABEL: test_rsqrt28_ss_mask: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1] -; X86-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_rsqrt28_ss_mask: -; X64: # %bb.0: -; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1] -; X64-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2] -; X64-NEXT: retq # encoding: [0xc3] - %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x 
float> %b0, <4 x float> %c0, i8 %mask, i32 8) ; - ret <4 x float> %res -} - -define <2 x double> @test_rcp28_sd_mask_load(<2 x double> %a0, ptr %a1ptr, <2 x double> %a2, i8 %mask) { -; X86-LABEL: test_rcp28_sd_mask_load: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vrcp28sd %xmm0, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xcb,0xc8] -; X86-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_rcp28_sd_mask_load: -; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vrcp28sd %xmm0, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xcb,0xc8] -; X64-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %a1 = load <2 x double>, ptr %a1ptr - %res = call <2 x double> @llvm.x86.avx512.rcp28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> %a2, i8 %mask, i32 4) ; - ret <2 x double> %res -} -declare <2 x double> @llvm.x86.avx512.rcp28.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone - -define <2 x double> @test_rsqrt28_sd_maskz_load(<2 x double> %a0, ptr %a1ptr, i8 %mask) { -; X86-LABEL: test_rsqrt28_sd_maskz_load: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vrsqrt28sd %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_rsqrt28_sd_maskz_load: -; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vrsqrt28sd %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %a1 = load <2 x double>, ptr %a1ptr - %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x 
double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 %mask, i32 4) ; - ret <2 x double> %res -} - -define <2 x double> @test_rsqrt28_sd_maskz(<2 x double> %a0, i8 %mask) { -; X86-LABEL: test_rsqrt28_sd_maskz: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_rsqrt28_sd_maskz: -; X64: # %bb.0: -; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 %mask, i32 8) ; - ret <2 x double> %res -} - -define <2 x double> @test_rsqrt28_sd_mask(<2 x double> %a0, <2 x double> %b0, <2 x double> %c0, i8 %mask) { -; X86-LABEL: test_rsqrt28_sd_mask: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vrsqrt28sd {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0xcd,0xd1] -; X86-NEXT: vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_rsqrt28_sd_mask: -; X64: # %bb.0: -; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64-NEXT: vrsqrt28sd {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0xcd,0xd1] -; X64-NEXT: vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2] -; X64-NEXT: retq # encoding: [0xc3] - %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %b0, <2 x double> %c0, i8 %mask, i32 8) ; - ret <2 x double> %res -} - -declare <2 x double> 
@llvm.x86.avx512.rsqrt28.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone - -define <2 x double> @test_rsqrt28_sd_maskz_mem(<2 x double> %a0, ptr %ptr, i8 %mask) { -; X86-LABEL: test_rsqrt28_sd_maskz_mem: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vrsqrt28sd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_rsqrt28_sd_maskz_mem: -; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vrsqrt28sd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x07] -; X64-NEXT: retq # encoding: [0xc3] - %mem = load double , ptr %ptr, align 8 - %mem_v = insertelement <2 x double> undef, double %mem, i32 0 - %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 %mask, i32 4) ; - ret <2 x double> %res -} - -define <2 x double> @test_rsqrt28_sd_maskz_mem_offset(<2 x double> %a0, ptr %ptr, i8 %mask) { -; X86-LABEL: test_rsqrt28_sd_maskz_mem_offset: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] -; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vrsqrt28sd 144(%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x40,0x12] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_rsqrt28_sd_maskz_mem_offset: -; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vrsqrt28sd 144(%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x47,0x12] -; X64-NEXT: retq # encoding: [0xc3] - %ptr1 = getelementptr double, ptr %ptr, i32 18 - %mem = load 
double , ptr %ptr1, align 8 - %mem_v = insertelement <2 x double> undef, double %mem, i32 0 - %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 %mask, i32 4) ; - ret <2 x double> %res -} - diff --git a/llvm/test/CodeGen/X86/crc32-target-feature.ll b/llvm/test/CodeGen/X86/crc32-target-feature.ll index ef4fafcae5dce..9dfe27e653511 100644 --- a/llvm/test/CodeGen/X86/crc32-target-feature.ll +++ b/llvm/test/CodeGen/X86/crc32-target-feature.ll @@ -25,5 +25,5 @@ define i32 @test3(i32 %a, i8 %b) nounwind #2 { declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind attributes #0 = { "target-features"="+crc32" } -attributes #1 = { "target-features"="+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop,+crc32" } -attributes #2 = { "target-features"="+crc32,+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop" } +attributes #1 = { "target-features"="+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop,+crc32" } +attributes #2 = { 
"target-features"="+crc32,+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop" } diff --git a/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir b/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir index 4c715b894fae8..af57d972f2246 100644 --- a/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir +++ b/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir @@ -23,7 +23,7 @@ br i1 %6, label %4, label %5, !llvm.loop !9 } - attributes #0 = { nofree norecurse nosync nounwind uwtable writeonly mustprogress "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="generic" } + attributes #0 = { nofree norecurse nosync nounwind uwtable writeonly mustprogress "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="generic" } !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2} diff --git 
a/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll index 03b1aece9e870..a461f35d00dc9 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll @@ -54,4 +54,4 @@ bb10: ; preds = %bb10, %bb } -attributes #0 = { "target-cpu"="broadwell" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" } +attributes #0 = { "target-cpu"="broadwell" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,-xsaves,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" } diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll index 3a5db926082f0..3e3018f506094 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll @@ -88,7 +88,7 @@ loopexit: ret void } -attributes #0 = { uwtable "target-cpu"="skylake" 
"target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,+xsavec,+popcnt,+aes,-avx512bitalg,+xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-sse4a,-avx512bw,+clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,+sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" } +attributes #0 = { uwtable "target-cpu"="skylake" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,+xsavec,+popcnt,+aes,-avx512bitalg,+xsaves,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-sse4a,-avx512bw,+clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,+sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" } !0 = !{i32 0, i32 2147483646} !1 = !{} diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll index 5c9fe54b55212..20566005c93df 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll @@ -118,7 +118,7 @@ L44: ; preds = %L26 ret ptr addrspace(10) null } -attributes #0 = { "target-cpu"="skylake-avx512" 
"target-features"="+xsaves,+xsavec,+prfchw,+lzcnt,+sahf,+pku,+avx512vl,+avx512bw,+avx512cd,+clwb,+clflushopt,+adx,+avx512dq,+avx512f,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+aes,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-rdrnd,-rtm,-rdseed,-avx512ifma,-avx512pf,-avx512er,-sha,-prefetchwt1,-avx512vbmi,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-amx-tile,-amx-int8,-sse4a,-xop,-lwp,-fma4,-tbm,-mwaitx,-xsaveopt,-clzero,-wbnoinvd,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } +attributes #0 = { "target-cpu"="skylake-avx512" "target-features"="+xsaves,+xsavec,+prfchw,+lzcnt,+sahf,+pku,+avx512vl,+avx512bw,+avx512cd,+clwb,+clflushopt,+adx,+avx512dq,+avx512f,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+aes,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-rdrnd,-rtm,-rdseed,-avx512ifma,-avx512pf,-sha,-prefetchwt1,-avx512vbmi,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-amx-tile,-amx-int8,-sse4a,-xop,-lwp,-fma4,-tbm,-mwaitx,-xsaveopt,-clzero,-wbnoinvd,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } attributes #1 = { inaccessiblemem_or_argmemonly } attributes #2 = { allocsize(1) } diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s deleted file mode 100644 index 034fc6d83d153..0000000000000 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s +++ /dev/null @@ -1,373 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s - -vexp2pd %zmm16, %zmm19 -vexp2pd (%rax), %zmm19 -vexp2pd (%rax){1to8}, 
%zmm19 -vexp2pd %zmm16, %zmm19 {k1} -vexp2pd (%rax), %zmm19 {k1} -vexp2pd (%rax){1to8}, %zmm19 {k1} -vexp2pd %zmm16, %zmm19 {z}{k1} -vexp2pd (%rax), %zmm19 {z}{k1} -vexp2pd (%rax){1to8}, %zmm19 {z}{k1} - -vexp2pd {sae}, %zmm16, %zmm19 -vexp2pd {sae}, %zmm16, %zmm19 {k1} -vexp2pd {sae}, %zmm16, %zmm19 {z}{k1} - -vexp2ps %zmm16, %zmm19 -vexp2ps (%rax), %zmm19 -vexp2ps (%rax){1to16}, %zmm19 -vexp2ps %zmm16, %zmm19 {k1} -vexp2ps (%rax), %zmm19 {k1} -vexp2ps (%rax){1to16}, %zmm19 {k1} -vexp2ps %zmm16, %zmm19 {z}{k1} -vexp2ps (%rax), %zmm19 {z}{k1} -vexp2ps (%rax){1to16}, %zmm19 {z}{k1} - -vexp2ps {sae}, %zmm16, %zmm19 -vexp2ps {sae}, %zmm16, %zmm19 {k1} -vexp2ps {sae}, %zmm16, %zmm19 {z}{k1} - -vrcp28pd %zmm16, %zmm19 -vrcp28pd (%rax), %zmm19 -vrcp28pd (%rax){1to8}, %zmm19 -vrcp28pd %zmm16, %zmm19 {k1} -vrcp28pd (%rax), %zmm19 {k1} -vrcp28pd (%rax){1to8}, %zmm19 {k1} -vrcp28pd %zmm16, %zmm19 {z}{k1} -vrcp28pd (%rax), %zmm19 {z}{k1} -vrcp28pd (%rax){1to8}, %zmm19 {z}{k1} - -vrcp28pd {sae}, %zmm16, %zmm19 -vrcp28pd {sae}, %zmm16, %zmm19 {k1} -vrcp28pd {sae}, %zmm16, %zmm19 {z}{k1} - -vrcp28ps %zmm16, %zmm19 -vrcp28ps (%rax), %zmm19 -vrcp28ps (%rax){1to16}, %zmm19 -vrcp28ps %zmm16, %zmm19 {k1} -vrcp28ps (%rax), %zmm19 {k1} -vrcp28ps (%rax){1to16}, %zmm19 {k1} -vrcp28ps %zmm16, %zmm19 {z}{k1} -vrcp28ps (%rax), %zmm19 {z}{k1} -vrcp28ps (%rax){1to16}, %zmm19 {z}{k1} - -vrcp28ps {sae}, %zmm16, %zmm19 -vrcp28ps {sae}, %zmm16, %zmm19 {k1} -vrcp28ps {sae}, %zmm16, %zmm19 {z}{k1} - -vrcp28sd %xmm16, %xmm17, %xmm19 -vrcp28sd (%rax), %xmm17, %xmm19 -vrcp28sd %xmm16, %xmm17, %xmm19 {k1} -vrcp28sd (%rax), %xmm17, %xmm19 {k1} -vrcp28sd %xmm16, %xmm17, %xmm19 {z}{k1} -vrcp28sd (%rax), %xmm17, %xmm19 {z}{k1} - -vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 -vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {k1} -vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {z}{k1} - -vrcp28ss %xmm16, %xmm17, %xmm19 -vrcp28ss (%rax), %xmm17, %xmm19 -vrcp28ss %xmm16, %xmm17, %xmm19 {k1} -vrcp28ss (%rax), %xmm17, %xmm19 {k1} 
-vrcp28ss %xmm16, %xmm17, %xmm19 {z}{k1} -vrcp28ss (%rax), %xmm17, %xmm19 {z}{k1} - -vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 -vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {k1} -vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {z}{k1} - -vrsqrt28pd %zmm16, %zmm19 -vrsqrt28pd (%rax), %zmm19 -vrsqrt28pd (%rax){1to8}, %zmm19 -vrsqrt28pd %zmm16, %zmm19 {k1} -vrsqrt28pd (%rax), %zmm19 {k1} -vrsqrt28pd (%rax){1to8}, %zmm19 {k1} -vrsqrt28pd %zmm16, %zmm19 {z}{k1} -vrsqrt28pd (%rax), %zmm19 {z}{k1} -vrsqrt28pd (%rax){1to8}, %zmm19 {z}{k1} - -vrsqrt28pd {sae}, %zmm16, %zmm19 -vrsqrt28pd {sae}, %zmm16, %zmm19 {k1} -vrsqrt28pd {sae}, %zmm16, %zmm19 {z}{k1} - -vrsqrt28ps %zmm16, %zmm19 -vrsqrt28ps (%rax), %zmm19 -vrsqrt28ps (%rax){1to16}, %zmm19 -vrsqrt28ps %zmm16, %zmm19 {k1} -vrsqrt28ps (%rax), %zmm19 {k1} -vrsqrt28ps (%rax){1to16}, %zmm19 {k1} -vrsqrt28ps %zmm16, %zmm19 {z}{k1} -vrsqrt28ps (%rax), %zmm19 {z}{k1} -vrsqrt28ps (%rax){1to16}, %zmm19 {z}{k1} - -vrsqrt28ps {sae}, %zmm16, %zmm19 -vrsqrt28ps {sae}, %zmm16, %zmm19 {k1} -vrsqrt28ps {sae}, %zmm16, %zmm19 {z}{k1} - -vrsqrt28sd %xmm16, %xmm17, %xmm19 -vrsqrt28sd (%rax), %xmm17, %xmm19 -vrsqrt28sd %xmm16, %xmm17, %xmm19 {k1} -vrsqrt28sd (%rax), %xmm17, %xmm19 {k1} -vrsqrt28sd %xmm16, %xmm17, %xmm19 {z}{k1} -vrsqrt28sd (%rax), %xmm17, %xmm19 {z}{k1} - -vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 -vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {k1} -vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {z}{k1} - -vrsqrt28ss %xmm16, %xmm17, %xmm19 -vrsqrt28ss (%rax), %xmm17, %xmm19 -vrsqrt28ss %xmm16, %xmm17, %xmm19 {k1} -vrsqrt28ss (%rax), %xmm17, %xmm19 {k1} -vrsqrt28ss %xmm16, %xmm17, %xmm19 {z}{k1} -vrsqrt28ss (%rax), %xmm17, %xmm19 {z}{k1} - -vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 -vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {k1} -vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {z}{k1} - -# CHECK: Instruction Info: -# CHECK-NEXT: [1]: #uOps -# CHECK-NEXT: [2]: Latency -# CHECK-NEXT: [3]: RThroughput -# CHECK-NEXT: [4]: MayLoad -# CHECK-NEXT: [5]: MayStore -# 
CHECK-NEXT: [6]: HasSideEffects (U) - -# CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 1.00 vexp2pd %zmm16, %zmm19 -# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax), %zmm19 -# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax){1to8}, %zmm19 -# CHECK-NEXT: 1 3 1.00 vexp2pd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax){1to8}, %zmm19 {%k1} -# CHECK-NEXT: 1 3 1.00 vexp2pd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax){1to8}, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 3 1.00 vexp2pd {sae}, %zmm16, %zmm19 -# CHECK-NEXT: 1 3 1.00 vexp2pd {sae}, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1 3 1.00 vexp2pd {sae}, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 3 1.00 vexp2ps %zmm16, %zmm19 -# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax), %zmm19 -# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax){1to16}, %zmm19 -# CHECK-NEXT: 1 3 1.00 vexp2ps %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax), %zmm19 {%k1} -# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax){1to16}, %zmm19 {%k1} -# CHECK-NEXT: 1 3 1.00 vexp2ps %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax){1to16}, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 3 1.00 vexp2ps {sae}, %zmm16, %zmm19 -# CHECK-NEXT: 1 3 1.00 vexp2ps {sae}, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 1 3 1.00 vexp2ps {sae}, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 3 7 2.00 vrcp28pd %zmm16, %zmm19 -# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax), %zmm19 -# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax){1to8}, %zmm19 -# CHECK-NEXT: 3 7 2.00 vrcp28pd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax){1to8}, %zmm19 {%k1} -# CHECK-NEXT: 3 7 2.00 vrcp28pd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax){1to8}, %zmm19 {%k1} {z} -# CHECK-NEXT: 3 7 2.00 
vrcp28pd {sae}, %zmm16, %zmm19 -# CHECK-NEXT: 3 7 2.00 vrcp28pd {sae}, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 3 7 2.00 vrcp28pd {sae}, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 3 7 2.00 vrcp28ps %zmm16, %zmm19 -# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax), %zmm19 -# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax){1to16}, %zmm19 -# CHECK-NEXT: 3 7 2.00 vrcp28ps %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax), %zmm19 {%k1} -# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax){1to16}, %zmm19 {%k1} -# CHECK-NEXT: 3 7 2.00 vrcp28ps %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax){1to16}, %zmm19 {%k1} {z} -# CHECK-NEXT: 3 7 2.00 vrcp28ps {sae}, %zmm16, %zmm19 -# CHECK-NEXT: 3 7 2.00 vrcp28ps {sae}, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 3 7 2.00 vrcp28ps {sae}, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 5 1.00 vrcp28sd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 2 11 1.00 * vrcp28sd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 1 5 1.00 vrcp28sd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 2 11 1.00 * vrcp28sd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1 5 1.00 vrcp28sd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 2 11 1.00 * vrcp28sd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1 5 1.00 vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 1 5 1.00 vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1 5 1.00 vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1 5 1.00 vrcp28ss %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 2 11 1.00 * vrcp28ss (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 1 5 1.00 vrcp28ss %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 2 11 1.00 * vrcp28ss (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1 5 1.00 vrcp28ss %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 2 11 1.00 * vrcp28ss (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1 5 1.00 vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 1 5 1.00 vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1 5 1.00 vrcp28ss {sae}, %xmm16, 
%xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 3 7 2.00 vrsqrt28pd %zmm16, %zmm19 -# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax), %zmm19 -# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax){1to8}, %zmm19 -# CHECK-NEXT: 3 7 2.00 vrsqrt28pd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax){1to8}, %zmm19 {%k1} -# CHECK-NEXT: 3 7 2.00 vrsqrt28pd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax){1to8}, %zmm19 {%k1} {z} -# CHECK-NEXT: 3 7 2.00 vrsqrt28pd {sae}, %zmm16, %zmm19 -# CHECK-NEXT: 3 7 2.00 vrsqrt28pd {sae}, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 3 7 2.00 vrsqrt28pd {sae}, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 3 7 2.00 vrsqrt28ps %zmm16, %zmm19 -# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax), %zmm19 -# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax){1to16}, %zmm19 -# CHECK-NEXT: 3 7 2.00 vrsqrt28ps %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax), %zmm19 {%k1} -# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax){1to16}, %zmm19 {%k1} -# CHECK-NEXT: 3 7 2.00 vrsqrt28ps %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax){1to16}, %zmm19 {%k1} {z} -# CHECK-NEXT: 3 7 2.00 vrsqrt28ps {sae}, %zmm16, %zmm19 -# CHECK-NEXT: 3 7 2.00 vrsqrt28ps {sae}, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: 3 7 2.00 vrsqrt28ps {sae}, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: 1 5 1.00 vrsqrt28sd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 2 11 1.00 * vrsqrt28sd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 1 5 1.00 vrsqrt28sd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 2 11 1.00 * vrsqrt28sd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1 5 1.00 vrsqrt28sd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 2 11 1.00 * vrsqrt28sd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1 5 1.00 vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 1 5 1.00 vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1} -# 
CHECK-NEXT: 1 5 1.00 vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1 5 1.00 vrsqrt28ss %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 2 11 1.00 * vrsqrt28ss (%rax), %xmm17, %xmm19 -# CHECK-NEXT: 1 5 1.00 vrsqrt28ss %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 2 11 1.00 * vrsqrt28ss (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1 5 1.00 vrsqrt28ss %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 2 11 1.00 * vrsqrt28ss (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: 1 5 1.00 vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: 1 5 1.00 vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: 1 5 1.00 vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z} - -# CHECK: Resources: -# CHECK-NEXT: [0] - SBDivider -# CHECK-NEXT: [1] - SBFPDivider -# CHECK-NEXT: [2] - SBPort0 -# CHECK-NEXT: [3] - SBPort1 -# CHECK-NEXT: [4] - SBPort4 -# CHECK-NEXT: [5] - SBPort5 -# CHECK-NEXT: [6.0] - SBPort23 -# CHECK-NEXT: [6.1] - SBPort23 - -# CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 156.00 24.00 - 24.00 24.00 24.00 - -# CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - - 1.00 - - - - vexp2pd %zmm16, %zmm19 -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax), %zmm19 -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax){1to8}, %zmm19 -# CHECK-NEXT: - - - 1.00 - - - - vexp2pd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax){1to8}, %zmm19 {%k1} -# CHECK-NEXT: - - - 1.00 - - - - vexp2pd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax){1to8}, %zmm19 {%k1} {z} -# CHECK-NEXT: - - - 1.00 - - - - vexp2pd {sae}, %zmm16, %zmm19 -# CHECK-NEXT: - - - 1.00 - - - - vexp2pd {sae}, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - - 1.00 - - - - vexp2pd {sae}, 
%zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - - 1.00 - - - - vexp2ps %zmm16, %zmm19 -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax), %zmm19 -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax){1to16}, %zmm19 -# CHECK-NEXT: - - - 1.00 - - - - vexp2ps %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax){1to16}, %zmm19 {%k1} -# CHECK-NEXT: - - - 1.00 - - - - vexp2ps %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax){1to16}, %zmm19 {%k1} {z} -# CHECK-NEXT: - - - 1.00 - - - - vexp2ps {sae}, %zmm16, %zmm19 -# CHECK-NEXT: - - - 1.00 - - - - vexp2ps {sae}, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - - 1.00 - - - - vexp2ps {sae}, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd %zmm16, %zmm19 -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax), %zmm19 -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax){1to8}, %zmm19 -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax){1to8}, %zmm19 {%k1} -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax){1to8}, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd {sae}, %zmm16, %zmm19 -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd {sae}, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd {sae}, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps %zmm16, %zmm19 -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax), %zmm19 -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax){1to16}, %zmm19 -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps %zmm16, %zmm19 {%k1} -# 
CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax){1to16}, %zmm19 {%k1} -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax){1to16}, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps {sae}, %zmm16, %zmm19 -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps {sae}, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps {sae}, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28sd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28sd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28sd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28ss (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28ss (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28ss (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 2.50 - - 0.50 - 
- vrsqrt28pd %zmm16, %zmm19 -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax), %zmm19 -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax){1to8}, %zmm19 -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax){1to8}, %zmm19 {%k1} -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax){1to8}, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd {sae}, %zmm16, %zmm19 -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd {sae}, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd {sae}, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps %zmm16, %zmm19 -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax), %zmm19 -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax){1to16}, %zmm19 -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax), %zmm19 {%k1} -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax){1to16}, %zmm19 {%k1} -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax), %zmm19 {%k1} {z} -# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax){1to16}, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps {sae}, %zmm16, %zmm19 -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps {sae}, %zmm16, %zmm19 {%k1} -# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps {sae}, %zmm16, %zmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28sd (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 1.00 - 
- - 0.50 0.50 vrsqrt28sd (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28sd (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28ss (%rax), %xmm17, %xmm19 -# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28ss (%rax), %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss %xmm16, %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28ss (%rax), %xmm17, %xmm19 {%k1} {z} -# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 -# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1} -# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z} diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn index 6059074dfa27b..dca476e658b4f 100644 --- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -138,7 +138,6 @@ copy("Headers") { "avx512bwintrin.h", "avx512cdintrin.h", "avx512dqintrin.h", - "avx512erintrin.h", "avx512fintrin.h", "avx512fp16intrin.h", "avx512ifmaintrin.h", From 0d14f817b3349ad0d320ccc77a11bfe703259240 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Tue, 26 Dec 2023 10:47:09 +0800 Subject: [PATCH 6/7] Remove AVX512PF intrinsic supports. 
--- clang/include/clang/Basic/BuiltinsX86.def | 9 -- clang/include/clang/Driver/Options.td | 2 - clang/lib/Basic/Targets/X86.cpp | 6 -- clang/lib/Basic/Targets/X86.h | 1 - clang/lib/Headers/CMakeLists.txt | 1 - clang/lib/Headers/avx512pfintrin.h | 92 ---------------- clang/lib/Headers/cpuid.h | 1 - clang/lib/Headers/immintrin.h | 5 - clang/lib/Sema/SemaChecking.cpp | 20 ---- clang/test/CodeGen/X86/avx512pf-builtins.c | 100 ------------------ clang/test/CodeGen/target-builtin-noerror.c | 1 - clang/test/Preprocessor/x86_target_features.c | 32 ------ clang/test/Sema/builtins-x86.c | 8 -- llvm/include/llvm/IR/IntrinsicsX86.td | 32 ------ llvm/lib/Target/X86/X86.td | 3 - llvm/lib/Target/X86/X86InstrAVX512.td | 2 +- llvm/lib/Target/X86/X86InstrPredicates.td | 1 - llvm/lib/Target/X86/X86IntrinsicsInfo.h | 17 --- llvm/lib/TargetParser/Host.cpp | 3 - ...avx512-gather-scatter-intrin-deprecated.ll | 24 ----- .../X86/avx512-gather-scatter-intrin.ll | 24 ----- .../X86/insert-prefetch-invalid-instr.ll | 5 - .../X86/speculative-load-hardening-gather.ll | 22 ---- 23 files changed, 1 insertion(+), 410 deletions(-) delete mode 100644 clang/lib/Headers/avx512pfintrin.h delete mode 100644 clang/test/CodeGen/X86/avx512pf-builtins.c diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 00a69b16a5128..4bca2a5c465ec 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -955,15 +955,6 @@ TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vv*UsV16iV16iIi", "nV:512:", "avx TARGET_BUILTIN(__builtin_ia32_scatterdiv8di, "vv*UcV8OiV8OiIi", "nV:512:", "avx512f,evex512") TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vv*UcV8OiV8iIi", "nV:512:", "avx512f,evex512") -TARGET_BUILTIN(__builtin_ia32_gatherpfdpd, "vUcV8ivC*IiIi", "nV:512:", "avx512pf,evex512") -TARGET_BUILTIN(__builtin_ia32_gatherpfdps, "vUsV16ivC*IiIi", "nV:512:", "avx512pf,evex512") -TARGET_BUILTIN(__builtin_ia32_gatherpfqpd, 
"vUcV8OivC*IiIi", "nV:512:", "avx512pf,evex512") -TARGET_BUILTIN(__builtin_ia32_gatherpfqps, "vUcV8OivC*IiIi", "nV:512:", "avx512pf,evex512") -TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iv*IiIi", "nV:512:", "avx512pf,evex512") -TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16iv*IiIi", "nV:512:", "avx512pf,evex512") -TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf,evex512") -TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf,evex512") - TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq") TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a2f26b9ca4c35..3f17fcaf36b90 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5869,8 +5869,6 @@ def mavx512fp16 : Flag<["-"], "mavx512fp16">, Group; def mno_avx512fp16 : Flag<["-"], "mno-avx512fp16">, Group; def mavx512ifma : Flag<["-"], "mavx512ifma">, Group; def mno_avx512ifma : Flag<["-"], "mno-avx512ifma">, Group; -def mavx512pf : Flag<["-"], "mavx512pf">, Group; -def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group; def mavx512vbmi : Flag<["-"], "mavx512vbmi">, Group; def mno_avx512vbmi : Flag<["-"], "mno-avx512vbmi">, Group; def mavx512vbmi2 : Flag<["-"], "mavx512vbmi2">, Group; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 8b0e021a48842..4e8613ce7cfc3 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -296,8 +296,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, } else if (Feature == "+avx512fp16") { HasAVX512FP16 = true; HasLegalHalfType = true; - } else if (Feature == "+avx512pf") { - HasAVX512PF = true; } else if (Feature == "+avx512dq") { HasAVX512DQ = true; } else if (Feature == "+avx512bitalg") { @@ -813,8 +811,6 @@ void 
X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__AVX512BF16__"); if (HasAVX512FP16) Builder.defineMacro("__AVX512FP16__"); - if (HasAVX512PF) - Builder.defineMacro("__AVX512PF__"); if (HasAVX512DQ) Builder.defineMacro("__AVX512DQ__"); if (HasAVX512BITALG) @@ -1049,7 +1045,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("avx512vnni", true) .Case("avx512bf16", true) .Case("avx512fp16", true) - .Case("avx512pf", true) .Case("avx512dq", true) .Case("avx512bitalg", true) .Case("avx512bw", true) @@ -1164,7 +1159,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("avx512vnni", HasAVX512VNNI) .Case("avx512bf16", HasAVX512BF16) .Case("avx512fp16", HasAVX512FP16) - .Case("avx512pf", HasAVX512PF) .Case("avx512dq", HasAVX512DQ) .Case("avx512bitalg", HasAVX512BITALG) .Case("avx512bw", HasAVX512BW) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index a890348073e88..8e5d7d56a17b1 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -103,7 +103,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasAVX512VNNI = false; bool HasAVX512FP16 = false; bool HasAVX512BF16 = false; - bool HasAVX512PF = false; bool HasAVX512DQ = false; bool HasAVX512BITALG = false; bool HasAVX512BW = false; diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 09c62d14085df..65e483f359840 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -155,7 +155,6 @@ set(x86_files avx512fp16intrin.h avx512ifmaintrin.h avx512ifmavlintrin.h - avx512pfintrin.h avx512vbmi2intrin.h avx512vbmiintrin.h avx512vbmivlintrin.h diff --git a/clang/lib/Headers/avx512pfintrin.h b/clang/lib/Headers/avx512pfintrin.h deleted file mode 100644 index f853be021a2dd..0000000000000 --- a/clang/lib/Headers/avx512pfintrin.h +++ /dev/null @@ -1,92 +0,0 @@ -/*===------------- avx512pfintrin.h - PF intrinsics 
------------------------=== - * - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - *===-----------------------------------------------------------------------=== - */ -#ifndef __IMMINTRIN_H -#error "Never use directly; include instead." -#endif - -#ifndef __AVX512PFINTRIN_H -#define __AVX512PFINTRIN_H - -#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \ - __builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \ - (void const *)(addr), (int)(scale), \ - (int)(hint)) - -#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) \ - __builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \ - (void const *)(addr), (int)(scale), \ - (int)(hint)) - -#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) \ - __builtin_ia32_gatherpfdps((__mmask16)(mask), \ - (__v16si)(__m512i)(index), (void const *)(addr), \ - (int)(scale), (int)(hint)) - -#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) \ - __builtin_ia32_gatherpfdps((__mmask16) -1, \ - (__v16si)(__m512i)(index), (void const *)(addr), \ - (int)(scale), (int)(hint)) - -#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) \ - __builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \ - (void const *)(addr), (int)(scale), \ - (int)(hint)) - -#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) \ - __builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \ - (void const *)(addr), (int)(scale), \ - (int)(hint)) - -#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) \ - __builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \ - (void const *)(addr), (int)(scale), (int)(hint)) - -#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) \ - __builtin_ia32_gatherpfqps((__mmask8) -1, 
(__v8di)(__m512i)(index), \ - (void const *)(addr), (int)(scale), (int)(hint)) - -#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) \ - __builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \ - (void *)(addr), (int)(scale), \ - (int)(hint)) - -#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) \ - __builtin_ia32_scatterpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \ - (void *)(addr), (int)(scale), \ - (int)(hint)) - -#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) \ - __builtin_ia32_scatterpfdps((__mmask16)-1, (__v16si)(__m512i)(index), \ - (void *)(addr), (int)(scale), (int)(hint)) - -#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) \ - __builtin_ia32_scatterpfdps((__mmask16)(mask), \ - (__v16si)(__m512i)(index), (void *)(addr), \ - (int)(scale), (int)(hint)) - -#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) \ - __builtin_ia32_scatterpfqpd((__mmask8)-1, (__v8di)(__m512i)(index), \ - (void *)(addr), (int)(scale), \ - (int)(hint)) - -#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) \ - __builtin_ia32_scatterpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \ - (void *)(addr), (int)(scale), \ - (int)(hint)) - -#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) \ - __builtin_ia32_scatterpfqps((__mmask8)-1, (__v8di)(__m512i)(index), \ - (void *)(addr), (int)(scale), (int)(hint)) - -#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) \ - __builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \ - (void *)(addr), (int)(scale), (int)(hint)) - -#endif diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h index b09ca3585d606..0902734cd73af 100644 --- a/clang/lib/Headers/cpuid.h +++ b/clang/lib/Headers/cpuid.h @@ -159,7 +159,6 @@ #define bit_AVX512IFMA 0x00200000 #define bit_CLFLUSHOPT 0x00800000 #define bit_CLWB 0x01000000 -#define bit_AVX512PF 0x04000000 #define 
bit_AVX512CD 0x10000000 #define bit_SHA 0x20000000 #define bit_AVX512BW 0x40000000 diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index 876392e9a5daf..d2aff11e0abfa 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -209,11 +209,6 @@ #include #endif -#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ - defined(__AVX512PF__) -#include -#endif - #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512FP16__) #include diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index e14bb9b1287b1..abffc2f9b760f 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -6545,16 +6545,6 @@ bool Sema::CheckX86BuiltinGatherScatterScale(unsigned BuiltinID, switch (BuiltinID) { default: return false; - case X86::BI__builtin_ia32_gatherpfdpd: - case X86::BI__builtin_ia32_gatherpfdps: - case X86::BI__builtin_ia32_gatherpfqpd: - case X86::BI__builtin_ia32_gatherpfqps: - case X86::BI__builtin_ia32_scatterpfdpd: - case X86::BI__builtin_ia32_scatterpfdps: - case X86::BI__builtin_ia32_scatterpfqpd: - case X86::BI__builtin_ia32_scatterpfqps: - ArgNum = 3; - break; case X86::BI__builtin_ia32_gatherd_pd: case X86::BI__builtin_ia32_gatherd_pd256: case X86::BI__builtin_ia32_gatherq_pd: @@ -7067,16 +7057,6 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case X86::BI__builtin_ia32_vsm3rnds2: i = 3; l = 0; u = 255; break; - case X86::BI__builtin_ia32_gatherpfdpd: - case X86::BI__builtin_ia32_gatherpfdps: - case X86::BI__builtin_ia32_gatherpfqpd: - case X86::BI__builtin_ia32_gatherpfqps: - case X86::BI__builtin_ia32_scatterpfdpd: - case X86::BI__builtin_ia32_scatterpfdps: - case X86::BI__builtin_ia32_scatterpfqpd: - case X86::BI__builtin_ia32_scatterpfqps: - i = 4; l = 2; u = 3; - break; case X86::BI__builtin_ia32_reducesd_mask: case X86::BI__builtin_ia32_reducess_mask: case 
X86::BI__builtin_ia32_rndscalesd_round_mask: diff --git a/clang/test/CodeGen/X86/avx512pf-builtins.c b/clang/test/CodeGen/X86/avx512pf-builtins.c deleted file mode 100644 index 4ca70f5787968..0000000000000 --- a/clang/test/CodeGen/X86/avx512pf-builtins.c +++ /dev/null @@ -1,100 +0,0 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512pf -emit-llvm -o - -Wall -Werror | FileCheck %s - - -#include - -void test_mm512_mask_prefetch_i32gather_pd(__m256i index, __mmask8 mask, void const *addr) { - // CHECK-LABEL: @test_mm512_mask_prefetch_i32gather_pd - // CHECK: @llvm.x86.avx512.gatherpf.dpd - return _mm512_mask_prefetch_i32gather_pd(index, mask, addr, 2, _MM_HINT_T0); -} - -void test_mm512_prefetch_i32gather_pd(__m256i index, void const *addr) { - // CHECK-LABEL: @test_mm512_prefetch_i32gather_pd - // CHECK: @llvm.x86.avx512.gatherpf.dpd - return _mm512_prefetch_i32gather_pd(index, addr, 2, _MM_HINT_T0); -} - -void test_mm512_mask_prefetch_i32gather_ps(__m512i index, __mmask16 mask, void const *addr) { - // CHECK-LABEL: @test_mm512_mask_prefetch_i32gather_ps - // CHECK: @llvm.x86.avx512.gatherpf.dps - return _mm512_mask_prefetch_i32gather_ps(index, mask, addr, 2, _MM_HINT_T0); -} - -void test_mm512_prefetch_i32gather_ps(__m512i index, void const *addr) { - // CHECK-LABEL: @test_mm512_prefetch_i32gather_ps - // CHECK: @llvm.x86.avx512.gatherpf.dps - return _mm512_prefetch_i32gather_ps(index, addr, 2, _MM_HINT_T0); -} - -void test_mm512_mask_prefetch_i64gather_pd(__m512i index, __mmask8 mask, void const *addr) { - // CHECK-LABEL: @test_mm512_mask_prefetch_i64gather_pd - // CHECK: @llvm.x86.avx512.gatherpf.qpd - return _mm512_mask_prefetch_i64gather_pd(index, mask, addr, 2, _MM_HINT_T0); -} - -void test_mm512_prefetch_i64gather_pd(__m512i index, void const *addr) { - // CHECK-LABEL: @test_mm512_prefetch_i64gather_pd - // CHECK: @llvm.x86.avx512.gatherpf.qpd - return _mm512_prefetch_i64gather_pd(index, 
addr, 2, _MM_HINT_T0); -} - -void test_mm512_mask_prefetch_i64gather_ps(__m512i index, __mmask8 mask, void const *addr) { - // CHECK-LABEL: @test_mm512_mask_prefetch_i64gather_ps - // CHECK: @llvm.x86.avx512.gatherpf.qps - return _mm512_mask_prefetch_i64gather_ps(index, mask, addr, 2, _MM_HINT_T0); -} - -void test_mm512_prefetch_i64gather_ps(__m512i index, void const *addr) { - // CHECK-LABEL: @test_mm512_prefetch_i64gather_ps - // CHECK: @llvm.x86.avx512.gatherpf.qps - return _mm512_prefetch_i64gather_ps(index, addr, 2, _MM_HINT_T0); -} - -void test_mm512_prefetch_i32scatter_pd(void *addr, __m256i index) { - // CHECK-LABEL: @test_mm512_prefetch_i32scatter_pd - // CHECK: @llvm.x86.avx512.scatterpf.dpd.512 - return _mm512_prefetch_i32scatter_pd(addr, index, 1, _MM_HINT_T1); -} - -void test_mm512_mask_prefetch_i32scatter_pd(void *addr, __mmask8 mask, __m256i index) { - // CHECK-LABEL: @test_mm512_mask_prefetch_i32scatter_pd - // CHECK: @llvm.x86.avx512.scatterpf.dpd.512 - return _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, 1, _MM_HINT_T1); -} - -void test_mm512_prefetch_i32scatter_ps(void *addr, __m512i index) { - // CHECK-LABEL: @test_mm512_prefetch_i32scatter_ps - // CHECK: @llvm.x86.avx512.scatterpf.dps.512 - return _mm512_prefetch_i32scatter_ps(addr, index, 1, _MM_HINT_T1); -} - -void test_mm512_mask_prefetch_i32scatter_ps(void *addr, __mmask16 mask, __m512i index) { - // CHECK-LABEL: @test_mm512_mask_prefetch_i32scatter_ps - // CHECK: @llvm.x86.avx512.scatterpf.dps.512 - return _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, 1, _MM_HINT_T1); -} - -void test_mm512_prefetch_i64scatter_pd(void *addr, __m512i index) { - // CHECK-LABEL: @test_mm512_prefetch_i64scatter_pd - // CHECK: @llvm.x86.avx512.scatterpf.qpd.512 - return _mm512_prefetch_i64scatter_pd(addr, index, 1, _MM_HINT_T1); -} - -void test_mm512_mask_prefetch_i64scatter_pd(void *addr, __mmask16 mask, __m512i index) { - // CHECK-LABEL: @test_mm512_mask_prefetch_i64scatter_pd - // CHECK: 
@llvm.x86.avx512.scatterpf.qpd.512 - return _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, 1, _MM_HINT_T1); -} - -void test_mm512_prefetch_i64scatter_ps(void *addr, __m512i index) { - // CHECK-LABEL: @test_mm512_prefetch_i64scatter_ps - // CHECK: @llvm.x86.avx512.scatterpf.qps.512 - return _mm512_prefetch_i64scatter_ps(addr, index, 1, _MM_HINT_T1); -} - -void test_mm512_mask_prefetch_i64scatter_ps(void *addr, __mmask16 mask, __m512i index) { - // CHECK-LABEL: @test_mm512_mask_prefetch_i64scatter_ps - // CHECK: @llvm.x86.avx512.scatterpf.qps.512 - return _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, 1, _MM_HINT_T1); -} diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c index 9beea3bdef69a..43ee8d5740c3b 100644 --- a/clang/test/CodeGen/target-builtin-noerror.c +++ b/clang/test/CodeGen/target-builtin-noerror.c @@ -68,7 +68,6 @@ void verifyfeaturestrings(void) { (void)__builtin_cpu_supports("avx512bw"); (void)__builtin_cpu_supports("avx512dq"); (void)__builtin_cpu_supports("avx512cd"); - (void)__builtin_cpu_supports("avx512pf"); (void)__builtin_cpu_supports("avx512vbmi"); (void)__builtin_cpu_supports("avx512ifma"); (void)__builtin_cpu_supports("avx5124vnniw"); diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c index acba2a0a9bda2..e667a69352231 100644 --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -90,22 +90,6 @@ // AVX512CD: #define __SSE__ 1 // AVX512CD: #define __SSSE3__ 1 -// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512pf -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512PF %s - -// AVX512PF: #define __AVX2__ 1 -// AVX512PF: #define __AVX512F__ 1 -// AVX512PF: #define __AVX512PF__ 1 -// AVX512PF: #define __AVX__ 1 -// AVX512PF: #define __EVEX512__ 1 -// AVX512PF: #define __SSE2_MATH__ 1 -// AVX512PF: #define __SSE2__ 1 -// AVX512PF: #define 
__SSE3__ 1 -// AVX512PF: #define __SSE4_1__ 1 -// AVX512PF: #define __SSE4_2__ 1 -// AVX512PF: #define __SSE_MATH__ 1 -// AVX512PF: #define __SSE__ 1 -// AVX512PF: #define __SSSE3__ 1 - // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512dq -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512DQ %s // AVX512DQ: #define __AVX2__ 1 @@ -155,22 +139,6 @@ // AVX512VL: #define __SSE__ 1 // AVX512VL: #define __SSSE3__ 1 -// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512pf -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512F2 %s - -// AVX512F2: #define __AVX2__ 1 -// AVX512F2-NOT: #define __AVX512F__ 1 -// AVX512F2-NOT: #define __AVX512PF__ 1 -// AVX512F2-NOT: #define __EVEX512__ 1 -// AVX512F2: #define __AVX__ 1 -// AVX512F2: #define __SSE2_MATH__ 1 -// AVX512F2: #define __SSE2__ 1 -// AVX512F2: #define __SSE3__ 1 -// AVX512F2: #define __SSE4_1__ 1 -// AVX512F2: #define __SSE4_2__ 1 -// AVX512F2: #define __SSE_MATH__ 1 -// AVX512F2: #define __SSE__ 1 -// AVX512F2: #define __SSSE3__ 1 - // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512ifma -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512IFMA %s // AVX512IFMA: #define __AVX2__ 1 diff --git a/clang/test/Sema/builtins-x86.c b/clang/test/Sema/builtins-x86.c index cbaf7bcde871e..7d9cdce3d7894 100644 --- a/clang/test/Sema/builtins-x86.c +++ b/clang/test/Sema/builtins-x86.c @@ -106,14 +106,6 @@ __m128i test_mm_mask_i32gather_epi32(__m128i a, int const *b, __m128i c, __m128i return __builtin_ia32_gatherd_d(a, b, c, mask, 5); // expected-error {{scale argument must be 1, 2, 4, or 8}} } -void _mm512_mask_prefetch_i32gather_ps(__m512i index, __mmask16 mask, int const *addr) { - __builtin_ia32_gatherpfdps(mask, index, addr, 5, 1); // expected-error {{scale argument must be 1, 2, 4, or 8}} -} - -void _mm512_mask_prefetch_i32gather_ps_2(__m512i index, __mmask16 mask, int const *addr) { - __builtin_ia32_gatherpfdps(mask, 
index, addr, 1, 1); // expected-error {{argument value 1 is outside the valid range [2, 3]}} -} - __m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) { return __builtin_ia32_vpshldq512(__A, __B, 1024); // expected-error {{argument value 1024 is outside the valid range [0, 255]}} } diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 0fda7e66c06a7..aee804047e1b0 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -4125,38 +4125,6 @@ let TargetPrefix = "x86" in { Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [ImmArg>]>; - - // gather prefetch - // NOTE: These can't be ArgMemOnly because you can put the address completely - // in the index register. - def int_x86_avx512_gatherpf_dpd_512 : ClangBuiltin<"__builtin_ia32_gatherpfdpd">, - Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; - def int_x86_avx512_gatherpf_dps_512 : ClangBuiltin<"__builtin_ia32_gatherpfdps">, - Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; - def int_x86_avx512_gatherpf_qpd_512 : ClangBuiltin<"__builtin_ia32_gatherpfqpd">, - Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; - def int_x86_avx512_gatherpf_qps_512 : ClangBuiltin<"__builtin_ia32_gatherpfqps">, - Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; - - // scatter prefetch - // NOTE: These can't be ArgMemOnly because you can put the address completely - // in the index register. 
- def int_x86_avx512_scatterpf_dpd_512 : ClangBuiltin<"__builtin_ia32_scatterpfdpd">, - Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; - def int_x86_avx512_scatterpf_dps_512 : ClangBuiltin<"__builtin_ia32_scatterpfdps">, - Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; - def int_x86_avx512_scatterpf_qpd_512 : ClangBuiltin<"__builtin_ia32_scatterpfqpd">, - Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; - def int_x86_avx512_scatterpf_qps_512 : ClangBuiltin<"__builtin_ia32_scatterpfqps">, - Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty, - llvm_i32_ty, llvm_i32_ty], [ImmArg>, ImmArg>]>; } // AVX512 gather/scatter intrinsics that use vXi1 masks. diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 682b32e29cff5..50b7fb677f3aa 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -130,9 +130,6 @@ def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true", def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", "true", "Enable AVX-512 Population Count Instructions", [FeatureAVX512]>; -def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true", - "Enable AVX-512 PreFetch Instructions", - [FeatureAVX512]>; def FeaturePREFETCHI : SubtargetFeature<"prefetchi", "HasPREFETCHI", "true", "Prefetch instruction with T0 or T1 Hint">; diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 981cefc1b5563..9b89b298f743f 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -10338,7 +10338,7 @@ defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", // prefetch multiclass avx512_gather_scatter_prefetch opc, Format F, string OpcodeStr, RegisterClass KRC, X86MemOperand memop> { - let Predicates = [HasPFI], mayLoad = 1, mayStore = 
1 in + let mayLoad = 1, mayStore = 1 in def m : AVX5128I, EVEX, EVEX_K, Sched<[WriteLoad]>; diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td index 6b89d2834a1da..0ea0bcbfe2ec6 100644 --- a/llvm/lib/Target/X86/X86InstrPredicates.td +++ b/llvm/lib/Target/X86/X86InstrPredicates.td @@ -43,7 +43,6 @@ def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">; def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">; def HasCDI : Predicate<"Subtarget->hasCDI()">; def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">; -def HasPFI : Predicate<"Subtarget->hasPFI()">; def HasDQI : Predicate<"Subtarget->hasDQI()">; def NoDQI : Predicate<"!Subtarget->hasDQI()">; def HasBWI : Predicate<"Subtarget->hasBWI()">; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 9a0a4e8657035..e3961e0094d3a 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -108,15 +108,6 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, 0, 0), X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, 0, 0), - X86_INTRINSIC_DATA(avx512_gatherpf_dpd_512, PREFETCH, - X86::VGATHERPF0DPDm, X86::VGATHERPF1DPDm), - X86_INTRINSIC_DATA(avx512_gatherpf_dps_512, PREFETCH, - X86::VGATHERPF0DPSm, X86::VGATHERPF1DPSm), - X86_INTRINSIC_DATA(avx512_gatherpf_qpd_512, PREFETCH, - X86::VGATHERPF0QPDm, X86::VGATHERPF1QPDm), - X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH, - X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm), - X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, 0, 0), X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, 0, 0), X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, 0, 0), @@ -292,14 +283,6 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86_INTRINSIC_DATA(avx512_scatterdiv4_si, SCATTER, 0, 0), X86_INTRINSIC_DATA(avx512_scatterdiv8_sf, SCATTER, 0, 0), 
X86_INTRINSIC_DATA(avx512_scatterdiv8_si, SCATTER, 0, 0), - X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH, X86::VSCATTERPF0DPDm, - X86::VSCATTERPF1DPDm), - X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH, X86::VSCATTERPF0DPSm, - X86::VSCATTERPF1DPSm), - X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH, X86::VSCATTERPF0QPDm, - X86::VSCATTERPF1QPDm), - X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH, X86::VSCATTERPF0QPSm, - X86::VSCATTERPF1QPSm), X86_INTRINSIC_DATA(avx512_scattersiv2_df, SCATTER, 0, 0), X86_INTRINSIC_DATA(avx512_scattersiv2_di, SCATTER, 0, 0), X86_INTRINSIC_DATA(avx512_scattersiv4_df, SCATTER, 0, 0), diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index cae3ba1dffe48..ab7c28bb5db7a 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1266,8 +1266,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, setFeature(X86::FEATURE_AVX512IFMA); if (HasLeaf7 && ((EBX >> 23) & 1)) setFeature(X86::FEATURE_CLFLUSHOPT); - if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) - setFeature(X86::FEATURE_AVX512PF); if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512CD); if (HasLeaf7 && ((EBX >> 29) & 1)) @@ -1765,7 +1763,6 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); - Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; diff --git a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll index 8d09497cefb1b..77053e2c1bc98 100644 --- 
a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll +++ b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll @@ -268,30 +268,6 @@ define void @gather_qps(<8 x i64> %ind, <8 x float> %src, ptr %base, ptr %stbuf) ret void } -declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, ptr , i32, i32); -declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, ptr , i32, i32); -define void @prefetch(<8 x i64> %ind, ptr %base) { -; CHECK-LABEL: prefetch: -; CHECK: ## %bb.0: -; CHECK-NEXT: kxnorw %k0, %k0, %k1 -; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1} -; CHECK-NEXT: kxorw %k0, %k0, %k1 -; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1} -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1} -; CHECK-NEXT: movb $120, %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1} -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq - call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, ptr %base, i32 4, i32 3) - call void @llvm.x86.avx512.gatherpf.qps.512(i8 0, <8 x i64> %ind, ptr %base, i32 4, i32 2) - call void @llvm.x86.avx512.scatterpf.qps.512(i8 1, <8 x i64> %ind, ptr %base, i32 2, i32 3) - call void @llvm.x86.avx512.scatterpf.qps.512(i8 120, <8 x i64> %ind, ptr %base, i32 2, i32 2) - ret void -} - declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, ptr, <2 x i64>, i8, i32) define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, ptr %x1, <2 x i64> %x2, i8 %x3) { diff --git a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll index acbf4387255c5..df71e3c3afa5e 100644 --- a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll +++ b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll @@ -265,30 +265,6 @@ define dso_local void @gather_qps(<8 x i64> %ind, <8 x float> %src, ptr %base, p ret void } -declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x 
i64>, ptr , i32, i32); -declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, ptr , i32, i32); -define dso_local void @prefetch(<8 x i64> %ind, ptr %base) { -; CHECK-LABEL: prefetch: -; CHECK: # %bb.0: -; CHECK-NEXT: kxnorw %k0, %k0, %k1 -; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1} -; CHECK-NEXT: kxorw %k0, %k0, %k1 -; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1} -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1} -; CHECK-NEXT: movb $120, %al -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1} -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq - call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, ptr %base, i32 4, i32 3) - call void @llvm.x86.avx512.gatherpf.qps.512(i8 0, <8 x i64> %ind, ptr %base, i32 4, i32 2) - call void @llvm.x86.avx512.scatterpf.qps.512(i8 1, <8 x i64> %ind, ptr %base, i32 2, i32 3) - call void @llvm.x86.avx512.scatterpf.qps.512(i8 120, <8 x i64> %ind, ptr %base, i32 2, i32 2) - ret void -} - define <2 x double> @test_int_x86_avx512_mask_gather3div2_df(<2 x double> %x0, ptr %x1, <2 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_gather3div2_df: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll b/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll index 2f5a36865d4ae..7bdbb19d1714d 100644 --- a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll +++ b/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll @@ -8,16 +8,12 @@ target triple = "x86_64-unknown-linux-gnu" define dso_local i32 @main() local_unnamed_addr #0 !dbg !7 { entry: tail call void @llvm.prefetch(ptr inttoptr (i64 291 to ptr), i32 0, i32 0, i32 1), !dbg !9 - tail call void @llvm.x86.avx512.gatherpf.dpd.512(i8 97, <8 x i32> undef, ptr null, i32 1, i32 2), !dbg !10 ret i32 291, !dbg !11 } ; Function Attrs: inaccessiblemem_or_argmemonly nounwind declare void @llvm.prefetch(ptr nocapture readonly, i32, i32, i32) #1 
-; Function Attrs: argmemonly nounwind -declare void @llvm.x86.avx512.gatherpf.dpd.512(i8, <8 x i32>, ptr, i32, i32) #2 - attributes #0 = {"target-cpu"="x86-64" "target-features"="+avx512pf,+sse4.2,+ssse3"} attributes #1 = { inaccessiblemem_or_argmemonly nounwind } attributes #2 = { argmemonly nounwind } @@ -43,4 +39,3 @@ attributes #2 = { argmemonly nounwind } ;CHECK: # %bb.0: ;CHECK: prefetchnta 291 ;CHECK-NOT: prefetchnta 42(%rax,%ymm0) -;CHECK: vgatherpf1dpd (%rax,%ymm0) {%k1} diff --git a/llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll b/llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll index 6e89445bead63..7b3667420ec6d 100644 --- a/llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll +++ b/llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll @@ -558,28 +558,6 @@ entry: ret <8 x i64> %v } -declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, ptr, i32, i32); - -define void @test_llvm_x86_avx512_gatherpf_qps_512(<8 x i64> %iv, ptr %b) #1 { -; CHECK-LABEL: test_llvm_x86_avx512_gatherpf_qps_512: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx -; CHECK-NEXT: sarq $63, %rax -; CHECK-NEXT: kxnorw %k0, %k0, %k1 -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: vpbroadcastq %rax, %zmm1 -; CHECK-NEXT: vporq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1} -; CHECK-NEXT: shlq $47, %rax -; CHECK-NEXT: orq %rax, %rsp -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -entry: - call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %iv, ptr %b, i32 4, i32 3) - ret void -} - declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, ptr, <4 x i32>, i8, i32) define <4 x float> @test_llvm_x86_avx512_gather3siv4_sf(ptr %b, <4 x i32> %iv) #2 { From 87d15d7ad6141d7492e9579928839408d60e8152 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Tue, 26 Dec 2023 13:33:16 +0800 Subject: [PATCH 7/7] Remove PREFETCHWT1 intrinsic supports. 
--- clang/include/clang/Driver/Options.td | 2 -- clang/lib/Basic/Targets/X86.cpp | 6 ------ clang/lib/Basic/Targets/X86.h | 1 - clang/test/Driver/x86-target-features.c | 5 ----- .../llvm/TargetParser/X86TargetParser.def | 1 - llvm/lib/Target/X86/X86.td | 3 --- llvm/lib/Target/X86/X86Instr3DNow.td | 3 +-- llvm/lib/Target/X86/X86InstrFragments.td | 8 +------- llvm/lib/Target/X86/X86InstrPredicates.td | 1 - llvm/lib/Target/X86/X86Subtarget.h | 7 +++---- llvm/lib/TargetParser/Host.cpp | 1 - llvm/lib/TargetParser/X86TargetParser.cpp | 1 - .../test/CodeGen/X86/avx512-cmp-kor-sequence.ll | 2 +- llvm/test/CodeGen/X86/prefetch.ll | 17 ----------------- .../LoopStrengthReduce/X86/pr40514.ll | 2 +- .../Transforms/LoopVectorize/X86/pr23997.ll | 2 +- .../Transforms/LoopVectorize/X86/pr54634.ll | 2 +- .../LoopVectorize/X86/scatter_crash.ll | 2 +- .../Transforms/SLPVectorizer/X86/vector_gep.ll | 2 +- .../pattern-matching-based-opts-after-delicm.ll | 2 +- 20 files changed, 12 insertions(+), 58 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 3f17fcaf36b90..f4a731cc699bc 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5959,8 +5959,6 @@ def mpopcnt : Flag<["-"], "mpopcnt">, Group; def mno_popcnt : Flag<["-"], "mno-popcnt">, Group; def mprefetchi : Flag<["-"], "mprefetchi">, Group; def mno_prefetchi : Flag<["-"], "mno-prefetchi">, Group; -def mprefetchwt1 : Flag<["-"], "mprefetchwt1">, Group; -def mno_prefetchwt1 : Flag<["-"], "mno-prefetchwt1">, Group; def mprfchw : Flag<["-"], "mprfchw">, Group; def mno_prfchw : Flag<["-"], "mno-prfchw">, Group; def mptwrite : Flag<["-"], "mptwrite">, Group; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 4e8613ce7cfc3..516c98adfff7e 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -352,8 +352,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, 
HasWBNOINVD = true; } else if (Feature == "+prefetchi") { HasPREFETCHI = true; - } else if (Feature == "+prefetchwt1") { - HasPREFETCHWT1 = true; } else if (Feature == "+clzero") { HasCLZERO = true; } else if (Feature == "+cldemote") { @@ -862,8 +860,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__SM4__"); if (HasPREFETCHI) Builder.defineMacro("__PREFETCHI__"); - if (HasPREFETCHWT1) - Builder.defineMacro("__PREFETCHWT1__"); if (HasCLZERO) Builder.defineMacro("__CLZERO__"); if (HasKL) @@ -1092,7 +1088,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("pku", true) .Case("popcnt", true) .Case("prefetchi", true) - .Case("prefetchwt1", true) .Case("prfchw", true) .Case("ptwrite", true) .Case("raoint", true) @@ -1208,7 +1203,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("pku", HasPKU) .Case("popcnt", HasPOPCNT) .Case("prefetchi", HasPREFETCHI) - .Case("prefetchwt1", HasPREFETCHWT1) .Case("prfchw", HasPRFCHW) .Case("ptwrite", HasPTWRITE) .Case("raoint", HasRAOINT) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 8e5d7d56a17b1..36980c42cfead 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -134,7 +134,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasCLWB = false; bool HasMOVBE = false; bool HasPREFETCHI = false; - bool HasPREFETCHWT1 = false; bool HasRDPID = false; bool HasRDPRU = false; bool HasRetpolineExternalThunk = false; diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c index 942df9259e6a8..856b19af96ffe 100644 --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -81,11 +81,6 @@ // SGX: "-target-feature" "+sgx" // NO-SGX: "-target-feature" "-sgx" -// RUN: %clang --target=i386 -march=i386 -mprefetchwt1 %s -### 2>&1 | FileCheck -check-prefix=PREFETCHWT1 %s -// RUN: %clang --target=i386 -march=i386 
-mno-prefetchwt1 %s -### 2>&1 | FileCheck -check-prefix=NO-PREFETCHWT1 %s -// PREFETCHWT1: "-target-feature" "+prefetchwt1" -// NO-PREFETCHWT1: "-target-feature" "-prefetchwt1" - // RUN: %clang --target=i386 -march=i386 -mprefetchi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=PREFETCHI %s // RUN: %clang --target=i386 -march=i386 -mno-prefetchi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-PREFETCHI %s // PREFETCHI: "-target-feature" "+prefetchi" diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def index b58feafe4e8c2..2a20404245296 100644 --- a/llvm/include/llvm/TargetParser/X86TargetParser.def +++ b/llvm/include/llvm/TargetParser/X86TargetParser.def @@ -204,7 +204,6 @@ X86_FEATURE (MWAITX, "mwaitx") X86_FEATURE (PCONFIG, "pconfig") X86_FEATURE (PKU, "pku") X86_FEATURE (PREFETCHI, "prefetchi") -X86_FEATURE (PREFETCHWT1, "prefetchwt1") X86_FEATURE (PRFCHW, "prfchw") X86_FEATURE (PTWRITE, "ptwrite") X86_FEATURE (RDPID, "rdpid") diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 50b7fb677f3aa..a60615c4d78c3 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -133,9 +133,6 @@ def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", def FeaturePREFETCHI : SubtargetFeature<"prefetchi", "HasPREFETCHI", "true", "Prefetch instruction with T0 or T1 Hint">; -def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1", - "true", - "Prefetch with Intent to Write and T1 Hint">; def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true", "Enable AVX-512 Doubleword and Quadword Instructions", [FeatureAVX512]>; diff --git a/llvm/lib/Target/X86/X86Instr3DNow.td b/llvm/lib/Target/X86/X86Instr3DNow.td index 3be03ab0f4332..03612de0fad94 100644 --- a/llvm/lib/Target/X86/X86Instr3DNow.td +++ b/llvm/lib/Target/X86/X86Instr3DNow.td @@ -90,8 +90,7 @@ def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr", TB, 
Requires<[HasPrefetchW]>; def PREFETCHWT1 : I<0x0D, MRM2m, (outs), (ins i8mem:$addr), "prefetchwt1\t$addr", - [(prefetch addr:$addr, (i32 1), (i32 PrefetchWT1Level), (i32 1))]>, - TB, Requires<[HasPREFETCHWT1]>; + []>, TB; } // "3DNowA" instructions diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td index adf527d72f5b4..b6b623aa1b78a 100644 --- a/llvm/lib/Target/X86/X86InstrFragments.td +++ b/llvm/lib/Target/X86/X86InstrFragments.td @@ -604,14 +604,8 @@ def X86any_fcmp : PatFrags<(ops node:$lhs, node:$rhs), [(X86strict_fcmp node:$lhs, node:$rhs), (X86fcmp node:$lhs, node:$rhs)]>; -// PREFETCHWT1 is supported we want to use it for everything but T0. def PrefetchWLevel : PatFrag<(ops), (i32 timm), [{ - return N->getSExtValue() == 3 || !Subtarget->hasPREFETCHWT1(); -}]>; - -// Use PREFETCHWT1 for NTA, T2, T1. -def PrefetchWT1Level : TImmLeafgetSExtValue() <= 3; }]>; def X86lock_add_nocf : PatFrag<(ops node:$lhs, node:$rhs), diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td index 0ea0bcbfe2ec6..0f2f7429e1aff 100644 --- a/llvm/lib/Target/X86/X86InstrPredicates.td +++ b/llvm/lib/Target/X86/X86InstrPredicates.td @@ -108,7 +108,6 @@ def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">; def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">; def HasPREFETCHI : Predicate<"Subtarget->hasPREFETCHI()">; def HasPrefetchW : Predicate<"Subtarget->hasPrefetchW()">; -def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">; def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">; def HasLAHFSAHF64 : Predicate<"Subtarget->hasLAHFSAHF64()">; def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">; diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index a458b5f9ec8fb..b4480819f09be 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -213,16 +213,15 @@ class X86Subtarget final : public X86GenSubtargetInfo { bool 
hasAnyFMA() const { return hasFMA() || hasFMA4(); } bool hasPrefetchW() const { // The PREFETCHW instruction was added with 3DNow but later CPUs gave it - // its own CPUID bit as part of deprecating 3DNow. Intel eventually added - // it and KNL has another that prefetches to L2 cache. We assume the + // its own CPUID bit as part of deprecating 3DNow. We assume the // L1 version exists if the L2 version does. - return hasThreeDNow() || hasPRFCHW() || hasPREFETCHWT1(); + return hasThreeDNow() || hasPRFCHW(); } bool hasSSEPrefetch() const { // We implicitly enable these when we have a write prefix supporting cache // level OR if we have prfchw, but don't already have a read prefetch from // 3dnow. - return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1() || + return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHI(); } bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); } diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index ab7c28bb5db7a..aaf5b760dfa0f 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1768,7 +1768,6 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; - Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1); Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1); diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index df900aed145ff..53cbc22840eb6 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -490,7 +490,6 @@ constexpr FeatureBitset ImpliedFeaturesMOVDIRI = {}; constexpr FeatureBitset ImpliedFeaturesPCONFIG = {}; constexpr FeatureBitset ImpliedFeaturesPOPCNT = {}; constexpr 
FeatureBitset ImpliedFeaturesPKU = {}; -constexpr FeatureBitset ImpliedFeaturesPREFETCHWT1 = {}; constexpr FeatureBitset ImpliedFeaturesPRFCHW = {}; constexpr FeatureBitset ImpliedFeaturesPTWRITE = {}; constexpr FeatureBitset ImpliedFeaturesRDPID = {}; diff --git a/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll b/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll index 7676a65b735e0..b4ba23934d54d 100644 --- a/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll +++ b/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll @@ -48,5 +48,5 @@ entry: ; Function Attrs: nounwind readnone declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32) #1 -attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/X86/prefetch.ll b/llvm/test/CodeGen/X86/prefetch.ll 
index 3cfa0e3efcb1e..4de0cb3948f0c 100644 --- a/llvm/test/CodeGen/X86/prefetch.ll +++ b/llvm/test/CodeGen/X86/prefetch.ll @@ -6,9 +6,6 @@ ; RUN: llc < %s -mtriple=i686-- -mcpu=slm | FileCheck %s -check-prefix=PRFCHWSSE ; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 | FileCheck %s -check-prefix=PRFCHWSSE ; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=SSE -; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1 -; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1 -; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+3dnow,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1 ; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow | FileCheck %s -check-prefix=3DNOW ; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=3DNOW @@ -16,7 +13,6 @@ ; 3dnow by itself get you just the single prefetch instruction with no hints ; sse provides prefetch0/1/2/nta ; supporting prefetchw, but not 3dnow implicitly provides prefetcht0/1/2/nta regardless of sse setting as we need something to fall back to for the non-write hint. -; supporting prefetchwt1 implies prefetcht0/1/2/nta and prefetchw regardless of other settings. 
this allows levels for non-write and gives us an instruction for write+T0 ; 3dnow prefetch instruction will only get used if you have no other prefetch instructions enabled ; rdar://10538297 @@ -48,19 +44,6 @@ define void @t(ptr %ptr) nounwind { ; PRFCHWSSE-NEXT: prefetchw (%eax) ; PRFCHWSSE-NEXT: retl ; -; PREFETCHWT1-LABEL: t: -; PREFETCHWT1: # %bb.0: # %entry -; PREFETCHWT1-NEXT: movl {{[0-9]+}}(%esp), %eax -; PREFETCHWT1-NEXT: prefetcht2 (%eax) -; PREFETCHWT1-NEXT: prefetcht1 (%eax) -; PREFETCHWT1-NEXT: prefetcht0 (%eax) -; PREFETCHWT1-NEXT: prefetchnta (%eax) -; PREFETCHWT1-NEXT: prefetchwt1 (%eax) -; PREFETCHWT1-NEXT: prefetchwt1 (%eax) -; PREFETCHWT1-NEXT: prefetchw (%eax) -; PREFETCHWT1-NEXT: prefetchwt1 (%eax) -; PREFETCHWT1-NEXT: retl -; ; 3DNOW-LABEL: t: ; 3DNOW: # %bb.0: # %entry ; 3DNOW-NEXT: movl {{[0-9]+}}(%esp), %eax diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll index a461f35d00dc9..a6bff63dfc715 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll @@ -54,4 +54,4 @@ bb10: ; preds = %bb10, %bb } -attributes #0 = { "target-cpu"="broadwell" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,-xsaves,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" } +attributes #0 = { "target-cpu"="broadwell" 
"target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,-xsaves,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" } diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll index 3e3018f506094..b94ebf109163e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll @@ -88,7 +88,7 @@ loopexit: ret void } -attributes #0 = { uwtable "target-cpu"="skylake" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,+xsavec,+popcnt,+aes,-avx512bitalg,+xsaves,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-sse4a,-avx512bw,+clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,+sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" } +attributes #0 = { uwtable "target-cpu"="skylake" 
"target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,+xsavec,+popcnt,+aes,-avx512bitalg,+xsaves,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-sse4a,-avx512bw,+clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,+f16c,+ssse3,+sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" } !0 = !{i32 0, i32 2147483646} !1 = !{} diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll index 20566005c93df..743ca20f92b49 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll @@ -118,7 +118,7 @@ L44: ; preds = %L26 ret ptr addrspace(10) null } -attributes #0 = { "target-cpu"="skylake-avx512" "target-features"="+xsaves,+xsavec,+prfchw,+lzcnt,+sahf,+pku,+avx512vl,+avx512bw,+avx512cd,+clwb,+clflushopt,+adx,+avx512dq,+avx512f,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+aes,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-rdrnd,-rtm,-rdseed,-avx512ifma,-avx512pf,-sha,-prefetchwt1,-avx512vbmi,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-amx-tile,-amx-int8,-sse4a,-xop,-lwp,-fma4,-tbm,-mwaitx,-xsaveopt,-clzero,-wbnoinvd,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } +attributes #0 = { "target-cpu"="skylake-avx512" 
"target-features"="+xsaves,+xsavec,+prfchw,+lzcnt,+sahf,+pku,+avx512vl,+avx512bw,+avx512cd,+clwb,+clflushopt,+adx,+avx512dq,+avx512f,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+aes,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-rdrnd,-rtm,-rdseed,-avx512ifma,-avx512pf,-sha,-avx512vbmi,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-amx-tile,-amx-int8,-sse4a,-xop,-lwp,-fma4,-tbm,-mwaitx,-xsaveopt,-clzero,-wbnoinvd,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" } attributes #1 = { inaccessiblemem_or_argmemonly } attributes #2 = { allocsize(1) } diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index 4839e3edf7b4d..ce460f4fe3542 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -111,4 +111,4 @@ for.body: ; preds = %for.body.preheader, br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit99 } -attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" 
"target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll index 02bf77a5e103d..9e8cdc62c729a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll @@ -26,5 +26,5 @@ entry: unreachable } -attributes #0 = { noreturn readonly uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { noreturn readonly uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll b/polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll index 66011168fcc13..060140da0babe 100644 --- a/polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll +++ 
b/polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll @@ -93,7 +93,7 @@ for.end27: ; preds = %for.inc25 ret void } -attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cmov,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-adx,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-clflushopt,-clwb,-clzero,-fma4,-lwp,-mwaitx,-pku,-prefetchwt1,-prfchw,-rdseed,-rtm,-sgx,-sha,-sse4a,-tbm,-xop,-xsavec,-xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cmov,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-adx,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-clflushopt,-clwb,-clzero,-fma4,-lwp,-mwaitx,-pku,-prfchw,-rdseed,-rtm,-sgx,-sha,-sse4a,-tbm,-xop,-xsavec,-xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.module.flags = !{!0} !llvm.ident = !{!1}