-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Reland] [PowerPC] frontend get target feature from backend with cpu name #144594
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
1. The PR provides a backend target hook that allows front-ends to determine which target features are available in a compilation based on the CPU name. 2. Fix a backend target-feature bug in which HTM was marked as supported for Power8/9/10/11; according to the ISA, HTM is only supported on Power8/9. 3. All target features that are hardcoded in PPC.cpp can be retrieved from the backend target features. I have double-checked that the hardcoded logic for inferring target features from the CPU in the frontend (PPC.cpp) is the same as in PPC.td.
@llvm/pr-subscribers-clang-driver @llvm/pr-subscribers-tablegen Author: zhijian lin (diggerlin) Changes
The reland patch addressed the comment #137670 (comment) Patch is 25.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/144594.diff 13 Files Affected:
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index e6ef0ecc526ba..77145e2891a8a 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -15,6 +15,7 @@
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/TargetParser/PPCTargetParser.h"
+#include <optional>
using namespace clang;
using namespace clang::targets;
@@ -516,129 +517,14 @@ static bool ppcUserFeaturesCheck(DiagnosticsEngine &Diags,
bool PPCTargetInfo::initFeatureMap(
llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
const std::vector<std::string> &FeaturesVec) const {
- Features["altivec"] = llvm::StringSwitch<bool>(CPU)
- .Case("7400", true)
- .Case("g4", true)
- .Case("7450", true)
- .Case("g4+", true)
- .Case("970", true)
- .Case("g5", true)
- .Case("pwr6", true)
- .Case("pwr7", true)
- .Case("pwr8", true)
- .Case("pwr9", true)
- .Case("ppc64", true)
- .Case("ppc64le", true)
- .Default(false);
-
- Features["power9-vector"] = (CPU == "pwr9");
- Features["crypto"] = llvm::StringSwitch<bool>(CPU)
- .Case("ppc64le", true)
- .Case("pwr9", true)
- .Case("pwr8", true)
- .Default(false);
- Features["power8-vector"] = llvm::StringSwitch<bool>(CPU)
- .Case("ppc64le", true)
- .Case("pwr9", true)
- .Case("pwr8", true)
- .Default(false);
- Features["bpermd"] = llvm::StringSwitch<bool>(CPU)
- .Case("ppc64le", true)
- .Case("pwr9", true)
- .Case("pwr8", true)
- .Case("pwr7", true)
- .Default(false);
- Features["extdiv"] = llvm::StringSwitch<bool>(CPU)
- .Case("ppc64le", true)
- .Case("pwr9", true)
- .Case("pwr8", true)
- .Case("pwr7", true)
- .Default(false);
- Features["direct-move"] = llvm::StringSwitch<bool>(CPU)
- .Case("ppc64le", true)
- .Case("pwr9", true)
- .Case("pwr8", true)
- .Default(false);
- Features["crbits"] = llvm::StringSwitch<bool>(CPU)
- .Case("ppc64le", true)
- .Case("pwr9", true)
- .Case("pwr8", true)
- .Default(false);
- Features["vsx"] = llvm::StringSwitch<bool>(CPU)
- .Case("ppc64le", true)
- .Case("pwr9", true)
- .Case("pwr8", true)
- .Case("pwr7", true)
- .Default(false);
- Features["htm"] = llvm::StringSwitch<bool>(CPU)
- .Case("ppc64le", true)
- .Case("pwr9", true)
- .Case("pwr8", true)
- .Default(false);
-
- // ROP Protect is off by default.
- Features["rop-protect"] = false;
- // Privileged instructions are off by default.
- Features["privileged"] = false;
- if (getTriple().isOSAIX()) {
- // The code generated by the -maix-small-local-[exec|dynamic]-tls option is
- // turned off by default.
- Features["aix-small-local-exec-tls"] = false;
- Features["aix-small-local-dynamic-tls"] = false;
-
- // Turn off TLS model opt by default.
- Features["aix-shared-lib-tls-model-opt"] = false;
- }
-
- Features["spe"] = llvm::StringSwitch<bool>(CPU)
- .Case("8548", true)
- .Case("e500", true)
- .Default(false);
-
- Features["isa-v206-instructions"] = llvm::StringSwitch<bool>(CPU)
- .Case("ppc64le", true)
- .Case("pwr9", true)
- .Case("pwr8", true)
- .Case("pwr7", true)
- .Case("a2", true)
- .Default(false);
-
- Features["isa-v207-instructions"] = llvm::StringSwitch<bool>(CPU)
- .Case("ppc64le", true)
- .Case("pwr9", true)
- .Case("pwr8", true)
- .Default(false);
-
- Features["isa-v30-instructions"] =
- llvm::StringSwitch<bool>(CPU).Case("pwr9", true).Default(false);
-
- Features["quadword-atomics"] =
- getTriple().isArch64Bit() && llvm::StringSwitch<bool>(CPU)
- .Case("pwr9", true)
- .Case("pwr8", true)
- .Default(false);
-
- // Power10 includes all the same features as Power9 plus any features specific
- // to the Power10 core.
- if (CPU == "pwr10" || CPU == "power10") {
- initFeatureMap(Features, Diags, "pwr9", FeaturesVec);
- addP10SpecificFeatures(Features);
- }
-
- // Power11 includes all the same features as Power10 plus any features
- // specific to the Power11 core.
- if (CPU == "pwr11" || CPU == "power11") {
- initFeatureMap(Features, Diags, "pwr10", FeaturesVec);
- addP11SpecificFeatures(Features);
- }
+ const llvm::Triple &TheTriple = getTriple();
- // Future CPU should include all of the features of Power 11 as well as any
- // additional features (yet to be determined) specific to it.
- if (CPU == "future") {
- initFeatureMap(Features, Diags, "pwr11", FeaturesVec);
- addFutureSpecificFeatures(Features);
- }
+ std::optional<llvm::StringMap<bool>> FeaturesOpt =
+ llvm::PPC::getPPCDefaultTargetFeatures(TheTriple,
+ llvm::PPC::normalizeCPUName(CPU));
+ if (FeaturesOpt)
+ Features = FeaturesOpt.value();
if (!ppcUserFeaturesCheck(Diags, FeaturesVec))
return false;
@@ -700,26 +586,6 @@ bool PPCTargetInfo::initFeatureMap(
return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
}
-// Add any Power10 specific features.
-void PPCTargetInfo::addP10SpecificFeatures(
- llvm::StringMap<bool> &Features) const {
- Features["htm"] = false; // HTM was removed for P10.
- Features["paired-vector-memops"] = true;
- Features["mma"] = true;
- Features["power10-vector"] = true;
- Features["pcrelative-memops"] = true;
- Features["prefix-instrs"] = true;
- Features["isa-v31-instructions"] = true;
-}
-
-// Add any Power11 specific features.
-void PPCTargetInfo::addP11SpecificFeatures(
- llvm::StringMap<bool> &Features) const {}
-
-// Add features specific to the "Future" CPU.
-void PPCTargetInfo::addFutureSpecificFeatures(
- llvm::StringMap<bool> &Features) const {}
-
bool PPCTargetInfo::hasFeature(StringRef Feature) const {
return llvm::StringSwitch<bool>(Feature)
.Case("powerpc", true)
diff --git a/clang/test/CodeGenCXX/cxx11-thread-local-reference.cpp b/clang/test/CodeGenCXX/cxx11-thread-local-reference.cpp
index cd5a18f39060e..a0e76e8a9a0b6 100644
--- a/clang/test/CodeGenCXX/cxx11-thread-local-reference.cpp
+++ b/clang/test/CodeGenCXX/cxx11-thread-local-reference.cpp
@@ -35,5 +35,5 @@ int &g() { return r; }
// DARWIN-LABEL: define internal cxx_fast_tlscc void @__tls_init()
// CHECK: call void @[[R_INIT]]()
-// LINUX_AIX: attributes [[ATTR0]] = { {{.*}}"target-features"{{.*}} }
+// LINUX_AIX: attributes [[ATTR0]] = { {{.*}} }
// DARWIN: attributes [[ATTR1]] = { {{.*}}nounwind{{.*}}"target-features"{{.*}} }
diff --git a/clang/test/Driver/aix-shared-lib-tls-model-opt.c b/clang/test/Driver/aix-shared-lib-tls-model-opt.c
index 7acf091f0a049..891caf4ed3fcd 100644
--- a/clang/test/Driver/aix-shared-lib-tls-model-opt.c
+++ b/clang/test/Driver/aix-shared-lib-tls-model-opt.c
@@ -1,5 +1,5 @@
-// RUN: %clang -target powerpc64-unknown-aix -S -emit-llvm %s -o - | FileCheck --check-prefixes=CHECK-AIX,CHECK-AIX-OFF %s
-// RUN: %clang -target powerpc-unknown-aix -S -emit-llvm %s -o - | FileCheck --check-prefixes=CHECK-AIX,CHECK-AIX-OFF %s
+// RUN: %clang -target powerpc64-unknown-aix -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-AIX %s
+// RUN: %clang -target powerpc-unknown-aix -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-AIX %s
// RUN: %clang -target powerpc64le-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-LINUX %s
// RUN: %clang -target powerpc64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-LINUX %s
@@ -19,9 +19,8 @@ int test(void) {
// CHECK-AIX: test() #0 {
// CHECK-AIX: attributes #0 = {
-// CHECK-AIX-OFF-SAME: -aix-shared-lib-tls-model-opt
// CHECK-AIX-ON-SAME: +aix-shared-lib-tls-model-opt
-// CHECK-LINUX-NOT: {{[-+]aix-shared-lib-tls-model-opt}}
+// CHECK-LINUX-NOT: {{[+]aix-shared-lib-tls-model-opt}}
// CHECK-UNSUPPORTED-TARGET: option '-maix-shared-lib-tls-model-opt' cannot be specified on this target
diff --git a/clang/test/Driver/aix-small-local-exec-dynamic-tls.c b/clang/test/Driver/aix-small-local-exec-dynamic-tls.c
index 1a0619b58e891..6fc2b8efb4aed 100644
--- a/clang/test/Driver/aix-small-local-exec-dynamic-tls.c
+++ b/clang/test/Driver/aix-small-local-exec-dynamic-tls.c
@@ -1,37 +1,37 @@
-// RUN: %clang -target powerpc64-unknown-aix -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-AIX-DEFAULT %s
-// RUN: %clang -target powerpc-unknown-aix -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-AIX-DEFAULT %s
-// RUN: %clang -target powerpc64le-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-LINUX %s
-// RUN: %clang -target powerpc64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-LINUX %s
+// RUN: %clang --target=powerpc64-unknown-aix -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-DEFAULT %s
+// RUN: %clang --target=powerpc-unknown-aix -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-DEFAULT %s
+// RUN: %clang --target=powerpc64le-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-DEFAULT %s
+// RUN: %clang --target=powerpc64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-DEFAULT %s
-// RUN: %clang -target powerpc64-unknown-aix -maix-small-local-exec-tls -S -emit-llvm \
+// RUN: %clang --target=powerpc64-unknown-aix -maix-small-local-exec-tls -S -emit-llvm \
// RUN: %s -o - | FileCheck %s --check-prefix=CHECK-AIX_SMALL_LOCALEXEC_TLS
-// RUN: %clang -target powerpc64-unknown-aix -maix-small-local-dynamic-tls -S -emit-llvm \
+// RUN: %clang --target=powerpc64-unknown-aix -maix-small-local-dynamic-tls -S -emit-llvm \
// RUN: %s -o - | FileCheck %s --check-prefix=CHECK-AIX_SMALL_LOCALDYNAMIC_TLS
-// RUN: not %clang -target powerpc-unknown-aix -maix-small-local-exec-tls \
+// RUN: not %clang --target=powerpc-unknown-aix -maix-small-local-exec-tls \
// RUN: -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-UNSUPPORTED-AIX32 %s
-// RUN: not %clang -target powerpc64le-unknown-linux-gnu -maix-small-local-exec-tls \
+// RUN: not %clang --target=powerpc64le-unknown-linux-gnu -maix-small-local-exec-tls \
// RUN: -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-UNSUPPORTED-LINUX %s
-// RUN: not %clang -target powerpc64-unknown-linux-gnu -maix-small-local-exec-tls \
+// RUN: not %clang --target=powerpc64-unknown-linux-gnu -maix-small-local-exec-tls \
// RUN: -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-UNSUPPORTED-LINUX %s
-// RUN: not %clang -target powerpc64-unknown-aix -maix-small-local-exec-tls \
+// RUN: not %clang --target=powerpc64-unknown-aix -maix-small-local-exec-tls \
// RUN: -fsyntax-only -fno-data-sections %s 2>&1 | \
// RUN: FileCheck --check-prefix=CHECK-UNSUPPORTED-NO-DATASEC %s
-// RUN: not %clang -target powerpc64-unknown-linux-gnu -maix-small-local-exec-tls \
+// RUN: not %clang --target=powerpc64-unknown-linux-gnu -maix-small-local-exec-tls \
// RUN: -fsyntax-only -fno-data-sections %s 2>&1 | \
// RUN: FileCheck --check-prefix=CHECK-UNSUPPORTED-NO-DATASEC %s
-// RUN: not %clang -target powerpc-unknown-aix -maix-small-local-dynamic-tls \
+// RUN: not %clang --target=powerpc-unknown-aix -maix-small-local-dynamic-tls \
// RUN: -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-UNSUPPORTED-AIX32 %s
-// RUN: not %clang -target powerpc64le-unknown-linux-gnu -maix-small-local-dynamic-tls \
+// RUN: not %clang --target=powerpc64le-unknown-linux-gnu -maix-small-local-dynamic-tls \
// RUN: -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-UNSUPPORTED-LINUX %s
-// RUN: not %clang -target powerpc64-unknown-linux-gnu -maix-small-local-dynamic-tls \
+// RUN: not %clang --target=powerpc64-unknown-linux-gnu -maix-small-local-dynamic-tls \
// RUN: -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-UNSUPPORTED-LINUX %s
-// RUN: not %clang -target powerpc64-unknown-aix -maix-small-local-dynamic-tls \
+// RUN: not %clang --target=powerpc64-unknown-aix -maix-small-local-dynamic-tls \
// RUN: -fsyntax-only -fno-data-sections %s 2>&1 | \
// RUN: FileCheck --check-prefix=CHECK-UNSUPPORTED-NO-DATASEC %s
-// RUN: not %clang -target powerpc64-unknown-linux-gnu -maix-small-local-dynamic-tls \
+// RUN: not %clang --target=powerpc64-unknown-linux-gnu -maix-small-local-dynamic-tls \
// RUN: -fsyntax-only -fno-data-sections %s 2>&1 | \
// RUN: FileCheck --check-prefix=CHECK-UNSUPPORTED-NO-DATASEC %s
@@ -39,10 +39,9 @@ int test(void) {
return 0;
}
-// CHECK-AIX-DEFAULT: test() #0 {
-// CHECK-AIX-DEFAULT: attributes #0 = {
-// CHECK-AIX-DEFAULT-SAME: {{-aix-small-local-exec-tls,.*-aix-small-local-dynamic-tls|-aix-small-local-dynamic-tls,.*-aix-small-local-exec-tls}}
-// CHECK-LINUX-NOT: {{[-+]aix-small-local-exec-tls,.*[-+]aix-small-local-dynamic-tls|[-+]aix-small-local-dynamic-tls,.*[-+]aix-small-local-exec-tls}}
+// CHECK-DEFAULT: test() #0 {
+// CHECK-DEFAULT: attributes #0 = {
+// CHECK-DEFAULT-NOT: {{[-+]aix-small-local-exec-tls,.*[-+]aix-small-local-dynamic-tls|[-+]aix-small-local-dynamic-tls,.*[-+]aix-small-local-exec-tls}}
// CHECK-UNSUPPORTED-AIX32: option '-maix-small-local-[exec|dynamic]-tls' cannot be specified on this target
// CHECK-UNSUPPORTED-LINUX: option '-maix-small-local-[exec|dynamic]-tls' cannot be specified on this target
diff --git a/clang/test/Driver/ppc-crbits.cpp b/clang/test/Driver/ppc-crbits.cpp
index 3ed56308cb526..62893d3d0e87d 100644
--- a/clang/test/Driver/ppc-crbits.cpp
+++ b/clang/test/Driver/ppc-crbits.cpp
@@ -64,8 +64,6 @@
// RUN: %clang -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mno-crbits \
// RUN: -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-NOCRBITS
-// RUN: %clang -target powerpc64le-unknown-linux-gnu -mcpu=pwr7 -emit-llvm \
-// RUN: -S %s -o - | FileCheck %s --check-prefix=HAS-NOCRBITS
// RUN: %clang -target powerpc64le-unknown-linux-gnu -mcpu=pwr7 -mcrbits \
// RUN: -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-CRBITS
// RUN: %clang -target powerpc64le-unknown-linux-gnu -mcpu=pwr7 -mno-crbits \
@@ -92,8 +90,6 @@
// RUN: %clang -target powerpc-ibm-aix -mcpu=pwr8 -mno-crbits \
// RUN: -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-NOCRBITS
-// RUN: %clang -target powerpc-ibm-aix -mcpu=pwr7 -emit-llvm \
-// RUN: -S %s -o - | FileCheck %s --check-prefix=HAS-NOCRBITS
// RUN: %clang -target powerpc-ibm-aix -mcpu=pwr7 -mcrbits \
// RUN: -emit-llvm -S %s -o - | FileCheck %s --check-prefix=HAS-CRBITS
// RUN: %clang -target powerpc-ibm-aix -mcpu=pwr7 -mno-crbits \
diff --git a/clang/test/Driver/ppc-isa-features.cpp b/clang/test/Driver/ppc-isa-features.cpp
index 92c5bc82f72b8..35dbfbcdf5699 100644
--- a/clang/test/Driver/ppc-isa-features.cpp
+++ b/clang/test/Driver/ppc-isa-features.cpp
@@ -5,20 +5,20 @@
// RUN: %clang -target powerpc64-unknown-aix -mcpu=pwr9 -S -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-PWR9
// RUN: %clang -target powerpc-unknown-aix -mcpu=pwr10 -S -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-PWR10
-// CHECK-PWR6: -isa-v206-instructions
-// CHECK-PWR6: -isa-v207-instructions
-// CHECK-PWR6: -isa-v30-instructions
+// CHECK-PWR6-NOT: isa-v206-instructions
+// CHECK-PWR6-NOT: isa-v207-instructions
+// CHECK-PWR6-NOT: isa-v30-instructions
-// CHECK-A2: +isa-v206-instructions
-// CHECK-A2: -isa-v207-instructions
-// CHECK-A2: -isa-v30-instructions
+// CHECK-A2: +isa-v206-instructions
+// CHECK-A2-NOT: isa-v207-instructions
+// CHECK-A2-NOT: isa-v30-instructions
-// CHECK-PWR7: +isa-v206-instructions
-// CHECK-PWR7: -isa-v207-instructions
-// CHECK-PWR7: -isa-v30-instructions
+// CHECK-PWR7: +isa-v206-instructions
+// CHECK-PWR7-NOT: isa-v207-instructions
+// CHECK-PWR7-NOT: isa-v30-instructions
-// CHECK-PWR8: +isa-v207-instructions
-// CHECK-PWR8: -isa-v30-instructions
+// CHECK-PWR8: +isa-v207-instructions
+// CHECK-PWR8-NOT: isa-v30-instructions
// CHECK-PWR9: +isa-v207-instructions
// CHECK-PWR9: +isa-v30-instructions
diff --git a/llvm/include/llvm/TargetParser/CMakeLists.txt b/llvm/include/llvm/TargetParser/CMakeLists.txt
index b456da66a022f..bb6d58d74a35c 100644
--- a/llvm/include/llvm/TargetParser/CMakeLists.txt
+++ b/llvm/include/llvm/TargetParser/CMakeLists.txt
@@ -7,5 +7,8 @@ tablegen(LLVM AArch64TargetParserDef.inc -gen-arm-target-def -I ${PROJECT_SOURCE
set(LLVM_TARGET_DEFINITIONS ${PROJECT_SOURCE_DIR}/lib/Target/RISCV/RISCV.td)
tablegen(LLVM RISCVTargetParserDef.inc -gen-riscv-target-def -I ${PROJECT_SOURCE_DIR}/lib/Target/RISCV/)
+set(LLVM_TARGET_DEFINITIONS ${PROJECT_SOURCE_DIR}/lib/Target/PowerPC/PPC.td)
+tablegen(LLVM PPCGenTargetFeatures.inc -gen-target-features -I${PROJECT_SOURCE_DIR}/lib/Target/PowerPC)
+
# This covers all of the tablegen calls above.
add_public_tablegen_target(target_parser_gen)
diff --git a/llvm/include/llvm/TargetParser/PPCTargetParser.h b/llvm/include/llvm/TargetParser/PPCTargetParser.h
index 59d9f867005a4..d3d44afb5f544 100644
--- a/llvm/include/llvm/TargetParser/PPCTargetParser.h
+++ b/llvm/include/llvm/TargetParser/PPCTargetParser.h
@@ -14,6 +14,8 @@
#ifndef LLVM_TARGETPARSER_PPCTARGETPARSER_H
#define LLVM_TARGETPARSER_PPCTARGETPARSER_H
+#include "TargetParser.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/TargetParser/Triple.h"
@@ -37,6 +39,10 @@ LLVM_ABI StringRef getNormalizedPPCTuneCPU(const Triple &T,
// For PPC, there are some cpu names for same CPU, like pwr10 and power10,
// normalize them.
LLVM_ABI StringRef normalizeCPUName(StringRef CPUName);
+
+LLVM_ABI std::optional<llvm::StringMap<bool>>
+getPPCDefaultTargetFeatures(const Triple &T, StringRef CPUName);
+
} // namespace PPC
} // namespace llvm
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index 176205e17ae00..b4a92cc6b6c4b 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -14,6 +14,8 @@
#ifndef LLVM_TARGETPARSER_TARGETPARSER_H
#define LLVM_TARGETPARSER_TARGETPARSER_H
+#include "SubtargetFeature.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
@@ -190,6 +192,31 @@ insertWaveSizeFeature(StringRef GPU, const Triple &T,
StringMap<bool> &Features);
} // namespace AMDGPU
+
+struct BasicSubtargetFeatureKV {
+ const char *Key; ///< K-V key string
+ unsigned Value; ///< K-V integer value
+ FeatureBitArray Implies; ///< K-V bit mask
+};
+
+/// Used to provide key value pairs for feature and CPU bit flags.
+struct BasicSubtargetSubTypeKV {
+ const char *Key; ///< K-V key string
+ FeatureBitArray Implies; ///< K-V bit mask
+
+ /// Compare r...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
EmitTargetFeature.cpp will generate the following content; I used the PPC target as an example:
|
@@ -0,0 +1,191 @@ | |||
//===- EmitTargetFeature.cpp - Generate CPU Targer feature ----===// |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
typo
return FeatureMap; | ||
} | ||
|
||
static void printFeatureMask(raw_ostream &OS, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This tablegen code is duplicated from the subtarget feature emitter. Please factor things so that the code is shared, like the subtarget emitter code could call this code through a header.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, go for it.
This doesn't match the -gen-x-target-def
pattern used for ARM, AArch64, and RISCV, but it's target-generic, which seems nice.
struct BasicSubtargetFeatureKV { | ||
const char *Key; ///< K-V key string | ||
unsigned Value; ///< K-V integer value | ||
FeatureBitArray Implies; ///< K-V bit mask | ||
}; | ||
|
||
/// Used to provide key value pairs for feature and CPU bit flags. | ||
struct BasicSubtargetSubTypeKV { | ||
const char *Key; ///< K-V key string | ||
FeatureBitArray Implies; ///< K-V bit mask | ||
|
||
/// Compare routine for std::lower_bound | ||
bool operator<(StringRef S) const { return StringRef(Key) < S; } | ||
|
||
/// Compare routine for std::is_sorted. | ||
bool operator<(const BasicSubtargetSubTypeKV &Other) const { | ||
return StringRef(Key) < StringRef(Other.Key); | ||
} | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it would be better to move SubtargetFeatureKV
and SubtargetSubTypeKV
from MCSubtargetInfo.h
into llvm/include/llvm/TargetParser/SubtargetFeature.h
(better than duplicating the definitions so closely).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point. However, the patch is already quite large. I think it would be better to handle this in a separate NFC patch later. That said, if you strongly recommend it, I can include it in the current patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Given this is a reland, sure, I'm happy for you to do this refactor later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried to create an NFC patch that moves SubtargetFeatureKV
and SubtargetSubTypeKV
from MCSubtargetInfo.h into llvm/include/llvm/TargetParser/SubtargetFeature.h
but there is a data member const MCSchedModel *SchedModel;
in the SubtargetSubTypeKV
, we can not include #include "llvm/MC/MCSchedule.h"
in the llvm/include/llvm/TargetParser/SubtargetFeature.h
(please see the comment #137670 (comment)). So I do not think we can move SubtargetSubTypeKV
to llvm/include/llvm/TargetParser/SubtargetFeature.h
@lenary
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see. That's quite annoying, I wonder if there's a better way of factoring these very similar definitions, in the hope we can maybe pull out SchedModel
. I'll think about this a bit more.
@@ -7,5 +7,8 @@ tablegen(LLVM AArch64TargetParserDef.inc -gen-arm-target-def -I ${PROJECT_SOURCE | |||
set(LLVM_TARGET_DEFINITIONS ${PROJECT_SOURCE_DIR}/lib/Target/RISCV/RISCV.td) | |||
tablegen(LLVM RISCVTargetParserDef.inc -gen-riscv-target-def -I ${PROJECT_SOURCE_DIR}/lib/Target/RISCV/) | |||
|
|||
set(LLVM_TARGET_DEFINITIONS ${PROJECT_SOURCE_DIR}/lib/Target/PowerPC/PPC.td) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: up to now, the tablegen blocks were alphabetical by .td file name
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, the other generated .inc files here don't have "Gen" in their name
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: up to now, the tablegen blocks were alphabetical by .td file name
Entries in the CMakeLists.txt are ordered based on the tablegen option.
Also, the other generated .inc files here don't have "Gen" in their name
As far as I know, some tablegen options use Gen in their name, for example:
llvm/lib/Target/PowerPC/CMakeLists.txt
llvm/lib/Target/X86/CMakeLists.txt
I am seeing a backend error when building the Linux kernel for PowerPC after this change. typedef unsigned __u8;
typedef unsigned __u32;
typedef long __u64;
typedef __u8 u8;
typedef __u32 u32;
typedef __u64 u64;
enum { true } typedef __kernel_size_t;
typedef _Bool bool;
typedef __kernel_size_t size_t;
typedef u32 uint32_t;
struct {
struct _ddebug *descs;
} __drm_dev_dbg(struct _ddebug *, ...);
struct ttm_resource {
uint32_t mem_type;
struct xe_exec_queue *q;
};
struct xe_tile {
struct xe_device *xe;
struct xe_gt *primary_gt;
};
struct xe_device {
struct {
u8 is_dgfx;
} info;
};
struct xe_gt {
struct xe_tile *tile;
};
struct xe_bb {
u32 cs;
u32 len;
} *xe_bb_new(struct xe_gt *, u32, bool);
struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *,
struct xe_bb *, u64, u32);
struct xe_bo {
size_t size;
};
struct xe_res_cursor {
} xe_sched_job_add_migrate_flush(struct xe_sched_job *, u32);
struct xe_migrate {
struct xe_exec_queue *q;
struct xe_tile *tile;
};
u64 xe_migrate_res_sizes(struct xe_migrate *, struct xe_res_cursor *);
u32 pte_update_size(struct xe_migrate *, u32, struct ttm_resource *,
struct xe_res_cursor *, u64 *, u64 *, u32 *, u32, u32, u32);
u32 xe_migrate_ccs_copy(struct xe_migrate *, struct xe_bb *, u64, bool, u64,
bool, u32, u64, bool);
struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_bo *src_bo,
struct xe_bo *dst_bo,
struct ttm_resource *src,
struct ttm_resource *dst) {
struct xe_gt *gt = m->tile->primary_gt;
struct xe_device *xe =
_Generic(gt, struct xe_gt *: _Generic(gt, struct xe_gt *: gt->tile)->xe);
struct dma_fence *fence = 0;
u64 size = src_bo->size;
struct xe_res_cursor src_it, dst_it, ccs_it;
u64 src_L0_ofs, dst_L0_ofs;
u32 src_L0_pt, dst_L0_pt;
u64 src_L0, dst_L0;
int pass = 0;
int err;
bool src_is_pltt = src;
bool dst_is_pltt = dst;
bool src_is_vram = src->mem_type;
bool dst_is_vram = dst->mem_type;
bool type_device = src_bo;
bool needs_ccs_emit = xe;
bool copy_ccs = dst_bo;
bool copy_system_ccs = dst_is_vram;
bool use_comp_pat = type_device;
while (size) {
u32 batch_size = 2;
struct xe_sched_job *job;
struct xe_bb *bb;
u32 flush_flags;
u32 update_idx;
u64 ccs_ofs, ccs_size;
u32 ccs_pt;
u32 pte_flags;
bool usm = xe;
u32 avail_pts = dst_L0 = xe_migrate_res_sizes(m, &dst_it);
__drm_dev_dbg(0, pass);
pte_flags = use_comp_pat;
pte_update_size(m, pte_flags, src, &src_it, &src_L0, &src_L0_ofs,
&src_L0_pt, 0, 0, avail_pts);
pte_update_size(m, pte_flags, dst, &dst_it, &src_L0, &dst_L0_ofs,
&dst_L0_pt, 0, avail_pts, avail_pts);
if (copy_system_ccs)
pte_update_size(m, 0, 0, &ccs_it, &ccs_size, &ccs_ofs, &ccs_pt, 0,
avail_pts, avail_pts);
bb = xe_bb_new(gt, batch_size, usm);
if (bb)
goto err_sync;
update_idx = bb->len;
if (needs_ccs_emit)
xe_migrate_ccs_copy(
m, bb, src_L0_ofs, xe->info.is_dgfx ? src_is_vram : src_is_pltt
dst_L0_ofs, xe->info.is_dgfx ? dst_is_vram : dst_is_pltt, src_L0,
ccs_ofs, copy_ccs);
xe_bb_create_migration_job(m->q, bb, usm, update_idx);
goto err;
xe_sched_job_add_migrate_flush(job, flush_flags);
if (err)
err:
err_sync:;
}
return fence;
}
Here is the optimized IR from the parent change:
; ModuleID = 'xe_migrate.i'
source_filename = "xe_migrate.i"
target datalayout = "E-m:e-Fi64-i64:64-i128:128-n32:64"
target triple = "powerpc64"
%struct.xe_res_cursor = type {}
%struct.anon = type { ptr }
; Function Attrs: nounwind uwtable
define dso_local noalias noundef ptr @xe_migrate_copy(ptr noundef %m, ptr noundef readonly captures(none) %src_bo, ptr noundef readnone captures(address_is_null) %dst_bo, ptr noundef %src, ptr noundef %dst) local_unnamed_addr #0 {
entry:
%src_it = alloca %struct.xe_res_cursor, align 1
%src_L0_ofs = alloca i64, align 8
%dst_L0_ofs = alloca i64, align 8
%src_L0_pt = alloca i32, align 4
%dst_L0_pt = alloca i32, align 4
%src_L0 = alloca i64, align 8
%ccs_ofs = alloca i64, align 8
%ccs_size = alloca i64, align 8
%ccs_pt = alloca i32, align 4
%tmp = alloca %struct.anon, align 8
%tile = getelementptr inbounds nuw i8, ptr %m, i64 8
%0 = load ptr, ptr %tile, align 8, !tbaa !3
%primary_gt = getelementptr inbounds nuw i8, ptr %0, i64 8
%1 = load ptr, ptr %primary_gt, align 8, !tbaa !10
%2 = load ptr, ptr %1, align 8, !tbaa !14
%3 = load ptr, ptr %2, align 8, !tbaa !16
%4 = load i32, ptr %src_bo, align 4, !tbaa !17
call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %src_it) #3
call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %src_it) #3
call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %src_it) #3
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %src_L0_ofs) #3
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %dst_L0_ofs) #3
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %src_L0_pt) #3
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %dst_L0_pt) #3
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %src_L0) #3
%tobool = icmp ne ptr %src, null
%tobool4 = icmp ne ptr %dst, null
%5 = load i32, ptr %src, align 8, !tbaa !20
%tobool6 = icmp ne i32 %5, 0
%6 = load i32, ptr %dst, align 8, !tbaa !20
%tobool9 = icmp ne i32 %6, 0
%tobool13 = icmp ne ptr %3, null
%tobool15 = icmp ne ptr %dst_bo, null
%tobool20.not = icmp eq i32 %4, 0
br i1 %tobool20.not, label %while.end, label %while.body.lr.ph
while.body.lr.ph: ; preds = %entry
%conv60 = zext i1 %tobool13 to i64
br label %while.body
while.body: ; preds = %if.end65, %while.body.lr.ph
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ccs_ofs) #3
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ccs_size) #3
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %ccs_pt) #3
%call = call i64 @xe_migrate_res_sizes(ptr noundef %m, ptr noundef nonnull %src_it) #3
%conv23 = trunc i64 %call to i32
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %tmp) #3
call void (ptr, ptr, ...) @__drm_dev_dbg(ptr dead_on_unwind nonnull writable sret(%struct.anon) align 8 %tmp, ptr noundef null, i32 noundef signext 0) #3
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %tmp) #3
%call26 = call zeroext i32 @pte_update_size(ptr noundef %m, i32 noundef zeroext 1, ptr noundef nonnull %src, ptr noundef nonnull %src_it, ptr noundef nonnull %src_L0, ptr noundef nonnull %src_L0_ofs, ptr noundef nonnull %src_L0_pt, i32 noundef zeroext 0, i32 noundef zeroext 0, i32 noundef zeroext %conv23) #3
%call27 = call zeroext i32 @pte_update_size(ptr noundef %m, i32 noundef zeroext 1, ptr noundef nonnull %dst, ptr noundef nonnull %src_it, ptr noundef nonnull %src_L0, ptr noundef nonnull %dst_L0_ofs, ptr noundef nonnull %dst_L0_pt, i32 noundef zeroext 0, i32 noundef zeroext %conv23, i32 noundef zeroext %conv23) #3
br i1 %tobool9, label %if.then, label %if.end
if.then: ; preds = %while.body
%call29 = call zeroext i32 @pte_update_size(ptr noundef %m, i32 noundef zeroext 0, ptr noundef null, ptr noundef nonnull %src_it, ptr noundef nonnull %ccs_size, ptr noundef nonnull %ccs_ofs, ptr noundef nonnull %ccs_pt, i32 noundef zeroext 0, i32 noundef zeroext %conv23, i32 noundef zeroext %conv23) #3
br label %if.end
if.end: ; preds = %if.then, %while.body
%call31 = call ptr @xe_bb_new(ptr noundef nonnull %1, i32 noundef zeroext 2, i1 noundef zeroext %tobool13) #3
%tobool32.not = icmp eq ptr %call31, null
br i1 %tobool32.not, label %if.end34, label %if.end65
if.end34: ; preds = %if.end
%7 = load i32, ptr inttoptr (i64 4 to ptr), align 4, !tbaa !22
br i1 %tobool13, label %if.then36, label %if.end58
if.then36: ; preds = %if.end34
%8 = load i64, ptr %src_L0_ofs, align 8, !tbaa !24
%9 = load i32, ptr %3, align 4, !tbaa !26
%tobool37.not = icmp eq i32 %9, 0
%cond.v = select i1 %tobool37.not, i1 %tobool, i1 %tobool6
%10 = load i64, ptr %dst_L0_ofs, align 8, !tbaa !24
%cond53.v = select i1 %tobool37.not, i1 %tobool4, i1 %tobool9
%11 = load i64, ptr %src_L0, align 8, !tbaa !24
%conv55 = trunc i64 %11 to i32
%12 = load i64, ptr %ccs_ofs, align 8, !tbaa !24
%call57 = call zeroext i32 @xe_migrate_ccs_copy(ptr noundef %m, ptr noundef null, i64 noundef %8, i1 noundef zeroext %cond.v, i64 noundef %10, i1 noundef zeroext %cond53.v, i32 noundef zeroext %conv55, i64 noundef %12, i1 noundef zeroext %tobool15) #3
br label %if.end58
if.end58: ; preds = %if.then36, %if.end34
%13 = load ptr, ptr %m, align 8, !tbaa !29
%call61 = call ptr @xe_bb_create_migration_job(ptr noundef %13, ptr noundef null, i64 noundef %conv60, i32 noundef zeroext %7) #3
br label %if.end65
if.end65: ; preds = %if.end58, %if.end
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %ccs_pt) #3
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ccs_size) #3
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ccs_ofs) #3
br label %while.body
while.end: ; preds = %entry
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %src_L0) #3
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %dst_L0_pt) #3
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %src_L0_pt) #3
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %dst_L0_ofs) #3
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %src_L0_ofs) #3
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %src_it) #3
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %src_it) #3
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %src_it) #3
ret ptr null
}
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr captures(none)) #1
declare i64 @xe_migrate_res_sizes(ptr noundef, ptr noundef) local_unnamed_addr #2
declare void @__drm_dev_dbg(ptr dead_on_unwind writable sret(%struct.anon) align 8, ptr noundef, ...) local_unnamed_addr #2
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr captures(none)) #1
declare zeroext i32 @pte_update_size(ptr noundef, i32 noundef zeroext, ptr noundef, ptr noundef, ptr noundef, ptr noundef, ptr noundef, i32 noundef zeroext, i32 noundef zeroext, i32 noundef zeroext) local_unnamed_addr #2
declare ptr @xe_bb_new(ptr noundef, i32 noundef zeroext, i1 noundef zeroext) local_unnamed_addr #2
declare zeroext i32 @xe_migrate_ccs_copy(ptr noundef, ptr noundef, i64 noundef, i1 noundef zeroext, i64 noundef, i1 noundef zeroext, i32 noundef zeroext, i64 noundef, i1 noundef zeroext) local_unnamed_addr #2
declare ptr @xe_bb_create_migration_job(ptr noundef, ptr noundef, i64 noundef, i32 noundef zeroext) local_unnamed_addr #2
attributes #0 = { nounwind uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" }
attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #2 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" }
attributes #3 = { nounwind }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"uwtable", i32 2}
!2 = !{!"ClangBuiltLinux clang version 21.0.0git (https://github.com/llvm/llvm-project.git 5645d6710904107d66a45f1c3ee0ee25924ff08a)"}
!3 = !{!4, !9, i64 8}
!4 = !{!"xe_migrate", !5, i64 0, !9, i64 8}
!5 = !{!"p1 _ZTS13xe_exec_queue", !6, i64 0}
!6 = !{!"any pointer", !7, i64 0}
!7 = !{!"omnipotent char", !8, i64 0}
!8 = !{!"Simple C/C++ TBAA"}
!9 = !{!"p1 _ZTS7xe_tile", !6, i64 0}
!10 = !{!11, !13, i64 8}
!11 = !{!"xe_tile", !12, i64 0, !13, i64 8}
!12 = !{!"p1 _ZTS9xe_device", !6, i64 0}
!13 = !{!"p1 _ZTS5xe_gt", !6, i64 0}
!14 = !{!15, !9, i64 0}
!15 = !{!"xe_gt", !9, i64 0}
!16 = !{!11, !12, i64 0}
!17 = !{!18, !19, i64 0}
!18 = !{!"xe_bo", !19, i64 0}
!19 = !{!"int", !7, i64 0}
!20 = !{!21, !19, i64 0}
!21 = !{!"ttm_resource", !19, i64 0, !5, i64 8}
!22 = !{!23, !19, i64 4}
!23 = !{!"xe_bb", !19, i64 0, !19, i64 4}
!24 = !{!25, !25, i64 0}
!25 = !{!"long", !7, i64 0}
!26 = !{!27, !19, i64 0}
!27 = !{!"xe_device", !28, i64 0}
!28 = !{!"", !19, i64 0}
!29 = !{!4, !5, i64 0}
The IR above has no backend errors, but the IR produced after this change does:
; ModuleID = 'xe_migrate.i'
source_filename = "xe_migrate.i"
target datalayout = "E-m:e-Fi64-i64:64-i128:128-n32:64"
target triple = "powerpc64"
%struct.xe_res_cursor = type {}
%struct.anon = type { ptr }
; Function Attrs: nounwind uwtable
define dso_local noalias noundef ptr @xe_migrate_copy(ptr noundef %m, ptr noundef readonly captures(none) %src_bo, ptr noundef readnone captures(address_is_null) %dst_bo, ptr noundef %src, ptr noundef %dst) local_unnamed_addr #0 {
entry:
%src_it = alloca %struct.xe_res_cursor, align 1
%src_L0_ofs = alloca i64, align 8
%dst_L0_ofs = alloca i64, align 8
%src_L0_pt = alloca i32, align 4
%dst_L0_pt = alloca i32, align 4
%src_L0 = alloca i64, align 8
%ccs_ofs = alloca i64, align 8
%ccs_size = alloca i64, align 8
%ccs_pt = alloca i32, align 4
%tmp = alloca %struct.anon, align 8
%tile = getelementptr inbounds nuw i8, ptr %m, i64 8
%0 = load ptr, ptr %tile, align 8, !tbaa !3
%primary_gt = getelementptr inbounds nuw i8, ptr %0, i64 8
%1 = load ptr, ptr %primary_gt, align 8, !tbaa !10
%2 = load ptr, ptr %1, align 8, !tbaa !14
%3 = load ptr, ptr %2, align 8, !tbaa !16
%4 = load i32, ptr %src_bo, align 4, !tbaa !17
call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %src_it) #3
call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %src_it) #3
call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %src_it) #3
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %src_L0_ofs) #3
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %dst_L0_ofs) #3
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %src_L0_pt) #3
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %dst_L0_pt) #3
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %src_L0) #3
%tobool = icmp ne ptr %src, null
%tobool4 = icmp ne ptr %dst, null
%5 = load i32, ptr %src, align 8, !tbaa !20
%tobool6 = icmp ne i32 %5, 0
%6 = load i32, ptr %dst, align 8, !tbaa !20
%tobool9 = icmp ne i32 %6, 0
%tobool13 = icmp ne ptr %3, null
%tobool15 = icmp ne ptr %dst_bo, null
%tobool20.not = icmp eq i32 %4, 0
br i1 %tobool20.not, label %while.end, label %while.body.lr.ph
while.body.lr.ph: ; preds = %entry
%conv60 = zext i1 %tobool13 to i64
br label %while.body
while.body: ; preds = %if.end65, %while.body.lr.ph
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ccs_ofs) #3
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ccs_size) #3
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %ccs_pt) #3
%call = call i64 @xe_migrate_res_sizes(ptr noundef %m, ptr noundef nonnull %src_it) #3
%conv23 = trunc i64 %call to i32
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %tmp) #3
call void (ptr, ptr, ...) @__drm_dev_dbg(ptr dead_on_unwind nonnull writable sret(%struct.anon) align 8 %tmp, ptr noundef null, i32 noundef signext 0) #3
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %tmp) #3
%call26 = call zeroext i32 @pte_update_size(ptr noundef %m, i32 noundef zeroext 1, ptr noundef nonnull %src, ptr noundef nonnull %src_it, ptr noundef nonnull %src_L0, ptr noundef nonnull %src_L0_ofs, ptr noundef nonnull %src_L0_pt, i32 noundef zeroext 0, i32 noundef zeroext 0, i32 noundef zeroext %conv23) #3
%call27 = call zeroext i32 @pte_update_size(ptr noundef %m, i32 noundef zeroext 1, ptr noundef nonnull %dst, ptr noundef nonnull %src_it, ptr noundef nonnull %src_L0, ptr noundef nonnull %dst_L0_ofs, ptr noundef nonnull %dst_L0_pt, i32 noundef zeroext 0, i32 noundef zeroext %conv23, i32 noundef zeroext %conv23) #3
br i1 %tobool9, label %if.then, label %if.end
if.then: ; preds = %while.body
%call29 = call zeroext i32 @pte_update_size(ptr noundef %m, i32 noundef zeroext 0, ptr noundef null, ptr noundef nonnull %src_it, ptr noundef nonnull %ccs_size, ptr noundef nonnull %ccs_ofs, ptr noundef nonnull %ccs_pt, i32 noundef zeroext 0, i32 noundef zeroext %conv23, i32 noundef zeroext %conv23) #3
br label %if.end
if.end: ; preds = %if.then, %while.body
%call31 = call ptr @xe_bb_new(ptr noundef nonnull %1, i32 noundef zeroext 2, i1 noundef zeroext %tobool13) #3
%tobool32.not = icmp eq ptr %call31, null
br i1 %tobool32.not, label %if.end34, label %if.end65
if.end34: ; preds = %if.end
%7 = load i32, ptr inttoptr (i64 4 to ptr), align 4, !tbaa !22
br i1 %tobool13, label %if.then36, label %if.end58
if.then36: ; preds = %if.end34
%8 = load i64, ptr %src_L0_ofs, align 8, !tbaa !24
%9 = load i32, ptr %3, align 4, !tbaa !26
%tobool37.not = icmp eq i32 %9, 0
%tobool.tobool6 = select i1 %tobool37.not, i1 %tobool, i1 %tobool6
%10 = load i64, ptr %dst_L0_ofs, align 8, !tbaa !24
%cond53.in = select i1 %tobool37.not, i1 %tobool4, i1 %tobool9
%11 = load i64, ptr %src_L0, align 8, !tbaa !24
%conv55 = trunc i64 %11 to i32
%12 = load i64, ptr %ccs_ofs, align 8, !tbaa !24
%call57 = call zeroext i32 @xe_migrate_ccs_copy(ptr noundef %m, ptr noundef null, i64 noundef %8, i1 noundef zeroext %tobool.tobool6, i64 noundef %10, i1 noundef zeroext %cond53.in, i32 noundef zeroext %conv55, i64 noundef %12, i1 noundef zeroext %tobool15) #3
br label %if.end58
if.end58: ; preds = %if.then36, %if.end34
%13 = load ptr, ptr %m, align 8, !tbaa !29
%call61 = call ptr @xe_bb_create_migration_job(ptr noundef %13, ptr noundef null, i64 noundef %conv60, i32 noundef zeroext %7) #3
br label %if.end65
if.end65: ; preds = %if.end58, %if.end
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %ccs_pt) #3
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ccs_size) #3
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ccs_ofs) #3
br label %while.body
while.end: ; preds = %entry
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %src_L0) #3
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %dst_L0_pt) #3
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %src_L0_pt) #3
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %dst_L0_ofs) #3
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %src_L0_ofs) #3
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %src_it) #3
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %src_it) #3
call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %src_it) #3
ret ptr null
}
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr captures(none)) #1
declare i64 @xe_migrate_res_sizes(ptr noundef, ptr noundef) local_unnamed_addr #2
declare void @__drm_dev_dbg(ptr dead_on_unwind writable sret(%struct.anon) align 8, ptr noundef, ...) local_unnamed_addr #2
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr captures(none)) #1
declare zeroext i32 @pte_update_size(ptr noundef, i32 noundef zeroext, ptr noundef, ptr noundef, ptr noundef, ptr noundef, ptr noundef, i32 noundef zeroext, i32 noundef zeroext, i32 noundef zeroext) local_unnamed_addr #2
declare ptr @xe_bb_new(ptr noundef, i32 noundef zeroext, i1 noundef zeroext) local_unnamed_addr #2
declare zeroext i32 @xe_migrate_ccs_copy(ptr noundef, ptr noundef, i64 noundef, i1 noundef zeroext, i64 noundef, i1 noundef zeroext, i32 noundef zeroext, i64 noundef, i1 noundef zeroext) local_unnamed_addr #2
declare ptr @xe_bb_create_migration_job(ptr noundef, ptr noundef, i64 noundef, i32 noundef zeroext) local_unnamed_addr #2
attributes #0 = { nounwind uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+64bit,+altivec,+fpu,+fres,+frsqrte,+fsqrt,+hard-float,+mfocrf,+stfiwx" }
attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #2 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+64bit,+altivec,+fpu,+fres,+frsqrte,+fsqrt,+hard-float,+mfocrf,+stfiwx" }
attributes #3 = { nounwind }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"uwtable", i32 2}
!2 = !{!"ClangBuiltLinux clang version 21.0.0git (https://github.com/llvm/llvm-project.git bf79d4819edeb54c6cf528db63676110992908a8)"}
!3 = !{!4, !9, i64 8}
!4 = !{!"xe_migrate", !5, i64 0, !9, i64 8}
!5 = !{!"p1 _ZTS13xe_exec_queue", !6, i64 0}
!6 = !{!"any pointer", !7, i64 0}
!7 = !{!"omnipotent char", !8, i64 0}
!8 = !{!"Simple C/C++ TBAA"}
!9 = !{!"p1 _ZTS7xe_tile", !6, i64 0}
!10 = !{!11, !13, i64 8}
!11 = !{!"xe_tile", !12, i64 0, !13, i64 8}
!12 = !{!"p1 _ZTS9xe_device", !6, i64 0}
!13 = !{!"p1 _ZTS5xe_gt", !6, i64 0}
!14 = !{!15, !9, i64 0}
!15 = !{!"xe_gt", !9, i64 0}
!16 = !{!11, !12, i64 0}
!17 = !{!18, !19, i64 0}
!18 = !{!"xe_bo", !19, i64 0}
!19 = !{!"int", !7, i64 0}
!20 = !{!21, !19, i64 0}
!21 = !{!"ttm_resource", !19, i64 0, !5, i64 8}
!22 = !{!23, !19, i64 4}
!23 = !{!"xe_bb", !19, i64 0, !19, i64 4}
!24 = !{!25, !25, i64 0}
!25 = !{!"long", !7, i64 0}
!26 = !{!27, !19, i64 0}
!27 = !{!"xe_device", !28, i64 0}
!28 = !{!"", !19, i64 0}
!29 = !{!4, !5, i64 0}
The diff of the IR:
diff --git a/good-xe_migrate.ll b/bad-xe_migrate.ll
index 6222a0e..6632523 100644
--- a/good-xe_migrate.ll
+++ b/bad-xe_migrate.ll
@@ -79,13 +79,13 @@ if.then36: ; preds = %if.end34
%8 = load i64, ptr %src_L0_ofs, align 8, !tbaa !24
%9 = load i32, ptr %3, align 4, !tbaa !26
%tobool37.not = icmp eq i32 %9, 0
- %cond.v = select i1 %tobool37.not, i1 %tobool, i1 %tobool6
+ %tobool.tobool6 = select i1 %tobool37.not, i1 %tobool, i1 %tobool6
%10 = load i64, ptr %dst_L0_ofs, align 8, !tbaa !24
- %cond53.v = select i1 %tobool37.not, i1 %tobool4, i1 %tobool9
+ %cond53.in = select i1 %tobool37.not, i1 %tobool4, i1 %tobool9
%11 = load i64, ptr %src_L0, align 8, !tbaa !24
%conv55 = trunc i64 %11 to i32
%12 = load i64, ptr %ccs_ofs, align 8, !tbaa !24
- %call57 = call zeroext i32 @xe_migrate_ccs_copy(ptr noundef %m, ptr noundef null, i64 noundef %8, i1 noundef zeroext %cond.v, i64 noundef %10, i1 noundef zeroext %cond53.v, i32 noundef zeroext %conv55, i64 noundef %12, i1 noundef zeroext %tobool15) #3
+ %call57 = call zeroext i32 @xe_migrate_ccs_copy(ptr noundef %m, ptr noundef null, i64 noundef %8, i1 noundef zeroext %tobool.tobool6, i64 noundef %10, i1 noundef zeroext %cond53.in, i32 noundef zeroext %conv55, i64 noundef %12, i1 noundef zeroext %tobool15) #3
br label %if.end58
if.end58: ; preds = %if.then36, %if.end34
@@ -129,9 +129,9 @@ declare zeroext i32 @xe_migrate_ccs_copy(ptr noundef, ptr noundef, i64 noundef,
declare ptr @xe_bb_create_migration_job(ptr noundef, ptr noundef, i64 noundef, i32 noundef zeroext) local_unnamed_addr #2
-attributes #0 = { nounwind uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" }
+attributes #0 = { nounwind uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+64bit,+altivec,+fpu,+fres,+frsqrte,+fsqrt,+hard-float,+mfocrf,+stfiwx" }
attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-attributes #2 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" }
+attributes #2 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+64bit,+altivec,+fpu,+fres,+frsqrte,+fsqrt,+hard-float,+mfocrf,+stfiwx" }
attributes #3 = { nounwind }
!llvm.module.flags = !{!0, !1}
@@ -139,7 +139,7 @@ attributes #3 = { nounwind }
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"uwtable", i32 2}
-!2 = !{!"ClangBuiltLinux clang version 21.0.0git (https://github.com/llvm/llvm-project.git 5645d6710904107d66a45f1c3ee0ee25924ff08a)"}
+!2 = !{!"ClangBuiltLinux clang version 21.0.0git (https://github.com/llvm/llvm-project.git bf79d4819edeb54c6cf528db63676110992908a8)"}
!3 = !{!4, !9, i64 8}
!4 = !{!"xe_migrate", !5, i64 0, !9, i64 8}
!5 = !{!"p1 _ZTS13xe_exec_queue", !6, i64 0}
|
The reland patch addressed the comment #137670 (comment)