From bd285ed90fd90c597ca0e764ce1e21442e23431b Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 14 Jan 2025 11:55:10 -0500 Subject: [PATCH] [LLVM][Triple] Add an argument to specify canonical form to `Triple::normalize` Currently, the output of `Triple::normalize` can vary depending on how the `Triple` object is constructed, producing a 3-field, 4-field, or even 5-field string. However, there is no way to control the format of the output, as all forms are considered canonical according to the LangRef. This lack of control can be inconvenient when a specific format is required. To address this, this PR introduces an argument to specify the desired format (3, 4, or 5 identifiers), with the default set to none to maintain the current behavior. If the requested format requires more components than are available in the actual `Data`, `"unknown"` is appended as needed. --- llvm/include/llvm/TargetParser/Triple.h | 18 ++- llvm/lib/TargetParser/Triple.cpp | 15 ++- llvm/unittests/TargetParser/TripleTest.cpp | 126 +++++++++++++++++++++ 3 files changed, 155 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index 76914ab34c1f6..8097300c6e630 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -366,14 +366,26 @@ class Triple { /// @name Normalization /// @{ + /// Canonical form + enum class CanonicalForm { + ANY = 0, + THREE_IDENT = 3, // ARCHITECTURE-VENDOR-OPERATING_SYSTEM + FOUR_IDENT = 4, // ARCHITECTURE-VENDOR-OPERATING_SYSTEM-ENVIRONMENT + FIVE_IDENT = 5, // ARCHITECTURE-VENDOR-OPERATING_SYSTEM-ENVIRONMENT-FORMAT + }; + /// Turn an arbitrary machine specification into the canonical triple form (or /// something sensible that the Triple class understands if nothing better can /// reasonably be done). In particular, it handles the common case in which - /// otherwise valid components are in the wrong order. 
- static std::string normalize(StringRef Str); + /// otherwise valid components are in the wrong order. \p Form is used to + /// specify the output canonical form. + static std::string normalize(StringRef Str, + CanonicalForm Form = CanonicalForm::ANY); /// Return the normalized form of this triple's string. - std::string normalize() const { return normalize(Data); } + std::string normalize(CanonicalForm Form = CanonicalForm::ANY) const { + return normalize(Data, Form); + } /// @} /// @name Typed Component Access diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index 4c1de09e91f21..3b8d2c9f92e87 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -1128,7 +1128,7 @@ static StringRef getDXILArchNameFromShaderModel(StringRef ShaderModelStr) { return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_0); } -std::string Triple::normalize(StringRef Str) { +std::string Triple::normalize(StringRef Str, CanonicalForm Form) { bool IsMinGW32 = false; bool IsCygwin = false; @@ -1334,6 +1334,19 @@ std::string Triple::normalize(StringRef Str) { Components[0] = getDXILArchNameFromShaderModel(Components[2]); } } + + // Canonicalize the components if necessary. + switch (Form) { + case CanonicalForm::ANY: + break; + case CanonicalForm::THREE_IDENT: + case CanonicalForm::FOUR_IDENT: + case CanonicalForm::FIVE_IDENT: { + Components.resize(static_cast<unsigned>(Form), "unknown"); + break; + } + } + // Stick the corrected components back together to form the normalized string. 
return join(Components, "-"); } diff --git a/llvm/unittests/TargetParser/TripleTest.cpp b/llvm/unittests/TargetParser/TripleTest.cpp index 7fb7625f8c2d1..5f75c80292f4b 100644 --- a/llvm/unittests/TargetParser/TripleTest.cpp +++ b/llvm/unittests/TargetParser/TripleTest.cpp @@ -1416,6 +1416,132 @@ TEST(TripleTest, Normalization) { EXPECT_EQ("x86_64-unknown-linux-gnu", Triple::normalize("x86_64-gnu-linux")); + EXPECT_EQ("a-unknown-unknown", + Triple::normalize("a", Triple::CanonicalForm::THREE_IDENT)); + EXPECT_EQ("a-b-unknown", + Triple::normalize("a-b", Triple::CanonicalForm::THREE_IDENT)); + EXPECT_EQ("a-b-c", + Triple::normalize("a-b-c", Triple::CanonicalForm::THREE_IDENT)); + EXPECT_EQ("a-b-c", + Triple::normalize("a-b-c-d", Triple::CanonicalForm::THREE_IDENT)); + EXPECT_EQ("a-b-c", + Triple::normalize("a-b-c-d-e", Triple::CanonicalForm::THREE_IDENT)); + + EXPECT_EQ("a-unknown-unknown-unknown", + Triple::normalize("a", Triple::CanonicalForm::FOUR_IDENT)); + EXPECT_EQ("a-b-unknown-unknown", + Triple::normalize("a-b", Triple::CanonicalForm::FOUR_IDENT)); + EXPECT_EQ("a-b-c-unknown", + Triple::normalize("a-b-c", Triple::CanonicalForm::FOUR_IDENT)); + EXPECT_EQ("a-b-c-d", + Triple::normalize("a-b-c-d", Triple::CanonicalForm::FOUR_IDENT)); + EXPECT_EQ("a-b-c-d", + Triple::normalize("a-b-c-d-e", Triple::CanonicalForm::FOUR_IDENT)); + + EXPECT_EQ("a-unknown-unknown-unknown-unknown", + Triple::normalize("a", Triple::CanonicalForm::FIVE_IDENT)); + EXPECT_EQ("a-b-unknown-unknown-unknown", + Triple::normalize("a-b", Triple::CanonicalForm::FIVE_IDENT)); + EXPECT_EQ("a-b-c-unknown-unknown", + Triple::normalize("a-b-c", Triple::CanonicalForm::FIVE_IDENT)); + EXPECT_EQ("a-b-c-d-unknown", + Triple::normalize("a-b-c-d", Triple::CanonicalForm::FIVE_IDENT)); + EXPECT_EQ("a-b-c-d-e", + Triple::normalize("a-b-c-d-e", Triple::CanonicalForm::FIVE_IDENT)); + + EXPECT_EQ("i386-b-c-unknown", + Triple::normalize("i386-b-c", Triple::CanonicalForm::FOUR_IDENT)); + 
EXPECT_EQ("i386-b-c-unknown-unknown", + Triple::normalize("i386-b-c", Triple::CanonicalForm::FIVE_IDENT)); + + EXPECT_EQ("i386-a-c-unknown", + Triple::normalize("a-i386-c", Triple::CanonicalForm::FOUR_IDENT)); + EXPECT_EQ("i386-a-c-unknown-unknown", + Triple::normalize("a-i386-c", Triple::CanonicalForm::FIVE_IDENT)); + + EXPECT_EQ("i386-a-b-unknown", + Triple::normalize("a-b-i386", Triple::CanonicalForm::FOUR_IDENT)); + EXPECT_EQ("i386-a-b-c", + Triple::normalize("a-b-c-i386", Triple::CanonicalForm::FOUR_IDENT)); + + EXPECT_EQ("a-pc-c-unknown", + Triple::normalize("a-pc-c", Triple::CanonicalForm::FOUR_IDENT)); + EXPECT_EQ("unknown-pc-b-c", + Triple::normalize("pc-b-c", Triple::CanonicalForm::FOUR_IDENT)); + EXPECT_EQ("a-pc-b-unknown", + Triple::normalize("a-b-pc", Triple::CanonicalForm::FOUR_IDENT)); + EXPECT_EQ("a-pc-b-c", + Triple::normalize("a-b-c-pc", Triple::CanonicalForm::FOUR_IDENT)); + + EXPECT_EQ("a-b-linux-unknown", + Triple::normalize("a-b-linux", Triple::CanonicalForm::FOUR_IDENT)); + // We lose `-c` here as expected. 
+ EXPECT_EQ("unknown-unknown-linux-b", + Triple::normalize("linux-b-c", Triple::CanonicalForm::FOUR_IDENT)); + EXPECT_EQ("a-unknown-linux-c", + Triple::normalize("a-linux-c", Triple::CanonicalForm::FOUR_IDENT)); + + EXPECT_EQ("i386-pc-a-unknown", + Triple::normalize("a-pc-i386", Triple::CanonicalForm::FOUR_IDENT)); + EXPECT_EQ("i386-pc-unknown-unknown", + Triple::normalize("-pc-i386", Triple::CanonicalForm::FOUR_IDENT)); + EXPECT_EQ("unknown-pc-linux-c", + Triple::normalize("linux-pc-c", Triple::CanonicalForm::FOUR_IDENT)); + EXPECT_EQ("unknown-pc-linux-unknown", + Triple::normalize("linux-pc-", Triple::CanonicalForm::FOUR_IDENT)); + + EXPECT_EQ("i386-unknown-unknown-unknown", + Triple::normalize("i386", Triple::CanonicalForm::FOUR_IDENT)); + EXPECT_EQ("unknown-pc-unknown-unknown", + Triple::normalize("pc", Triple::CanonicalForm::FOUR_IDENT)); + EXPECT_EQ("unknown-unknown-linux-unknown", + Triple::normalize("linux", Triple::CanonicalForm::FOUR_IDENT)); + + EXPECT_EQ( + "x86_64-unknown-linux-gnu", + Triple::normalize("x86_64-gnu-linux", Triple::CanonicalForm::FOUR_IDENT)); + + EXPECT_EQ("i386-a-b-unknown-unknown", + Triple::normalize("a-b-i386", Triple::CanonicalForm::FIVE_IDENT)); + EXPECT_EQ("i386-a-b-c-unknown", + Triple::normalize("a-b-c-i386", Triple::CanonicalForm::FIVE_IDENT)); + + EXPECT_EQ("a-pc-c-unknown-unknown", + Triple::normalize("a-pc-c", Triple::CanonicalForm::FIVE_IDENT)); + EXPECT_EQ("unknown-pc-b-c-unknown", + Triple::normalize("pc-b-c", Triple::CanonicalForm::FIVE_IDENT)); + EXPECT_EQ("a-pc-b-unknown-unknown", + Triple::normalize("a-b-pc", Triple::CanonicalForm::FIVE_IDENT)); + EXPECT_EQ("a-pc-b-c-unknown", + Triple::normalize("a-b-c-pc", Triple::CanonicalForm::FIVE_IDENT)); + + EXPECT_EQ("a-b-linux-unknown-unknown", + Triple::normalize("a-b-linux", Triple::CanonicalForm::FIVE_IDENT)); + EXPECT_EQ("unknown-unknown-linux-b-c", + Triple::normalize("linux-b-c", Triple::CanonicalForm::FIVE_IDENT)); + EXPECT_EQ("a-unknown-linux-c-unknown", + 
Triple::normalize("a-linux-c", Triple::CanonicalForm::FIVE_IDENT)); + + EXPECT_EQ("i386-pc-a-unknown-unknown", + Triple::normalize("a-pc-i386", Triple::CanonicalForm::FIVE_IDENT)); + EXPECT_EQ("i386-pc-unknown-unknown-unknown", + Triple::normalize("-pc-i386", Triple::CanonicalForm::FIVE_IDENT)); + EXPECT_EQ("unknown-pc-linux-c-unknown", + Triple::normalize("linux-pc-c", Triple::CanonicalForm::FIVE_IDENT)); + EXPECT_EQ("unknown-pc-linux-unknown-unknown", + Triple::normalize("linux-pc-", Triple::CanonicalForm::FIVE_IDENT)); + + EXPECT_EQ("i386-unknown-unknown-unknown-unknown", + Triple::normalize("i386", Triple::CanonicalForm::FIVE_IDENT)); + EXPECT_EQ("unknown-pc-unknown-unknown-unknown", + Triple::normalize("pc", Triple::CanonicalForm::FIVE_IDENT)); + EXPECT_EQ("unknown-unknown-linux-unknown-unknown", + Triple::normalize("linux", Triple::CanonicalForm::FIVE_IDENT)); + + EXPECT_EQ( + "x86_64-unknown-linux-gnu-unknown", + Triple::normalize("x86_64-gnu-linux", Triple::CanonicalForm::FIVE_IDENT)); + // Check that normalizing a permutated set of valid components returns a // triple with the unpermuted components. //