diff --git a/llvm/lib/Analysis/VFABIDemangling.cpp b/llvm/lib/Analysis/VFABIDemangling.cpp
index 22fc52070015c..426f98c0c6284 100644
--- a/llvm/lib/Analysis/VFABIDemangling.cpp
+++ b/llvm/lib/Analysis/VFABIDemangling.cpp
@@ -126,7 +126,7 @@ static ParseRet tryParseLinearTokenWithRuntimeStep(StringRef &ParseString,
   return ParseRet::None;
 }
 
-/// The function looks for the following stringt at the beginning of
+/// The function looks for the following string at the beginning of
 /// the input string `ParseString`:
 ///
 ///
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 36c91b7fa97e4..893aa4a91828d 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -15,14 +15,17 @@
 #include "llvm/CodeGen/ReplaceWithVeclib.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/DemandedBits.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
+#include "llvm/Support/TypeSize.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
 using namespace llvm;
@@ -38,138 +41,137 @@ STATISTIC(NumTLIFuncDeclAdded,
 STATISTIC(NumFuncUsedAdded,
           "Number of functions added to `llvm.compiler.used`");
 
-static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) {
-  Module *M = CI.getModule();
-
-  Function *OldFunc = CI.getCalledFunction();
-
-  // Check if the vector library function is already declared in this module,
-  // otherwise insert it.
+/// Returns a vector Function that it adds to the Module \p M. When
+/// \p ScalarFunc is not null, it copies its attributes to the newly created
+/// Function.
+Function *getTLIFunction(Module *M, FunctionType *VectorFTy,
+                         const StringRef TLIName,
+                         Function *ScalarFunc = nullptr) {
   Function *TLIFunc = M->getFunction(TLIName);
   if (!TLIFunc) {
-    TLIFunc = Function::Create(OldFunc->getFunctionType(),
-                               Function::ExternalLinkage, TLIName, *M);
-    TLIFunc->copyAttributesFrom(OldFunc);
+    TLIFunc =
+        Function::Create(VectorFTy, Function::ExternalLinkage, TLIName, *M);
+    if (ScalarFunc)
+      TLIFunc->copyAttributesFrom(ScalarFunc);
 
     LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
                       << TLIName << "` of type `" << *(TLIFunc->getType())
                       << "` to module.\n");
 
     ++NumTLIFuncDeclAdded;
-
-    // Add the freshly created function to llvm.compiler.used,
-    // similar to as it is done in InjectTLIMappings
+    // Add the freshly created function to llvm.compiler.used, similar to how
+    // it is done in InjectTLIMappings.
     appendToCompilerUsed(*M, {TLIFunc});
-
     LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
                       << "` to `@llvm.compiler.used`.\n");
     ++NumFuncUsedAdded;
   }
+  return TLIFunc;
+}
 
-  // Replace the call to the vector intrinsic with a call
-  // to the corresponding function from the vector library.
-  IRBuilder<> IRBuilder(&CI);
-  SmallVector<Value *> Args(CI.args());
-  // Preserve the operand bundles.
-  SmallVector<OperandBundleDef, 1> OpBundles;
-  CI.getOperandBundlesAsDefs(OpBundles);
-  CallInst *Replacement = IRBuilder.CreateCall(TLIFunc, Args, OpBundles);
-  assert(OldFunc->getFunctionType() == TLIFunc->getFunctionType() &&
-         "Expecting function types to be identical");
-  CI.replaceAllUsesWith(Replacement);
-  if (isa<FPMathOperator>(Replacement)) {
-    // Preserve fast math flags for FP math.
-    Replacement->copyFastMathFlags(&CI);
+/// Replaces the call to the vector intrinsic \p CallToReplace with a call to
+/// the corresponding function from the vector library \p TLIVecFunc.
+static void replaceWithTLIFunction(CallInst &CallToReplace, VFInfo &Info,
+                                   Function *TLIVecFunc) {
+  IRBuilder<> IRBuilder(&CallToReplace);
+  SmallVector<Value *> Args(CallToReplace.args());
+  if (auto OptMaskPos = Info.getParamIndexForOptionalMask()) {
+    auto *MaskTy = VectorType::get(Type::getInt1Ty(CallToReplace.getContext()),
+                                   Info.Shape.VF);
+    Args.insert(Args.begin() + OptMaskPos.value(),
+                Constant::getAllOnesValue(MaskTy));
   }
-  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
-                    << OldFunc->getName() << "` with call to `" << TLIName
-                    << "`.\n");
-  ++NumCallsReplaced;
-  return true;
+  // Preserve the operand bundles.
+  SmallVector<OperandBundleDef, 1> OpBundles;
+  CallToReplace.getOperandBundlesAsDefs(OpBundles);
+  CallInst *Replacement = IRBuilder.CreateCall(TLIVecFunc, Args, OpBundles);
+  CallToReplace.replaceAllUsesWith(Replacement);
+  // Preserve fast math flags for FP math.
+  if (isa<FPMathOperator>(Replacement))
+    Replacement->copyFastMathFlags(&CallToReplace);
 }
 
+/// Returns true if \p CallToReplace was successfully replaced with a suitable
+/// function taking vector arguments, based on available mappings in \p TLI.
+/// Currently this only works for calls to vectorized intrinsics.
 static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
-                                    CallInst &CI) {
-  if (!CI.getCalledFunction()) {
+                                    CallInst &CallToReplace) {
+  if (!CallToReplace.getCalledFunction())
     return false;
-  }
 
-  auto IntrinsicID = CI.getCalledFunction()->getIntrinsicID();
-  if (IntrinsicID == Intrinsic::not_intrinsic) {
-    // Replacement is only performed for intrinsic functions
+  auto IntrinsicID = CallToReplace.getCalledFunction()->getIntrinsicID();
+  // Replacement is only performed for intrinsic functions.
+  if (IntrinsicID == Intrinsic::not_intrinsic)
     return false;
-  }
 
-  // Convert vector arguments to scalar type and check that
-  // all vector operands have identical vector width.
+  // Compute the argument types of the corresponding scalar call, and check
+  // that all vector operands of the vector call have the same element count.
   ElementCount VF = ElementCount::getFixed(0);
-  SmallVector<Type *, 8> ScalarTypes;
-  for (auto Arg : enumerate(CI.args())) {
-    auto *ArgType = Arg.value()->getType();
-    // Vector calls to intrinsics can still have
-    // scalar operands for specific arguments.
+  SmallVector<Type *, 8> ScalarArgTypes;
+  for (auto Arg : enumerate(CallToReplace.args())) {
+    auto *ArgTy = Arg.value()->getType();
    if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, Arg.index())) {
-      ScalarTypes.push_back(ArgType);
-    } else {
-      // The argument in this place should be a vector if
-      // this is a call to a vector intrinsic.
-      auto *VectorArgTy = dyn_cast<VectorType>(ArgType);
-      if (!VectorArgTy) {
-        // The argument is not a vector, do not perform
-        // the replacement.
-        return false;
-      }
-      ElementCount NumElements = VectorArgTy->getElementCount();
-      if (NumElements.isScalable()) {
-        // The current implementation does not support
-        // scalable vectors.
+      ScalarArgTypes.push_back(ArgTy);
+    } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
+      ScalarArgTypes.push_back(ArgTy->getScalarType());
+      // Disallow vector arguments with differing VFs: when processing the
+      // first vector argument, store its VF, and require all later ones to
+      // match it.
+      if (VF.isZero())
+        VF = VectorArgTy->getElementCount();
+      else if (VF != VectorArgTy->getElementCount())
         return false;
-      }
-      if (VF.isNonZero() && VF != NumElements) {
-        // The different arguments differ in vector size.
-        return false;
-      } else {
-        VF = NumElements;
-      }
-      ScalarTypes.push_back(VectorArgTy->getElementType());
-    }
+    } else
+      // Exit when a vector argument was expected but not found.
+      return false;
   }
 
-  // Try to reconstruct the name for the scalar version of this
-  // intrinsic using the intrinsic ID and the argument types
-  // converted to scalar above.
-  std::string ScalarName;
-  if (Intrinsic::isOverloaded(IntrinsicID)) {
-    ScalarName = Intrinsic::getName(IntrinsicID, ScalarTypes, CI.getModule());
-  } else {
-    ScalarName = Intrinsic::getName(IntrinsicID).str();
-  }
+  // Try to reconstruct the name for the scalar version of this intrinsic
+  // using the intrinsic ID and the argument types converted to scalar above.
+  std::string ScalarName =
+      (Intrinsic::isOverloaded(IntrinsicID)
+           ? Intrinsic::getName(IntrinsicID, ScalarArgTypes,
+                                CallToReplace.getModule())
+           : Intrinsic::getName(IntrinsicID).str());
+
+  // Try to find the mapping for the scalar version of this intrinsic and the
+  // exact vector width of the call operands in the TargetLibraryInfo. First
+  // check for a non-masked variant; if that fails, try a masked one.
+  const VecDesc *VD =
+      TLI.getVectorMappingInfo(ScalarName, VF, /*Masked*/ false);
+  if (!VD && !(VD = TLI.getVectorMappingInfo(ScalarName, VF, /*Masked*/ true)))
+    return false;
 
-  if (!TLI.isFunctionVectorizable(ScalarName)) {
-    // The TargetLibraryInfo does not contain a vectorized version of
-    // the scalar function.
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI mapping from: `"
+                    << ScalarName << "` and vector width " << VF << " to: `"
+                    << VD->getVectorFnName() << "`.\n");
+
+  // Replace the call to the intrinsic with a call to the vector library
+  // function.
+  Type *ScalarRetTy = CallToReplace.getType()->getScalarType();
+  FunctionType *ScalarFTy =
+      FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false);
+  const std::string MangledName = VD->getVectorFunctionABIVariantString();
+  auto OptInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy);
+  if (!OptInfo)
    return false;
-  }
 
-  // Try to find the mapping for the scalar version of this intrinsic
-  // and the exact vector width of the call operands in the
-  // TargetLibraryInfo.
-  StringRef TLIName = TLI.getVectorizedFunction(ScalarName, VF);
-
-  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
-                    << ScalarName << "` and vector width " << VF << ".\n");
-
-  if (!TLIName.empty()) {
-    // Found the correct mapping in the TargetLibraryInfo,
-    // replace the call to the intrinsic with a call to
-    // the vector library function.
-    LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName
-                      << "`.\n");
-    return replaceWithTLIFunction(CI, TLIName);
-  }
+  FunctionType *VectorFTy = VFABI::createFunctionType(*OptInfo, ScalarFTy);
+  if (!VectorFTy)
+    return false;
+
+  Function *FuncToReplace = CallToReplace.getCalledFunction();
+  Function *TLIFunc = getTLIFunction(CallToReplace.getModule(), VectorFTy,
+                                     VD->getVectorFnName(), FuncToReplace);
+  replaceWithTLIFunction(CallToReplace, *OptInfo, TLIFunc);
 
-  return false;
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
+                    << FuncToReplace->getName() << "` with call to `"
+                    << TLIFunc->getName() << "`.\n");
+  ++NumCallsReplaced;
+  return true;
 }
 
 static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
@@ -185,9 +187,8 @@ static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
   }
   // Erase the calls to the intrinsics that have been replaced
   // with calls to the vector library.
-  for (auto *CI : ReplacedCalls) {
+  for (auto *CI : ReplacedCalls)
     CI->eraseFromParent();
-  }
   return Changed;
 }
 
@@ -207,10 +208,10 @@ PreservedAnalyses ReplaceWithVeclib::run(Function &F,
     PA.preserve<TargetLibraryAnalysis>();
     PA.preserve<GlobalsAA>();
     return PA;
-  } else {
-    // The pass did not replace any calls, hence it preserves all analyses.
-    return PreservedAnalyses::all();
   }
+
+  // The pass did not replace any calls, hence it preserves all analyses.
+  return PreservedAnalyses::all();
 }
 
////////////////////////////////////////////////////////////////////////////////
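Note on the masked-fallback path above: when only a masked vector routine exists in the TLI mapping, replaceWithTLIFunction synthesizes an all-true mask and splices it into the argument list at the position reported by VFInfo::getParamIndexForOptionalMask. A minimal standalone sketch of that step, using the same IR APIs as the patch (the helper name insertAllTrueMask is illustrative, not part of the patch):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/TypeSize.h"

using namespace llvm;

// Build an all-true <VF x i1> mask and splice it into the argument list at
// MaskPos. For scalable VFs, Constant::getAllOnesValue yields the
// splat-of-true constant that prints as the
// `shufflevector (insertelement (...))` pattern checked in the tests below.
static void insertAllTrueMask(SmallVectorImpl<Value *> &Args, unsigned MaskPos,
                              LLVMContext &Ctx, ElementCount VF) {
  auto *MaskTy = VectorType::get(Type::getInt1Ty(Ctx), VF);
  Args.insert(Args.begin() + MaskPos, Constant::getAllOnesValue(MaskTy));
}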
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
index 18431ae021f97..d41870ec6e791 100644
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
@@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
 declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
 
 ;.
-; CHECK: @llvm.compiler.used = appending global [16 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x], section "llvm.metadata"
 ;.
 define <2 x double> @llvm_cos_f64(<2 x double> %in) {
 ; CHECK-LABEL: define <2 x double> @llvm_cos_f64
@@ -40,7 +40,7 @@ define <4 x float> @llvm_cos_f32(<4 x float> %in) {
 define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) #0 {
 ; CHECK-LABEL: define <vscale x 2 x double> @llvm_cos_vscale_f64
 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> %in)
@@ -50,7 +50,7 @@ define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) #0
 define <vscale x 4 x float> @llvm_cos_vscale_f32(<vscale x 4 x float> %in) #0 {
 ; CHECK-LABEL: define <vscale x 4 x float> @llvm_cos_vscale_f32
 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> %in)
@@ -85,7 +85,7 @@ define <4 x float> @llvm_sin_f32(<4 x float> %in) {
 define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) #0 {
 ; CHECK-LABEL: define <vscale x 2 x double> @llvm_sin_vscale_f64
 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> %in)
@@ -95,7 +95,7 @@ define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) #0
 define <vscale x 4 x float> @llvm_sin_vscale_f32(<vscale x 4 x float> %in) #0 {
 ; CHECK-LABEL: define <vscale x 4 x float> @llvm_sin_vscale_f32
 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %in)
@@ -130,7 +130,7 @@ define <4 x float> @llvm_exp_f32(<4 x float> %in) {
 define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) #0 {
 ; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp_vscale_f64
 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> %in)
@@ -140,7 +140,7 @@ define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) #0
 define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) #0 {
 ; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp_vscale_f32
 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %in)
@@ -175,7 +175,7 @@ define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
 define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) #0 {
 ; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp2_vscale_f64
 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> %in)
@@ -185,7 +185,7 @@ define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) #0
 define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) #0 {
 ; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp2_vscale_f32
 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %in)
@@ -220,7 +220,7 @@ define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
 define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) #0 {
 ; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp10_vscale_f64
 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
@@ -230,7 +230,7 @@ define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) #0
 define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) #0 {
 ; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp10_vscale_f32
 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
@@ -265,7 +265,7 @@ define <4 x float> @llvm_log_f32(<4 x float> %in) {
 define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) #0 {
 ; CHECK-LABEL: define <vscale x 2 x double> @llvm_log_vscale_f64
 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> %in)
@@ -275,7 +275,7 @@ define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) #0
 define <vscale x 4 x float> @llvm_log_vscale_f32(<vscale x 4 x float> %in) #0 {
 ; CHECK-LABEL: define <vscale x 4 x float> @llvm_log_vscale_f32
 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %in)
@@ -310,7 +310,7 @@ define <4 x float> @llvm_log2_f32(<4 x float> %in) {
 define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) #0 {
 ; CHECK-LABEL: define <vscale x 2 x double> @llvm_log2_vscale_f64
 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> %in)
@@ -320,7 +320,7 @@ define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) #0
 define <vscale x 4 x float> @llvm_log2_vscale_f32(<vscale x 4 x float> %in) #0 {
 ; CHECK-LABEL: define <vscale x 4 x float> @llvm_log2_vscale_f32
 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %in)
@@ -355,7 +355,7 @@ define <4 x float> @llvm_log10_f32(<4 x float> %in) {
 define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) #0 {
 ; CHECK-LABEL: define <vscale x 2 x double> @llvm_log10_vscale_f64
 ; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> %in)
@@ -365,7 +365,7 @@ define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) #0
 define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) #0 {
 ; CHECK-LABEL: define <vscale x 4 x float> @llvm_log10_vscale_f32
 ; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %in)
@@ -380,7 +380,7 @@ declare <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
 declare <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
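Before the SLEEF counterpart of this test, it is worth spelling out the lookup that drives both sets of rewrites. The pass first queries the TLI for an unmasked variant and only falls back to a masked one (e.g. armpl_svcos_f64_x above, _ZGVsMxv_cos below), which is when the all-true mask operand appears. A sketch using the same TLI API as the patch; the wrapper name lookupVectorVariant is illustrative:

#include "llvm/Analysis/TargetLibraryInfo.h"

using namespace llvm;

// Prefer an unmasked mapping for ScalarName at width VF; otherwise fall
// back to a masked one, for which the caller must synthesize an all-true
// mask operand. Returns null when no mapping exists at this VF.
static const VecDesc *lookupVectorVariant(const TargetLibraryInfo &TLI,
                                          StringRef ScalarName,
                                          ElementCount VF) {
  if (const VecDesc *VD =
          TLI.getVectorMappingInfo(ScalarName, VF, /*Masked=*/false))
    return VD;
  return TLI.getVectorMappingInfo(ScalarName, VF, /*Masked=*/true);
}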
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
 define <vscale x 2 x double> @llvm_ceil_vscale_f64(<vscale x 2 x double> %in) {
 ; CHECK-LABEL: @llvm_ceil_vscale_f64(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
@@ -43,7 +44,7 @@ define <vscale x 4 x float> @llvm_copysign_vscale_f32(<vscale x 4 x float> %mag,
 
 define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) {
 ; CHECK-LABEL: @llvm_cos_vscale_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> %in)
@@ -52,7 +53,7 @@ define <vscale x 2 x double> @llvm_cos_vscale_f64(<vscale x 2 x double> %in) {
 
 define <vscale x 4 x float> @llvm_cos_vscale_f32(<vscale x 4 x float> %in) {
 ; CHECK-LABEL: @llvm_cos_vscale_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float> %in)
@@ -61,7 +62,7 @@ define <vscale x 4 x float> @llvm_cos_vscale_f32(<vscale x 4 x float> %in) {
 
 define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) {
 ; CHECK-LABEL: @llvm_exp_vscale_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double> %in)
@@ -70,7 +71,7 @@ define <vscale x 2 x double> @llvm_exp_vscale_f64(<vscale x 2 x double> %in) {
 
 define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) {
 ; CHECK-LABEL: @llvm_exp_vscale_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %in)
@@ -79,7 +80,7 @@ define <vscale x 4 x float> @llvm_exp_vscale_f32(<vscale x 4 x float> %in) {
 
 define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) {
 ; CHECK-LABEL: @llvm_exp2_vscale_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double> %in)
@@ -88,7 +89,7 @@ define <vscale x 2 x double> @llvm_exp2_vscale_f64(<vscale x 2 x double> %in) {
 
 define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) {
 ; CHECK-LABEL: @llvm_exp2_vscale_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %in)
@@ -97,7 +98,7 @@ define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) {
 
 define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) {
 ; CHECK-LABEL: @llvm_exp10_vscale_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
@@ -106,7 +107,7 @@ define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) {
 
 define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) {
 ; CHECK-LABEL: @llvm_exp10_vscale_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
@@ -169,7 +170,7 @@ define <vscale x 4 x float> @llvm_fma_vscale_f32(<vscale x 4 x float> %a,
 
 define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) {
 ; CHECK-LABEL: @llvm_log_vscale_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.log.nxv2f64(<vscale x 2 x double> %in)
@@ -178,7 +179,7 @@ define <vscale x 2 x double> @llvm_log_vscale_f64(<vscale x 2 x double> %in) {
 
 define <vscale x 4 x float> @llvm_log_vscale_f32(<vscale x 4 x float> %in) {
 ; CHECK-LABEL: @llvm_log_vscale_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.log.nxv4f32(<vscale x 4 x float> %in)
@@ -187,7 +188,7 @@ define <vscale x 4 x float> @llvm_log_vscale_f32(<vscale x 4 x float> %in) {
 
 define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) {
 ; CHECK-LABEL: @llvm_log10_vscale_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.log10.nxv2f64(<vscale x 2 x double> %in)
@@ -196,7 +197,7 @@ define <vscale x 2 x double> @llvm_log10_vscale_f64(<vscale x 2 x double> %in) {
 
 define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) {
 ; CHECK-LABEL: @llvm_log10_vscale_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.log10.nxv4f32(<vscale x 4 x float> %in)
@@ -205,7 +206,7 @@ define <vscale x 4 x float> @llvm_log10_vscale_f32(<vscale x 4 x float> %in) {
 
 define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) {
 ; CHECK-LABEL: @llvm_log2_vscale_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.log2.nxv2f64(<vscale x 2 x double> %in)
@@ -214,7 +215,7 @@ define <vscale x 2 x double> @llvm_log2_vscale_f64(<vscale x 2 x double> %in) {
 
 define <vscale x 4 x float> @llvm_log2_vscale_f32(<vscale x 4 x float> %in) {
 ; CHECK-LABEL: @llvm_log2_vscale_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.log2.nxv4f32(<vscale x 4 x float> %in)
@@ -331,7 +332,7 @@ define <vscale x 4 x float> @llvm_round_vscale_f32(<vscale x 4 x float> %in) {
 
 define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) {
 ; CHECK-LABEL: @llvm_sin_vscale_f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 ;
   %1 = call fast <vscale x 2 x double> @llvm.sin.nxv2f64(<vscale x 2 x double> %in)
@@ -340,7 +341,7 @@ define <vscale x 2 x double> @llvm_sin_vscale_f64(<vscale x 2 x double> %in) {
 
 define <vscale x 4 x float> @llvm_sin_vscale_f32(<vscale x 4 x float> %in) {
 ; CHECK-LABEL: @llvm_sin_vscale_f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 ;
   %1 = call fast <vscale x 4 x float> @llvm.sin.nxv4f32(<vscale x 4 x float> %in)
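Finally, the vector signatures checked above are not hard-coded anywhere: the pass derives them by demangling the VFABI variant string stored in the mapping (for the ArmPL entries it looks like "_ZGVsMxv_cos(armpl_svcos_f64_x)") and widening the scalar signature. A sketch combining the two calls, assuming the VFABI declarations live in VectorUtils.h as at this commit; the wrapper name vectorVariantType is illustrative:

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DerivedTypes.h"
#include <optional>

using namespace llvm;

// Demangle the VFABI variant string, then let createFunctionType widen the
// scalar signature by the demangled VF (adding the mask parameter for masked
// variants). Returns null if either step fails.
static FunctionType *vectorVariantType(StringRef MangledName,
                                       FunctionType *ScalarFTy) {
  std::optional<VFInfo> Info =
      VFABI::tryDemangleForVFABI(MangledName, ScalarFTy);
  if (!Info)
    return nullptr;
  return VFABI::createFunctionType(*Info, ScalarFTy);
}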