diff --git a/clang/include/clang/Basic/AArch64ACLETypes.def b/clang/include/clang/Basic/AArch64ACLETypes.def
index 89e8e31d9ce3a..9acfd693288cf 100644
--- a/clang/include/clang/Basic/AArch64ACLETypes.def
+++ b/clang/include/clang/Basic/AArch64ACLETypes.def
@@ -6,7 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines various SVE builtin types. The macros are:
+// This file defines various Neon and SVE builtin types. The macros are:
+//
+//  NEON_VECTOR_TYPE:
+//  - (Name, BaseType, ElBits, NumEls, VectorKind)
+//    Unlike the SVE types, the Neon vector types are not builtin types; they
+//    are mapped to the equivalent __attribute__((neon_vector_type(...)))
+//    vector types.
 //
 //  SVE_TYPE:
 //  - (Name, MangledName, Id, SingletonId)
@@ -57,6 +63,10 @@
 //  - IsBF true for vector of brain float elements.
 //===----------------------------------------------------------------------===//
 
+#ifndef NEON_VECTOR_TYPE
+#define NEON_VECTOR_TYPE(Name, BaseType, ElBits, NumEls, VectorKind)
+#endif
+
 #ifndef SVE_TYPE
 #define SVE_TYPE(Name, Id, SingletonId)
 #endif
@@ -111,7 +121,38 @@
   SVE_TYPE(Name, Id, SingletonId)
 #endif
 
-//===- Vector point types -----------------------------------------------===//
+//===- Neon vector types --------------------------------------------------===//
+
+NEON_VECTOR_TYPE(__Int8x8_t, CharTy, 8, 8, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Int16x4_t, ShortTy, 16, 4, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Int32x2_t, IntTy, 32, 2, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Uint8x8_t, CharTy, 8, 8, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Uint16x4_t, UnsignedShortTy, 16, 4, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Uint32x2_t, UnsignedIntTy, 32, 2, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Float16x4_t, Float16Ty, 16, 4, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Float32x2_t, FloatTy, 32, 2, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Poly8x8_t, CharTy, 8, 8, VectorKind::NeonPoly)
+NEON_VECTOR_TYPE(__Poly16x4_t, UnsignedShortTy, 16, 4, VectorKind::NeonPoly)
+NEON_VECTOR_TYPE(__Bfloat16x4_t, BFloat16Ty, 16, 4, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Int8x16_t, CharTy, 8, 16, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Int16x8_t, ShortTy, 16, 8, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Int32x4_t, IntTy, 32, 4, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Int64x2_t, LongLongTy, 64, 2, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Uint8x16_t, CharTy, 8, 16, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Uint16x8_t, UnsignedShortTy, 16, 8, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Uint32x4_t, UnsignedIntTy, 32, 4, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Uint64x2_t, UnsignedLongLongTy, 64, 2, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Float16x8_t, Float16Ty, 16, 8, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Float32x4_t, FloatTy, 32, 4, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Float64x2_t, DoubleTy, 64, 2, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Poly8x16_t, CharTy, 8, 16, VectorKind::NeonPoly)
+NEON_VECTOR_TYPE(__Poly16x8_t, UnsignedShortTy, 16, 8, VectorKind::NeonPoly)
+NEON_VECTOR_TYPE(__Poly64x2_t, UnsignedLongLongTy, 64, 2, VectorKind::NeonPoly)
+NEON_VECTOR_TYPE(__Bfloat16x8_t, BFloat16Ty, 16, 8, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Mfloat8x8_t, MFloat8Ty, 8, 8, VectorKind::Neon)
+NEON_VECTOR_TYPE(__Mfloat8x16_t, MFloat8Ty, 8, 16, VectorKind::Neon)
+
+//===- SVE vector types ---------------------------------------------------===//
 
 SVE_VECTOR_TYPE_INT(__SVInt8_t, __SVInt8_t, SveInt8, SveInt8Ty, 16, 8, 1, true)
 SVE_VECTOR_TYPE_INT(__SVInt16_t, __SVInt16_t, SveInt16, SveInt16Ty, 8, 16, 1, true)
@@ -205,6 +246,7 @@ SVE_OPAQUE_TYPE(__SVCount_t, __SVCount_t, SveCount, SveCountTy)
 
 SVE_SCALAR_TYPE(__mfp8, __mfp8, MFloat8, MFloat8Ty, 8)
 
+#undef NEON_VECTOR_TYPE
 #undef SVE_VECTOR_TYPE
 #undef SVE_VECTOR_TYPE_MFLOAT
 #undef SVE_VECTOR_TYPE_BFLOAT
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 652dc064a7b1c..c3bce6e807f34 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -270,7 +270,7 @@ class TargetInfo : public TransferrableTargetInfo,
   unsigned HasBuiltinMSVaList : 1;
 
   LLVM_PREFERRED_TYPE(bool)
-  unsigned HasAArch64SVETypes : 1;
+  unsigned HasAArch64ACLETypes : 1;
 
   LLVM_PREFERRED_TYPE(bool)
   unsigned HasRISCVVTypes : 1;
@@ -1055,9 +1055,9 @@ class TargetInfo : public TransferrableTargetInfo,
   /// available on this target.
   bool hasBuiltinMSVaList() const { return HasBuiltinMSVaList; }
 
-  /// Returns whether or not the AArch64 SVE built-in types are
+  /// Returns whether or not the AArch64 ACLE built-in types are
   /// available on this target.
-  bool hasAArch64SVETypes() const { return HasAArch64SVETypes; }
+  bool hasAArch64ACLETypes() const { return HasAArch64ACLETypes; }
 
   /// Returns whether or not the RISC-V V built-in types are
   /// available on this target.
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 04eb44dc9426e..e73975975167a 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -1448,10 +1448,10 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
 #include "clang/Basic/HLSLIntangibleTypes.def"
   }
 
-  if (Target.hasAArch64SVETypes() ||
-      (AuxTarget && AuxTarget->hasAArch64SVETypes())) {
-#define SVE_TYPE(Name, Id, SingletonId) \
-    InitBuiltinType(SingletonId, BuiltinType::Id);
+  if (Target.hasAArch64ACLETypes() ||
+      (AuxTarget && AuxTarget->hasAArch64ACLETypes())) {
+#define SVE_TYPE(Name, Id, SingletonId) \
+    InitBuiltinType(SingletonId, BuiltinType::Id);
 #include "clang/Basic/AArch64ACLETypes.def"
   }
 
@@ -4530,7 +4530,7 @@ QualType ASTContext::getWebAssemblyExternrefType() const {
 /// type.
 QualType ASTContext::getScalableVectorType(QualType EltTy, unsigned NumElts,
                                            unsigned NumFields) const {
-  if (Target->hasAArch64SVETypes()) {
+  if (Target->hasAArch64ACLETypes()) {
     uint64_t EltTySize = getTypeSize(EltTy);
 
 #define SVE_VECTOR_TYPE_INT(Name, MangledName, Id, SingletonId, NumEls,       \
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index ab13c32f6943e..a82573b5b43f9 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -157,7 +157,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
   SSERegParmMax = 0;
   HasAlignMac68kSupport = false;
   HasBuiltinMSVaList = false;
-  HasAArch64SVETypes = false;
+  HasAArch64ACLETypes = false;
   HasRISCVVTypes = false;
   AllowAMDGPUUnsafeFPAtomics = false;
   HasUnalignedAccess = false;
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 1195c9293f3ea..a29c7240cf4d3 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -240,15 +240,15 @@ AArch64TargetInfo::AArch64TargetInfo(const llvm::Triple &Triple,
   // Make __builtin_ms_va_list available.
   HasBuiltinMSVaList = true;
 
-  // Make the SVE types available. Note that this deliberately doesn't
-  // depend on SveMode, since in principle it should be possible to turn
+  // Make the Neon ACLE and SVE types available. Note that this deliberately
+  // doesn't depend on SveMode, since in principle it should be possible to turn
   // SVE on and off within a translation unit. It should also be possible
   // to compile the global declaration:
   //
   //   __SVInt8_t *ptr;
   //
   // even without SVE.
-  HasAArch64SVETypes = true;
+  HasAArch64ACLETypes = true;
 
   // {} in inline assembly are neon specifiers, not assembly variant
   // specifiers.
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index f8bfd60f2faad..fc63be037236e 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -515,11 +515,14 @@ void Sema::Initialize() {
 #include "clang/Basic/OpenCLExtensionTypes.def"
   }
 
-  if (Context.getTargetInfo().hasAArch64SVETypes() ||
+  if (Context.getTargetInfo().hasAArch64ACLETypes() ||
       (Context.getAuxTargetInfo() &&
-       Context.getAuxTargetInfo()->hasAArch64SVETypes())) {
+       Context.getAuxTargetInfo()->hasAArch64ACLETypes())) {
 #define SVE_TYPE(Name, Id, SingletonId) \
   addImplicitTypedef(#Name, Context.SingletonId);
+#define NEON_VECTOR_TYPE(Name, BaseType, ElBits, NumEls, VectorKind)          \
+  addImplicitTypedef(                                                         \
+      #Name, Context.getVectorType(Context.BaseType, NumEls, VectorKind));
 #include "clang/Basic/AArch64ACLETypes.def"
   }
 
diff --git a/clang/test/AST/ast-dump-aarch64-neon-types.c b/clang/test/AST/ast-dump-aarch64-neon-types.c
new file mode 100644
index 0000000000000..16255cd51c9d8
--- /dev/null
+++ b/clang/test/AST/ast-dump-aarch64-neon-types.c
@@ -0,0 +1,125 @@
+// Test that the NEON types are defined, even when arm_neon.h is not included,
+// as required by AAPCS64 "Support for Advanced SIMD Extensions".
+
+// RUN: %clang_cc1 -ast-dump -triple aarch64-linux-gnu %s -x c | FileCheck %s
+// RUN: %clang_cc1 -ast-dump -triple aarch64-linux-gnu %s -x c++ | FileCheck %s
+// RUN: %clang_cc1 -verify -verify-ignore-unexpected=note -triple x86_64 %s -x c
+// RUN: %clang_cc1 -verify -verify-ignore-unexpected=note -triple x86_64 %s -x c++
+// RUN: %clang_cc1 -verify -verify-ignore-unexpected=note -triple arm-linux-gnu %s -x c
+// RUN: %clang_cc1 -verify -verify-ignore-unexpected=note -triple arm-linux-gnu %s -x c++
+
+__Int8x8_t Int8x8;
+// CHECK: Int8x8 '__Int8x8_t':'__attribute__((neon_vector_type(8))) char'
+// expected-error@-2{{unknown type name '__Int8x8_t'}}
+
+__Int16x4_t Int16x4;
+// CHECK: Int16x4 '__Int16x4_t':'__attribute__((neon_vector_type(4))) short'
+// expected-error@-2{{unknown type name '__Int16x4_t'}}
+
+__Int32x2_t Int32x2;
+// CHECK: Int32x2 '__Int32x2_t':'__attribute__((neon_vector_type(2))) int'
+// expected-error@-2{{unknown type name '__Int32x2_t'}}
+
+__Uint8x8_t Uint8x8;
+// CHECK: Uint8x8 '__Uint8x8_t':'__attribute__((neon_vector_type(8))) char'
+// expected-error@-2{{unknown type name '__Uint8x8_t'}}
+
+__Uint16x4_t Uint16x4;
+// CHECK: Uint16x4 '__Uint16x4_t':'__attribute__((neon_vector_type(4))) unsigned short'
+// expected-error@-2{{unknown type name '__Uint16x4_t'}}
+
+__Uint32x2_t Uint32x2;
+// CHECK: Uint32x2 '__Uint32x2_t':'__attribute__((neon_vector_type(2))) unsigned int'
+// expected-error@-2{{unknown type name '__Uint32x2_t'}}
+
+__Float16x4_t Float16x4;
+// CHECK: Float16x4 '__Float16x4_t':'__attribute__((neon_vector_type(4))) _Float16'
+// expected-error@-2{{unknown type name '__Float16x4_t'}}
+
+__Float32x2_t Float32x2;
+// CHECK: Float32x2 '__Float32x2_t':'__attribute__((neon_vector_type(2))) float'
+// expected-error@-2{{unknown type name '__Float32x2_t'}}
+
+__Poly8x8_t Poly8x8;
+// CHECK: Poly8x8 '__Poly8x8_t':'__attribute__((neon_polyvector_type(8))) char'
+// expected-error@-2{{unknown type name '__Poly8x8_t'}}
+
+__Poly16x4_t Poly16x4;
+// CHECK: Poly16x4 '__Poly16x4_t':'__attribute__((neon_polyvector_type(4))) unsigned short'
+// expected-error@-2{{unknown type name '__Poly16x4_t'}}
+
+__Bfloat16x4_t Bfloat16x4;
+// CHECK: Bfloat16x4 '__Bfloat16x4_t':'__attribute__((neon_vector_type(4))) __bf16'
+// expected-error@-2{{unknown type name '__Bfloat16x4_t'}}
+
+__Int8x16_t Int8x16;
+// CHECK: Int8x16 '__Int8x16_t':'__attribute__((neon_vector_type(16))) char'
+// expected-error@-2{{unknown type name '__Int8x16_t'}}
+
+__Int16x8_t Int16x8;
+// CHECK: Int16x8 '__Int16x8_t':'__attribute__((neon_vector_type(8))) short'
+// expected-error@-2{{unknown type name '__Int16x8_t'}}
+
+__Int32x4_t Int32x4;
+// CHECK: Int32x4 '__Int32x4_t':'__attribute__((neon_vector_type(4))) int'
+// expected-error@-2{{unknown type name '__Int32x4_t'}}
+
+__Int64x2_t Int64x2;
+// CHECK: Int64x2 '__Int64x2_t':'__attribute__((neon_vector_type(2))) long long'
+// expected-error@-2{{unknown type name '__Int64x2_t'}}
+
+__Uint8x16_t Uint8x16;
+// CHECK: Uint8x16 '__Uint8x16_t':'__attribute__((neon_vector_type(16))) char'
+// expected-error@-2{{unknown type name '__Uint8x16_t'}}
+
+__Uint16x8_t Uint16x8;
+// CHECK: Uint16x8 '__Uint16x8_t':'__attribute__((neon_vector_type(8))) unsigned short'
+// expected-error@-2{{unknown type name '__Uint16x8_t'}}
+
+__Uint32x4_t Uint32x4;
+// CHECK: Uint32x4 '__Uint32x4_t':'__attribute__((neon_vector_type(4))) unsigned int'
+// expected-error@-2{{unknown type name '__Uint32x4_t'}}
+
+__Uint64x2_t Uint64x2;
+// CHECK: Uint64x2 '__Uint64x2_t':'__attribute__((neon_vector_type(2))) unsigned long long'
+// expected-error@-2{{unknown type name '__Uint64x2_t'}}
+
+__Float16x8_t Float16x8;
+// CHECK: Float16x8 '__Float16x8_t':'__attribute__((neon_vector_type(8))) _Float16'
+// expected-error@-2{{unknown type name '__Float16x8_t'}}
+
+__Float32x4_t Float32x4;
+// CHECK: Float32x4 '__Float32x4_t':'__attribute__((neon_vector_type(4))) float'
+// expected-error@-2{{unknown type name '__Float32x4_t'}}
+
+__Float64x2_t Float64x2;
+// CHECK: Float64x2 '__Float64x2_t':'__attribute__((neon_vector_type(2))) double'
+// expected-error@-2{{unknown type name '__Float64x2_t'}}
+
+__Poly8x16_t Poly8x16;
+// CHECK: Poly8x16 '__Poly8x16_t':'__attribute__((neon_polyvector_type(16))) char'
+// expected-error@-2{{unknown type name '__Poly8x16_t'}}
+
+__Poly16x8_t Poly16x8;
+// CHECK: Poly16x8 '__Poly16x8_t':'__attribute__((neon_polyvector_type(8))) unsigned short'
+// expected-error@-2{{unknown type name '__Poly16x8_t'}}
+
+__Poly64x2_t Poly64x2;
+// CHECK: Poly64x2 '__Poly64x2_t':'__attribute__((neon_polyvector_type(2))) unsigned long long'
+// expected-error@-2{{unknown type name '__Poly64x2_t'}}
+
+__Bfloat16x8_t Bfloat16x8;
+// CHECK: Bfloat16x8 '__Bfloat16x8_t':'__attribute__((neon_vector_type(8))) __bf16'
+// expected-error@-2{{unknown type name '__Bfloat16x8_t'}}
+
+__mfp8 mfp8;
+// CHECK: mfp8 '__mfp8'
+// expected-error@-2{{unknown type name '__mfp8'}}
+
+__Mfloat8x8_t Mfloat8x8;
+// CHECK: Mfloat8x8 '__Mfloat8x8_t':'__attribute__((neon_vector_type(8))) __mfp8'
+// expected-error@-2{{unknown type name '__Mfloat8x8_t'}}
+
+__Mfloat8x16_t Mfloat8x16;
+// CHECK: Mfloat8x16 '__Mfloat8x16_t':'__attribute__((neon_vector_type(16))) __mfp8'
+// expected-error@-2{{unknown type name '__Mfloat8x16_t'}}
diff --git a/clang/test/CodeGen/AArch64/mixed-neon-types.c b/clang/test/CodeGen/AArch64/mixed-neon-types.c
new file mode 100644
index 0000000000000..52c30eb4fa657
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/mixed-neon-types.c
@@ -0,0 +1,73 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -x c %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-C
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -x c++ %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-CPP
+
+typedef __Uint32x4_t X;
+
+// CHECK-C-LABEL: define dso_local <4 x i32> @test(
+// CHECK-C-SAME: <4 x i32> noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-C-NEXT: [[ENTRY:.*:]]
+// CHECK-C-NEXT: [[X_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-C-NEXT: store <4 x i32> [[X]], ptr [[X_ADDR]], align 16
+// CHECK-C-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[X_ADDR]], align 16
+// CHECK-C-NEXT: ret <4 x i32> [[TMP0]]
+//
+// CHECK-CPP-LABEL: define dso_local noundef <4 x i32> @_Z4test12__Uint32x4_t(
+// CHECK-CPP-SAME: <4 x i32> noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[X_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-CPP-NEXT: store <4 x i32> [[X]], ptr [[X_ADDR]], align 16
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[X_ADDR]], align 16
+// CHECK-CPP-NEXT: ret <4 x i32> [[TMP0]]
+//
+X test(X x) {
+  return x;
+}
+
+#include <arm_neon.h>
+
+// CHECK-C-LABEL: define dso_local <16 x i8> @testboth(
+// CHECK-C-SAME: <4 x i32> noundef [[X:%.*]]) #[[ATTR0]] {
+// CHECK-C-NEXT: [[ENTRY:.*:]]
+// CHECK-C-NEXT: [[__P0_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-C-NEXT: [[__P1_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-C-NEXT: [[__RET_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-C-NEXT: [[X_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-C-NEXT: store <4 x i32> [[X]], ptr [[X_ADDR]], align 16
+// CHECK-C-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[X_ADDR]], align 16
+// CHECK-C-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-C-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X_ADDR]], align 16
+// CHECK-C-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-C-NEXT: store <16 x i8> [[TMP1]], ptr [[__P0_ADDR_I]], align 16
+// CHECK-C-NEXT: store <16 x i8> [[TMP3]], ptr [[__P1_ADDR_I]], align 16
+// CHECK-C-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// CHECK-C-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[__P1_ADDR_I]], align 16
+// CHECK-C-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[TMP4]], [[TMP5]]
+// CHECK-C-NEXT: store <16 x i8> [[ADD_I]], ptr [[__RET_I]], align 16
+// CHECK-C-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[__RET_I]], align 16
+// CHECK-C-NEXT: ret <16 x i8> [[TMP6]]
+//
+// CHECK-CPP-LABEL: define dso_local noundef <16 x i8> @_Z8testboth12__Uint32x4_t(
+// CHECK-CPP-SAME: <4 x i32> noundef [[X:%.*]]) #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[__P0_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-CPP-NEXT: [[__P1_ADDR_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-CPP-NEXT: [[__RET_I:%.*]] = alloca <16 x i8>, align 16
+// CHECK-CPP-NEXT: [[X_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-CPP-NEXT: store <4 x i32> [[X]], ptr [[X_ADDR]], align 16
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[X_ADDR]], align 16
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X_ADDR]], align 16
+// CHECK-CPP-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8>
+// CHECK-CPP-NEXT: store <16 x i8> [[TMP1]], ptr [[__P0_ADDR_I]], align 16
+// CHECK-CPP-NEXT: store <16 x i8> [[TMP3]], ptr [[__P1_ADDR_I]], align 16
+// CHECK-CPP-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[__P0_ADDR_I]], align 16
+// CHECK-CPP-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[__P1_ADDR_I]], align 16
+// CHECK-CPP-NEXT: [[ADD_I:%.*]] = add <16 x i8> [[TMP4]], [[TMP5]]
+// CHECK-CPP-NEXT: store <16 x i8> [[ADD_I]], ptr [[__RET_I]], align 16
+// CHECK-CPP-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[__RET_I]], align 16
+// CHECK-CPP-NEXT: ret <16 x i8> [[TMP6]]
+//
+int8x16_t testboth(X x) {
+  return vaddq_u8(x, x);
+}
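
For context (not part of the patch): a minimal usage sketch of what the new implicit typedefs enable. With the change above applied, the AAPCS64-mandated Neon vector typedefs are available on AArch64 targets without including <arm_neon.h>, and basic arithmetic on them goes through Clang's generic vector extensions. The file name and compile command below are illustrative assumptions, not part of the test suite.

// neon-implicit-types-demo.c (hypothetical file name).
// Assumed build line: clang --target=aarch64-linux-gnu -c neon-implicit-types-demo.c
// Note: no #include <arm_neon.h>; the typedefs come from the compiler itself.

__Int32x4_t add_s32(__Int32x4_t a, __Int32x4_t b) {
  // Element-wise addition is provided by Clang's vector extensions, so no
  // intrinsics are needed for basic arithmetic on these types.
  return a + b;
}

__Uint32x4_t xor_u32(__Uint32x4_t a, __Uint32x4_t b) {
  // Bitwise operators work on the integer vector types as well.
  return a ^ b;
}

This mirrors what the new tests check: the typedefs are registered by Sema::Initialize() through the NEON_VECTOR_TYPE entries, independently of the arm_neon.h header, and they remain compatible with the header's own types when it is included later in the translation unit.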