diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index d1ff8b4b62f15..057f6ef40c513 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -120,7 +120,11 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
 
 Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                  QualType Ty) const {
-  llvm_unreachable("AMDGPU does not support varargs");
+  const bool IsIndirect = false;
+  const bool AllowHigherAlign = false;
+  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
+                          getContext().getTypeInfoInChars(Ty),
+                          CharUnits::fromQuantity(4), AllowHigherAlign);
 }
 
 ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
diff --git a/clang/test/CodeGen/voidptr-vaarg.c b/clang/test/CodeGen/voidptr-vaarg.c
new file mode 100644
index 0000000000000..d023ddf0fb5d2
--- /dev/null
+++ b/clang/test/CodeGen/voidptr-vaarg.c
@@ -0,0 +1,478 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: webassembly-registered-target
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+
+// Multiple targets use emitVoidPtrVAArg to lower va_arg instructions in clang.
+// PPC is complicated, so it is excluded from this case analysis.
+// ForceRightAdjust is false for all non-PPC targets.
+// AllowHigherAlign is only false for two Microsoft targets, both of which
+// pass most things by reference.
+//
+// Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr,
+//                          QualType ValueTy, bool IsIndirect,
+//                          TypeInfoChars ValueInfo, CharUnits SlotSizeAndAlign,
+//                          bool AllowHigherAlign, bool ForceRightAdjust =
+//                          false);
+//
+// Target       IsIndirect  SlotSize  AllowHigher  ForceRightAdjust
+// ARC          false       four      true         false
+// ARM          varies      four      true         false
+// Mips         false       4 or 8    true         false
+// RISCV        varies      register  true         false
+// PPC          elided
+// LoongArch    varies      register  true         false
+// NVPTX        WIP
+// AMDGPU       WIP
+// X86_32       false       four      true         false
+// X86_64 MS    varies      eight     false        false
+// CSKY         false       four      true         false
+// Webassembly  varies      four      true         false
+// AArch64      false       eight     true         false
+// AArch64 MS   false       eight     false        false
+//
+// Webassembly passes an argument indirectly iff it is an aggregate of
+// multiple values. It is chosen as the representative architecture for
+// checking IR generation, partly because it has a relatively simple variadic
+// calling convention.
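+//
+// As a mental model for the checks that follow, the direct (IsIndirect ==
+// false) case of this lowering steps the va_list roughly as in the sketch
+// below. The helper is illustrative only, not part of the patch; it assumes
+// wasm32's four byte slots and power-of-two alignments:
+//
+//   static void *voidptr_va_arg_sketch(char **list, size_t size,
+//                                      size_t align) {
+//     char *p = *list;
+//     if (align > 4) // only overaligned types round the pointer up
+//       p = (char *)(((uintptr_t)p + align - 1) & ~(uintptr_t)(align - 1));
+//     *list = p + ((size + 3) & ~(size_t)3); // postincrement by whole slots
+//     return p; // the value (or, if indirect, a pointer to it) lives here
+//   }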
+
+// Int, by itself and packed in structs
+// CHECK-LABEL: @raw_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int raw_int(__builtin_va_list list) { return __builtin_va_arg(list, int); }
+
+typedef struct {
+  int x;
+} one_int_t;
+
+// CHECK-LABEL: @one_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_INT_T:%.*]], align 4
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 4, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_INT_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+one_int_t one_int(__builtin_va_list list) {
+  return __builtin_va_arg(list, one_int_t);
+}
+
+typedef struct {
+  int x;
+  int y;
+} two_int_t;
+
+// CHECK-LABEL: @two_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[AGG_RESULT:%.*]], ptr align 4 [[TMP0]], i32 8, i1 false)
+// CHECK-NEXT: ret void
+//
+two_int_t two_int(__builtin_va_list list) {
+  return __builtin_va_arg(list, two_int_t);
+}
+
+// Double, by itself and packed in structs
+// CHECK-LABEL: @raw_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
+// CHECK-NEXT: ret double [[TMP1]]
+//
+double raw_double(__builtin_va_list list) {
+  return __builtin_va_arg(list, double);
+}
+
+typedef struct {
+  double x;
+} one_double_t;
+
+// CHECK-LABEL: @one_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_DOUBLE_T:%.*]], align 8
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[RETVAL]], ptr align 8 [[ARGP_CUR_ALIGNED]], i32 8, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_DOUBLE_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT: ret double [[TMP1]]
+//
+one_double_t one_double(__builtin_va_list list) {
+  return __builtin_va_arg(list, one_double_t);
+}
+
+typedef struct {
+  double x;
+  double y;
+} two_double_t;
+
+// CHECK-LABEL: @two_double(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[AGG_RESULT:%.*]], ptr align 8 [[TMP0]], i32 16, i1 false)
+// CHECK-NEXT: ret void
+//
+two_double_t two_double(__builtin_va_list list) {
+  return __builtin_va_arg(list, two_double_t);
+}
+
+// Scalar smaller than the slot size (C would promote a short to int)
+typedef struct {
+  char x;
+} one_char_t;
+
+// CHECK-LABEL: @one_char(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_CHAR_T:%.*]], align 1
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 1, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_CHAR_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[COERCE_DIVE]], align 1
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+one_char_t one_char(__builtin_va_list list) {
+  return __builtin_va_arg(list, one_char_t);
+}
+
+typedef struct {
+  short x;
+} one_short_t;
+
+// CHECK-LABEL: @one_short(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_ONE_SHORT_T:%.*]], align 2
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 2, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_ONE_SHORT_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[COERCE_DIVE]], align 2
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+one_short_t one_short(__builtin_va_list list) {
+  return __builtin_va_arg(list, one_short_t);
+}
+
+// Composite smaller than the slot size
+typedef struct {
+  _Alignas(2) char x;
+  char y;
+} char_pair_t;
+
+// CHECK-LABEL: @char_pair(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[AGG_RESULT:%.*]], ptr align 2 [[TMP0]], i32 2, i1 false)
+// CHECK-NEXT: ret void
+//
+char_pair_t char_pair(__builtin_va_list list) {
+  return __builtin_va_arg(list, char_pair_t);
+}
+
+// Empty struct
+typedef struct {
+} empty_t;
+
+// CHECK-LABEL: @empty(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_EMPTY_T:%.*]], align 1
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 0
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 0, i1 false)
+// CHECK-NEXT: ret void
+//
+empty_t empty(__builtin_va_list list) {
+  return __builtin_va_arg(list, empty_t);
+}
+
+typedef struct {
+  empty_t x;
+  int y;
+} empty_int_t;
+
+// CHECK-LABEL: @empty_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_EMPTY_INT_T:%.*]], align 4
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 4, i1 false)
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+empty_int_t empty_int(__builtin_va_list list) {
+  return __builtin_va_arg(list, empty_int_t);
+}
+
+typedef struct {
+  int x;
+  empty_t y;
+} int_empty_t;
+
+// CHECK-LABEL: @int_empty(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_INT_EMPTY_T:%.*]], align 4
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[RETVAL]], ptr align 4 [[ARGP_CUR]], i32 4, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_INT_EMPTY_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[COERCE_DIVE]], align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+int_empty_t int_empty(__builtin_va_list list) {
+  return __builtin_va_arg(list, int_empty_t);
+}
+
+// Need multiple va_arg instructions to check the postincrement.
+// Using types that are passed directly, as the indirect handling is
+// independent of the alignment handling in emitVoidPtrDirectVAArg.
+
+// CHECK-LABEL: @multiple_int(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT0_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT1_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT2_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT0:%.*]], ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT1:%.*]], ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT2:%.*]], ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP1]], align 4
+// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGP_CUR3]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4
+// CHECK-NEXT: ret void
+//
+void multiple_int(__builtin_va_list list, int *out0, int *out1, int *out2) {
+  *out0 = __builtin_va_arg(list, int);
+  *out1 = __builtin_va_arg(list, int);
+  *out2 = __builtin_va_arg(list, int);
+}
+
+// Scalars in structs are an easy way of specifying alignment from C
+// CHECK-LABEL: @increasing_alignment(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT0_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT1_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT2_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT3_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT0:%.*]], ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT1:%.*]], ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT2:%.*]], ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT3:%.*]], ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP0]], ptr align 4 [[ARGP_CUR]], i32 1, i1 false)
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[TMP1]], ptr align 4 [[ARGP_CUR1]], i32 2, i1 false)
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARGP_CUR3]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 7
+// CHECK-NEXT: [[ARGP_CUR5_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP4]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT6]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARGP_CUR5_ALIGNED]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: store double [[TMP5]], ptr [[TMP6]], align 8
+// CHECK-NEXT: ret void
+//
+void increasing_alignment(__builtin_va_list list, one_char_t *out0,
+                          one_short_t *out1, int *out2, double *out3) {
+  *out0 = __builtin_va_arg(list, one_char_t);
+  *out1 = __builtin_va_arg(list, one_short_t);
+  *out2 = __builtin_va_arg(list, int);
+  *out3 = __builtin_va_arg(list, double);
+}
+
+// CHECK-LABEL: @decreasing_alignment(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT0_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT1_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT2_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT3_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT0:%.*]], ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT1:%.*]], ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT2:%.*]], ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT3:%.*]], ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 7
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -8)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 8
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARGP_CUR_ALIGNED]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT0_ADDR]], align 4
+// CHECK-NEXT: store double [[TMP1]], ptr [[TMP2]], align 8
+// CHECK-NEXT: [[ARGP_CUR1:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT2:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR1]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT2]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGP_CUR1]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[OUT1_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[OUT2_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[TMP5]], ptr align 4 [[ARGP_CUR3]], i32 2, i1 false)
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUT3_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR5:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT6:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR5]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT6]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TMP6]], ptr align 4 [[ARGP_CUR5]], i32 1, i1 false)
+// CHECK-NEXT: ret void
+//
+void decreasing_alignment(__builtin_va_list list, double *out0, int *out1,
+                          one_short_t *out2, one_char_t *out3) {
+  *out0 = __builtin_va_arg(list, double);
+  *out1 = __builtin_va_arg(list, int);
+  *out2 = __builtin_va_arg(list, one_short_t);
+  *out3 = __builtin_va_arg(list, one_char_t);
+}
+
+// Typical edge cases, none hit special handling in VAArg lowering.
+typedef struct {
+  int x[16];
+  double y[8];
+} large_value_t;
+
+// CHECK-LABEL: @large_value(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[TMP0]], ptr align 8 [[TMP1]], i32 128, i1 false)
+// CHECK-NEXT: ret void
+//
+void large_value(__builtin_va_list list, large_value_t *out) {
+  *out = __builtin_va_arg(list, large_value_t);
+}
+
+typedef int v128_t __attribute__((__vector_size__(16), __aligned__(16)));
+// CHECK-LABEL: @vector(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 15
+// CHECK-NEXT: [[ARGP_CUR_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[TMP0]], i32 -16)
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR_ALIGNED]], i32 16
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARGP_CUR_ALIGNED]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 16
+// CHECK-NEXT: ret void
+//
+void vector(__builtin_va_list list, v128_t *out) {
+  *out = __builtin_va_arg(list, v128_t);
+}
+
+typedef struct BF {
+  float not_an_i32[2];
+  int A : 1;
+  char B;
+  int C : 13;
+} BF;
+
+// CHECK-LABEL: @bitfield(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[LIST_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[LIST:%.*]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i32 4
+// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[LIST_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[TMP1]], i32 12, i1 false)
+// CHECK-NEXT: ret void
+//
+void bitfield(__builtin_va_list list, BF *out) {
+  *out = __builtin_va_arg(list, BF);
+}
diff --git a/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
new file mode 100644
index 0000000000000..a0673b96626d1
--- /dev/null
+++ b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp
@@ -0,0 +1,181 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
+// REQUIRES: webassembly-registered-target
+
+// Simple calls to known variadic functions that are completely elided when
+// optimisations are on. This is a functional check that the expand-variadics
+// pass is consistent with clang's va_arg handling.
+
+// When expand-variadics is added to the default pipeline, clang -O1 will
+// suffice here. -Wno-varargs avoids the warning that the second argument to
+// 'va_start' is not the last named parameter.
+
+// RUN: %clang_cc1 %s -triple wasm32-unknown-unknown -Wno-varargs -O1 -emit-llvm -o - | opt - -S --passes='module(expand-variadics,default<O1>)' --expand-variadics-override=optimize -o - | FileCheck %s
+
+#include <stdarg.h>
+#include <stdint.h>
+
+template <typename X> static X first(...) {
+  va_list va;
+  __builtin_va_start(va, 0);
+  X r = va_arg(va, X);
+  va_end(va);
+  return r;
+}
+
+template <typename X, typename Y> static Y second(...) {
+  va_list va;
+  __builtin_va_start(va, 0);
+  va_arg(va, X);
+  Y r = va_arg(va, Y);
+  va_end(va);
+  return r;
+}
+
+extern "C" {
+
+// CHECK-LABEL: define {{[^@]+}}@first_pair_i32
+// CHECK-SAME: (i32 noundef returned [[X:%.*]], i32 noundef [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[X]]
+//
+int first_pair_i32(int x, int y) { return first<int>(x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_pair_i32
+// CHECK-SAME: (i32 noundef [[X:%.*]], i32 noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[Y]]
+//
+int second_pair_i32(int x, int y) { return second<int, int>(x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@first_pair_f64
+// CHECK-SAME: (double noundef returned [[X:%.*]], double noundef [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret double [[X]]
+//
+double first_pair_f64(double x, double y) {
+  return first<double>(x, y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@second_pair_f64
+// CHECK-SAME: (double noundef [[X:%.*]], double noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret double [[Y]]
+//
+double second_pair_f64(double x, double y) {
+  return second<double, double>(x, y);
+}
+}
+
+extern "C" {
+
+// CHECK-LABEL: define {{[^@]+}}@first_i32_f64
+// CHECK-SAME: (i32 noundef returned [[X:%.*]], double noundef [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[X]]
+//
+int first_i32_f64(int x, double y) { return first<int>(x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_i32_f64
+// CHECK-SAME: (i32 noundef [[X:%.*]], double noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret double [[Y]]
+//
+double second_i32_f64(int x, double y) { return second<int, double>(x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@first_f64_i32
+// CHECK-SAME: (double noundef returned [[X:%.*]], i32 noundef [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret double [[X]]
+//
+double first_f64_i32(double x, int y) { return first<double>(x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_f64_i32
+// CHECK-SAME: (double noundef [[X:%.*]], i32 noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[Y]]
+//
+int second_f64_i32(double x, int y) { return second<double, int>(x, y); }
+}
+
+extern "C" {
+typedef uint64_t ulong2 __attribute__((__vector_size__(16), __aligned__(16)));
+
+// CHECK-LABEL: define {{[^@]+}}@first_i32_ulong2
+// CHECK-SAME: (i32 noundef returned [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[X]]
+//
+int first_i32_ulong2(int x, ulong2 *y) { return first<int>(x, *y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_i32_ulong2
+// CHECK-SAME: (i32 noundef [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 16, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+void second_i32_ulong2(int x, ulong2 *y, ulong2 *r) {
+  *r = second<int, ulong2>(x, *y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@first_ulong2_i32
+// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[R]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+void first_ulong2_i32(ulong2 *x, int y, ulong2 *r) {
+  *r = first<ulong2>(*x, y);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@second_ulong2_i32
+// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[Y]]
+//
+int second_ulong2_i32(ulong2 *x, int y) { return second<ulong2, int>(*x, y); }
+}
+
+// ascending alignment
+typedef struct {
+  char c;
+  short s;
+  int i;
+  long l;
+  float f;
+  double d;
+} asc;
+
+extern "C" {
+
+// CHECK-LABEL: define {{[^@]+}}@first_i32_asc
+// CHECK-SAME: (i32 noundef returned [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[X]]
+//
+int first_i32_asc(int x, asc *y) { return first<int>(x, *y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_i32_asc
+// CHECK-SAME: (i32 noundef [[X:%.*]], ptr nocapture noundef readonly [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(24) [[R]], ptr noundef nonnull align 1 dereferenceable(24) [[Y]], i32 24, i1 false)
+// CHECK-NEXT: ret void
+//
+void second_i32_asc(int x, asc *y, asc *r) { *r = second<int, asc>(x, *y); }
+
+// CHECK-LABEL: define {{[^@]+}}@first_asc_i32
+// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef [[Y:%.*]], ptr nocapture noundef writeonly [[R:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i32(ptr noundef nonnull align 8 dereferenceable(24) [[R]], ptr noundef nonnull align 1 dereferenceable(24) [[X]], i32 24, i1 false)
+// CHECK-NEXT: ret void
+//
+void first_asc_i32(asc *x, int y, asc *r) { *r = first<asc>(*x, y); }
+
+// CHECK-LABEL: define {{[^@]+}}@second_asc_i32
+// CHECK-SAME: (ptr nocapture noundef readonly [[X:%.*]], i32 noundef returned [[Y:%.*]])
+// CHECK-NEXT: entry:
+// CHECK-NEXT: ret i32 [[Y]]
+//
+int second_asc_i32(asc *x, int y) { return second<asc, int>(*x, y); }
+}
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index b678350e9fcb1..7b73b8a224214 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -181,6 +181,10 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdio.fflush
     libc.src.stdio.ftell
     libc.src.stdio.clearerr
+    libc.src.stdio.sprintf
+    libc.src.stdio.snprintf
+    libc.src.stdio.vsprintf
+    libc.src.stdio.vsnprintf
     libc.src.stdio.puts
     libc.src.stdio.fopen
     libc.src.stdio.fclose
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index 663aa2bb82cae..5afc4173f61a4 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -86,8 +86,8 @@ add_libc_test(
     libc.src.__support.uint128
 )
 
-# The GPU does not support varargs currently.
-if(NOT LIBC_TARGET_OS_IS_GPU)
+# NVPTX does not support varargs currently.
+if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
   add_libc_test(
     arg_list_test
     SUITE
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 9dd1bb455a718..12cce1e4dae37 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -2120,6 +2120,15 @@ class CallBase : public Instruction {
     return Attrs.getParamStackAlignment(ArgNo);
   }
 
+  /// Extract the byref type for a call or parameter.
+  Type *getParamByRefType(unsigned ArgNo) const {
+    if (auto *Ty = Attrs.getParamByRefType(ArgNo))
+      return Ty;
+    if (const Function *F = getCalledFunction())
+      return F->getAttributes().getParamByRefType(ArgNo);
+    return nullptr;
+  }
+
   /// Extract the byval type for a call or parameter.
   Type *getParamByValType(unsigned ArgNo) const {
     if (auto *Ty = Attrs.getParamByValType(ArgNo))
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index c4c1825bbf09e..8803ef5a90e6e 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -106,6 +106,7 @@ void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
 void initializeExpandMemCmpLegacyPassPass(PassRegistry &);
 void initializeExpandPostRAPass(PassRegistry&);
 void initializeExpandReductionsPass(PassRegistry&);
+void initializeExpandVariadicsPass(PassRegistry &);
 void initializeExpandVectorPredicationPass(PassRegistry &);
 void initializeExternalAAWrapperPassPass(PassRegistry&);
 void initializeFEntryInserterPass(PassRegistry&);
diff --git a/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
new file mode 100644
index 0000000000000..4c5a1b61e2d44
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
@@ -0,0 +1,40 @@
+//===- ExpandVariadics.h - expand variadic functions ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
+#define LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Module;
+class ModulePass;
+class OptimizationLevel;
+
+enum class ExpandVariadicsMode {
+  Unspecified, // Use the implementation defaults
+  Disable,     // Disable the pass entirely
+  Optimize,    // Optimise without changing ABI
+  Lowering,    // Change variadic calling convention
+};
+
+class ExpandVariadicsPass : public PassInfoMixin<ExpandVariadicsPass> {
+  const ExpandVariadicsMode Mode;
+
+public:
+  // Operates under the passed mode unless overridden on the command line
+  ExpandVariadicsPass(ExpandVariadicsMode Mode);
+
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+ModulePass *createExpandVariadicsPass(ExpandVariadicsMode);
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 09231504ef906..2d006ea757998 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -138,6 +138,7 @@
 #include "llvm/Transforms/IPO/DeadArgumentElimination.h"
 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
 #include "llvm/Transforms/IPO/EmbedBitcodePass.h"
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
 #include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
 #include "llvm/Transforms/IPO/FunctionImport.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 50682ca4970f1..dad97146a9f60 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -59,6 +59,7 @@ MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass())
 MODULE_PASS("dxil-upgrade", DXILUpgradePass())
 MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
 MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))
+MODULE_PASS("expand-variadics", ExpandVariadicsPass(ExpandVariadicsMode::Disable))
 MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
 MODULE_PASS("function-import", FunctionImportPass())
 MODULE_PASS("globalopt", GlobalOptPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 9c94ca1e47080..17c961578382b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -57,6 +57,7 @@
 #include "llvm/Transforms/HipStdPar/HipStdPar.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
 #include "llvm/Transforms/IPO/GlobalDCE.h"
 #include "llvm/Transforms/IPO/Internalize.h"
 #include "llvm/Transforms/Scalar.h"
@@ -992,6 +993,10 @@ void AMDGPUPassConfig::addIRPasses() {
   if (isPassEnabled(EnableImageIntrinsicOptimizer))
     addPass(createAMDGPUImageIntrinsicOptimizerPass(&TM));
 
+  // This can be disabled by passing ::Disable here or on the command line
+  // with --expand-variadics-override=disable.
+  addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));
+
   // Function calls are not supported, so make sure we inline everything.
   addPass(createAMDGPUAlwaysInlinePass());
   addPass(createAlwaysInlinerLegacyPass());
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index 5fbdbc3a014f9..92a9697720efd 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_component_library(LLVMipo
   DeadArgumentElimination.cpp
   ElimAvailExtern.cpp
   EmbedBitcodePass.cpp
+  ExpandVariadics.cpp
  ExtractGV.cpp
   ForceFunctionAttrs.cpp
   FunctionAttrs.cpp
diff --git a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
new file mode 100644
index 0000000000000..9a4f39948a30e
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
@@ -0,0 +1,1013 @@
+//===-- ExpandVariadicsPass.cpp --------------------------------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an optimization pass for variadic functions. If called from codegen,
+// it can serve as the implementation of variadic functions for a given target.
+//
+// The strategy is to turn the ... part of a variadic function into a va_list
+// and fix up the call sites. The majority of the pass is target independent.
+// The exceptions are the va_list type itself and the rules for where to store
+// variables in memory such that va_arg can iterate over them given a va_list.
+//
+// The majority of the plumbing is splitting the variadic function into a
+// single basic block that packs the variadic arguments into a va_list and
+// a second function that does the work of the original. That packing is
+// exactly what is done by va_start. Further, the transform from ... to va_list
+// replaces va_start with an operation to copy a va_list from the new argument,
+// which is exactly a va_copy. This is useful for reducing target-dependence.
+//
+// A va_list instance is a forward iterator, where the primary operation va_arg
+// is dereference-then-increment. This interface forces significant convergent
+// evolution between target specific implementations. The variation in runtime
+// data layout is limited to that representable by the iterator, parameterised
+// by the type passed to the va_arg instruction.
+//
+// Therefore the majority of the target specific subtlety is packing arguments
+// into a stack allocated buffer such that a va_list can be initialised with it
+// and the va_arg expansion for the target will find the arguments at runtime.
+//
+// The aggregate effect is to unblock other transforms, most critically the
+// general purpose inliner. Known calls to variadic functions become zero cost.
+//
+// Consistency with clang is primarily tested by emitting va_arg using clang
+// then expanding the variadic functions using this pass, followed by trying
+// to constant fold the functions to no-ops.
+//
+// Target specific behaviour is tested in IR - mainly checking that values are
+// put into positions in call frames that make sense for that particular
+// target.
+//
+// There is one "clever" invariant in use. va_start intrinsics that are not
+// within a variadic function are an error in the IR verifier. When this
+// transform moves blocks from a variadic function into a fixed arity one, it
+// moves va_start intrinsics along with everything else. That means that the
+// va_start intrinsics that need to be rewritten to use the trailing argument
+// are exactly those that are in non-variadic functions, so no further state
+// is needed to distinguish those that need to be rewritten.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ExpandVariadics.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Passes/OptimizationLevel.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#define DEBUG_TYPE "expand-variadics"
+
+using namespace llvm;
+
+namespace {
+
+cl::opt<ExpandVariadicsMode> ExpandVariadicsModeOption(
+    DEBUG_TYPE "-override", cl::desc("Override the behaviour of " DEBUG_TYPE),
+    cl::init(ExpandVariadicsMode::Unspecified),
+    cl::values(clEnumValN(ExpandVariadicsMode::Unspecified, "unspecified",
+                          "Use the implementation defaults"),
+               clEnumValN(ExpandVariadicsMode::Disable, "disable",
+                          "Disable the pass entirely"),
+               clEnumValN(ExpandVariadicsMode::Optimize, "optimize",
+                          "Optimise without changing ABI"),
+               clEnumValN(ExpandVariadicsMode::Lowering, "lowering",
+                          "Change variadic calling convention")));
+
+bool commandLineOverride() {
+  return ExpandVariadicsModeOption != ExpandVariadicsMode::Unspecified;
+}
+
+// Instances of this class encapsulate the target-dependent behaviour as a
+// function of triple. Implementing a new ABI is adding a case to the switch
+// in create(llvm::Triple) at the end of this file.
+// This class may end up instantiated in TargetMachine instances, keeping it
+// here for now until enough targets are implemented for the API to evolve.
+class VariadicABIInfo {
+protected:
+  VariadicABIInfo() = default;
+
+public:
+  static std::unique_ptr<VariadicABIInfo> create(const Triple &T);
+
+  // Allow overriding whether the pass runs on a per-target basis
+  virtual bool enableForTarget() = 0;
+
+  // Whether a va_list instance is passed by value or by address,
+  // i.e. does it need to be alloca'ed and stored into, or can
+  // it be passed directly in an SSA register
+  virtual bool vaListPassedInSSARegister() = 0;
+
+  // The type of a va_list iterator object
+  virtual Type *vaListType(LLVMContext &Ctx) = 0;
+
+  // The type of a va_list as a function argument as lowered by C
+  virtual Type *vaListParameterType(Module &M) = 0;
+
+  // Initialize an allocated va_list object to point to an already
+  // initialized contiguous memory region.
+  // Return the value to pass as the va_list argument
+  virtual Value *initializeVaList(Module &M, LLVMContext &Ctx,
+                                  IRBuilder<> &Builder, AllocaInst *VaList,
+                                  Value *Buffer) = 0;
+
+  struct VAArgSlotInfo {
+    Align DataAlign; // With respect to the call frame
+    bool Indirect;   // Passed via a pointer
+  };
+  virtual VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) = 0;
+
+  // Targets implemented so far all have the same trivial lowering for these
+  bool vaEndIsNop() { return true; }
+  bool vaCopyIsMemcpy() { return true; }
+
+  virtual ~VariadicABIInfo() = default;
+};
+
+// Module implements getFunction() which returns nullptr on missing declaration
+// and getOrInsertFunction which creates one when absent. Intrinsics.h only
+// implements getDeclaration which creates one when missing. Checking whether
+// an intrinsic exists thus inserts it in the module, and it then needs to be
+// deleted again to clean up.
+// The right name for the two functions on intrinsics would match Module::,
+// but doing that in a single change would introduce nullptr dereferences
+// where currently there are none. The minimal collateral damage approach
+// would split the change over a release to help downstream branches. As it
+// is unclear what approach will be preferred, implementing the trivial
+// function here in the meantime to decouple from that discussion.
+Function *getPreexistingDeclaration(Module *M, Intrinsic::ID Id,
+                                    ArrayRef<Type *> Tys = {}) {
+  auto *FT = Intrinsic::getType(M->getContext(), Id, Tys);
+  return M->getFunction(Tys.empty() ? Intrinsic::getName(Id)
+                                    : Intrinsic::getName(Id, Tys, M, FT));
+}
+
+class ExpandVariadics : public ModulePass {
+
+  // The pass construction sets the default to optimize when called from the
+  // middle end and lowering when called from the backend. The command line
+  // variable overrides that. This is useful for testing and debugging. It
+  // also allows building an application with variadic functions wholly
+  // removed if one has sufficient control over the dependencies, e.g. a
+  // statically linked clang that has no variadic function calls remaining in
+  // the binary.
+
+public:
+  static char ID;
+  const ExpandVariadicsMode Mode;
+  std::unique_ptr<VariadicABIInfo> ABI;
+
+  ExpandVariadics(ExpandVariadicsMode Mode)
+      : ModulePass(ID),
+        Mode(commandLineOverride() ? ExpandVariadicsModeOption : Mode) {}
+
+  StringRef getPassName() const override { return "Expand variadic functions"; }
+
+  bool rewriteABI() { return Mode == ExpandVariadicsMode::Lowering; }
+
+  bool runOnModule(Module &M) override;
+
+  bool runOnFunction(Module &M, IRBuilder<> &Builder, Function *F);
+
+  Function *replaceAllUsesWithNewDeclaration(Module &M,
+                                             Function *OriginalFunction);
+
+  Function *deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
+                                        Function *OriginalFunction);
+
+  Function *defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
+                                  Function *VariadicWrapper,
+                                  Function *FixedArityReplacement);
+
+  bool expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB, FunctionType *,
+                  Function *NF);
+
+  // The intrinsic functions va_copy and va_end are removed unconditionally.
+  // They correspond to a memcpy and a no-op on all implemented targets.
+  // The va_start intrinsic is removed from basic blocks that were not created
+  // by this pass; some may remain if needed to maintain the external ABI.
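+  //
+  // For instance, under the memcpy/no-op lowering described above, IR along
+  // the lines of (illustrative only, exact intrinsic mangling varies):
+  //   call void @llvm.va_copy.p0(ptr %dst, ptr %src)
+  //   call void @llvm.va_end.p0(ptr %va)
+  // becomes a memcpy of vaListType-many bytes from %src to %dst, and nothing
+  // at all, respectively.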
+
+  template <Intrinsic::ID ID, typename InstructionType>
+  bool expandIntrinsicUsers(Module &M, IRBuilder<> &Builder,
+                            PointerType *IntrinsicArgType) {
+    bool Changed = false;
+    const DataLayout &DL = M.getDataLayout();
+    if (Function *Intrinsic =
+            getPreexistingDeclaration(&M, ID, {IntrinsicArgType})) {
+      for (User *U : make_early_inc_range(Intrinsic->users()))
+        if (auto *I = dyn_cast<InstructionType>(U))
+          Changed |= expandVAIntrinsicCall(Builder, DL, I);
+
+      if (Intrinsic->use_empty())
+        Intrinsic->eraseFromParent();
+    }
+    return Changed;
+  }
+
+  bool expandVAIntrinsicUsersWithAddrspace(Module &M, IRBuilder<> &Builder,
+                                           unsigned Addrspace) {
+    auto &Ctx = M.getContext();
+    PointerType *IntrinsicArgType = PointerType::get(Ctx, Addrspace);
+    bool Changed = false;
+
+    // expand vastart before vacopy as vastart may introduce a vacopy
+    Changed |= expandIntrinsicUsers<Intrinsic::vastart, VAStartInst>(
+        M, Builder, IntrinsicArgType);
+    Changed |= expandIntrinsicUsers<Intrinsic::vaend, VAEndInst>(
+        M, Builder, IntrinsicArgType);
+    Changed |= expandIntrinsicUsers<Intrinsic::vacopy, VACopyInst>(
+        M, Builder, IntrinsicArgType);
+    return Changed;
+  }
+
+  bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
+                             VAStartInst *Inst);
+
+  bool expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
+                             VAEndInst *Inst);
+
+  bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
+                             VACopyInst *Inst);
+
+  FunctionType *inlinableVariadicFunctionType(Module &M, FunctionType *FTy) {
+    // The type of "FTy" with the ... removed and a va_list appended
+    SmallVector<Type *> ArgTypes(FTy->param_begin(), FTy->param_end());
+    ArgTypes.push_back(ABI->vaListParameterType(M));
+    return FunctionType::get(FTy->getReturnType(), ArgTypes,
+                             /*IsVarArgs=*/false);
+  }
+
+  static ConstantInt *sizeOfAlloca(LLVMContext &Ctx, const DataLayout &DL,
+                                   AllocaInst *Alloced) {
+    std::optional<TypeSize> AllocaTypeSize = Alloced->getAllocationSize(DL);
+    uint64_t AsInt = AllocaTypeSize ? AllocaTypeSize->getFixedValue() : 0;
+    return ConstantInt::get(Type::getInt64Ty(Ctx), AsInt);
+  }
+
+  bool expansionApplicableToFunction(Module &M, Function *F) {
+    if (F->isIntrinsic() || !F->isVarArg() ||
+        F->hasFnAttribute(Attribute::Naked))
+      return false;
+
+    if (F->getCallingConv() != CallingConv::C)
+      return false;
+
+    if (rewriteABI())
+      return true;
+
+    if (!F->hasExactDefinition())
+      return false;
+
+    return true;
+  }
+
+  bool expansionApplicableToFunctionCall(CallBase *CB) {
+    if (CallInst *CI = dyn_cast<CallInst>(CB)) {
+      if (CI->isMustTailCall()) {
+        // Cannot expand musttail calls
+        return false;
+      }
+
+      if (CI->getCallingConv() != CallingConv::C)
+        return false;
+
+      return true;
+    }
+
+    if (isa<InvokeInst>(CB)) {
+      // Invoke not implemented in initial implementation of pass
+      return false;
+    }
+
+    // Other unimplemented derivative of CallBase
+    return false;
+  }
+
+  class ExpandedCallFrame {
+    // Helper for constructing an alloca instance containing the arguments bound
+    // to the variadic ... parameter, rearranged to allow indexing through a
+    // va_list iterator
+    enum { N = 4 };
+    SmallVector<Type *, N> FieldTypes;
+    enum Tag { Store, Memcpy, Padding };
+    SmallVector<std::tuple<Value *, uint64_t, Tag>, N> Source;
+
+    template <Tag tag> void append(Type *FieldType, Value *V, uint64_t Bytes) {
+      FieldTypes.push_back(FieldType);
+      Source.push_back({V, Bytes, tag});
+    }
+
+  public:
+    void store(LLVMContext &Ctx, Type *T, Value *V) { append<Store>(T, V, 0); }
+
+    void memcpy(LLVMContext &Ctx, Type *T, Value *V, uint64_t Bytes) {
+      append<Memcpy>(T, V, Bytes);
+    }
+
+    void padding(LLVMContext &Ctx, uint64_t By) {
+      append<Padding>(ArrayType::get(Type::getInt8Ty(Ctx), By), nullptr, 0);
+    }
+
+    size_t size() const { return FieldTypes.size(); }
+    bool empty() const { return FieldTypes.empty(); }
+
+    StructType *asStruct(LLVMContext &Ctx, StringRef Name) {
+      const bool IsPacked = true;
+      return StructType::create(Ctx, FieldTypes,
+                                (Twine(Name) + ".vararg").str(), IsPacked);
+    }
+
+    void initializeStructAlloca(const DataLayout &DL, IRBuilder<> &Builder,
+                                AllocaInst *Alloced) {
+
+      StructType *VarargsTy = cast<StructType>(Alloced->getAllocatedType());
+
+      for (size_t I = 0; I < size(); I++) {
+
+        auto [V, bytes, tag] = Source[I];
+
+        if (tag == Padding) {
+          assert(V == nullptr);
+          continue;
+        }
+
+        auto Dst = Builder.CreateStructGEP(VarargsTy, Alloced, I);
+
+        assert(V != nullptr);
+
+        if (tag == Store)
+          Builder.CreateStore(V, Dst);
+
+        if (tag == Memcpy)
+          Builder.CreateMemCpy(Dst, {}, V, {}, bytes);
+      }
+    }
+  };
+};
+
+bool ExpandVariadics::runOnModule(Module &M) {
+  bool Changed = false;
+  if (Mode == ExpandVariadicsMode::Disable)
+    return Changed;
+
+  Triple TT(M.getTargetTriple());
+  ABI = VariadicABIInfo::create(TT);
+  if (!ABI)
+    return Changed;
+
+  if (!ABI->enableForTarget())
+    return Changed;
+
+  auto &Ctx = M.getContext();
+  const DataLayout &DL = M.getDataLayout();
+  IRBuilder<> Builder(Ctx);
+
+  // Lowering needs to run on all functions exactly once.
+  // Optimize could run on functions containing va_start exactly once.
+  for (Function &F : make_early_inc_range(M))
+    Changed |= runOnFunction(M, Builder, &F);
+
+  // After runOnFunction, all known calls to known variadic functions have
+  // been replaced. va_start intrinsics are presently (and invalidly!) only
+  // present in functions that used to be variadic and have now been replaced
+  // to take a va_list instead. If lowering as opposed to optimising, calls
+  // to unknown variadic functions have also been replaced.
+
+  {
+    // 0 and AllocaAddrSpace are sufficient for the targets implemented so far
+    unsigned Addrspace = 0;
+    Changed |= expandVAIntrinsicUsersWithAddrspace(M, Builder, Addrspace);
+
+    Addrspace = DL.getAllocaAddrSpace();
+    if (Addrspace != 0)
+      Changed |= expandVAIntrinsicUsersWithAddrspace(M, Builder, Addrspace);
+  }
+
+  if (Mode != ExpandVariadicsMode::Lowering)
+    return Changed;
+
+  for (Function &F : make_early_inc_range(M)) {
+    if (F.isDeclaration())
+      continue;
+
+    // Now we need to track down indirect calls. They can't be found by
+    // walking the uses of variadic functions; we need to crawl the
+    // instruction stream. Fortunately this is only necessary for the ABI
+    // rewrite case.
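+    // For example (illustrative IR), a variadic call through a function
+    // pointer such as
+    //   %r = call i32 (i32, ...) %fptr(i32 1, double 2.0)
+    // never appears in the use list of any variadic Function, so it is only
+    // discovered by this crawl.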
+    for (BasicBlock &BB : F) {
+      for (Instruction &I : make_early_inc_range(BB)) {
+        if (CallBase *CB = dyn_cast<CallBase>(&I)) {
+          if (CB->isIndirectCall()) {
+            FunctionType *FTy = CB->getFunctionType();
+            if (FTy->isVarArg())
+              Changed |= expandCall(M, Builder, CB, FTy, 0);
+          }
+        }
+      }
+    }
+  }
+
+  return Changed;
+}
+
+bool ExpandVariadics::runOnFunction(Module &M, IRBuilder<> &Builder,
+                                    Function *OriginalFunction) {
+  bool Changed = false;
+
+  if (!expansionApplicableToFunction(M, OriginalFunction))
+    return Changed;
+
+  const bool OriginalFunctionIsDeclaration = OriginalFunction->isDeclaration();
+  assert(rewriteABI() || !OriginalFunctionIsDeclaration);
+
+  // Declare a new function and redirect every use to that new function
+  Function *VariadicWrapper =
+      replaceAllUsesWithNewDeclaration(M, OriginalFunction);
+  assert(VariadicWrapper->isDeclaration());
+  assert(OriginalFunction->use_empty());
+
+  // Create a new function taking va_list containing the implementation of the
+  // original
+  Function *FixedArityReplacement =
+      deriveFixedArityReplacement(M, Builder, OriginalFunction);
+  assert(OriginalFunction->isDeclaration());
+  assert(FixedArityReplacement->isDeclaration() ==
+         OriginalFunctionIsDeclaration);
+  assert(VariadicWrapper->isDeclaration());
+
+  // Create a single block forwarding wrapper that turns a ... into a va_list
+  Function *VariadicWrapperDefine =
+      defineVariadicWrapper(M, Builder, VariadicWrapper, FixedArityReplacement);
+  assert(VariadicWrapperDefine == VariadicWrapper);
+  assert(!VariadicWrapper->isDeclaration());
+
+  // We now have:
+  // 1. the original function, now as a declaration with no uses
+  // 2. a variadic function that unconditionally calls a fixed arity replacement
+  // 3. a fixed arity function equivalent to the original function
+
+  // Replace known calls to the variadic with calls to the va_list equivalent
+  for (User *U : make_early_inc_range(VariadicWrapper->users())) {
+    if (CallBase *CB = dyn_cast<CallBase>(U)) {
+      Value *calledOperand = CB->getCalledOperand();
+      if (VariadicWrapper == calledOperand)
+        Changed |=
+            expandCall(M, Builder, CB, VariadicWrapper->getFunctionType(),
+                       FixedArityReplacement);
+    }
+  }
+
+  // The original function will be erased.
+  // One of the two new functions will become a replacement for the original.
+  // When preserving the ABI, the other is an internal implementation detail.
+  // When rewriting the ABI, the variadic one is RAUW'd away and then erased.
+  Function *const ExternallyAccessible =
+      rewriteABI() ? FixedArityReplacement : VariadicWrapper;
+  Function *const InternalOnly =
+      rewriteABI() ? VariadicWrapper : FixedArityReplacement;
+
+  // The external function is the replacement for the original
+  ExternallyAccessible->setLinkage(OriginalFunction->getLinkage());
+  ExternallyAccessible->setVisibility(OriginalFunction->getVisibility());
+  ExternallyAccessible->setComdat(OriginalFunction->getComdat());
+  ExternallyAccessible->takeName(OriginalFunction);
+
+  // Annotate the internal one as internal
+  InternalOnly->setVisibility(GlobalValue::DefaultVisibility);
+  InternalOnly->setLinkage(GlobalValue::InternalLinkage);
+
+  // The original is unused and obsolete
+  OriginalFunction->eraseFromParent();
+
+  InternalOnly->removeDeadConstantUsers();
+
+  if (rewriteABI()) {
+    // All known calls to the function have been removed by expandCall.
+    // Resolve everything else by replaceAllUsesWith.
+    VariadicWrapper->replaceAllUsesWith(FixedArityReplacement);
+    VariadicWrapper->eraseFromParent();
+  }
+
+  return Changed;
+}
+
+Function *
+ExpandVariadics::replaceAllUsesWithNewDeclaration(Module &M,
+                                                  Function *OriginalFunction) {
+  auto &Ctx = M.getContext();
+  Function &F = *OriginalFunction;
+  FunctionType *FTy = F.getFunctionType();
+  Function *NF = Function::Create(FTy, F.getLinkage(), F.getAddressSpace());
+
+  NF->setName(F.getName() + ".varargs");
+  NF->IsNewDbgInfoFormat = F.IsNewDbgInfoFormat;
+
+  F.getParent()->getFunctionList().insert(F.getIterator(), NF);
+
+  AttrBuilder ParamAttrs(Ctx);
+  AttributeList Attrs = NF->getAttributes();
+  Attrs = Attrs.addParamAttributes(Ctx, FTy->getNumParams(), ParamAttrs);
+  NF->setAttributes(Attrs);
+
+  OriginalFunction->replaceAllUsesWith(NF);
+  return NF;
+}
+
+Function *
+ExpandVariadics::deriveFixedArityReplacement(Module &M, IRBuilder<> &Builder,
+                                             Function *OriginalFunction) {
+  Function &F = *OriginalFunction;
+  // The purpose here is to split the variadic function F into two functions.
+  // One is a variadic function that bundles the passed arguments into a
+  // va_list and passes it to the second function. The second function does
+  // whatever the original F does, except that it takes a va_list instead of
+  // the ...
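+  //
+  // An illustrative example of the split (hypothetical signatures, following
+  // the naming convention used below):
+  //   declare i32 @sum(i32 %n, ...)
+  // becomes a fixed arity implementation
+  //   define internal i32 @sum.valist(i32 %n, ptr %varargs)
+  // plus a variadic wrapper, built by defineVariadicWrapper, that va_starts
+  // a local va_list and forwards it to @sum.valist.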
+
+  assert(expansionApplicableToFunction(M, &F));
+
+  auto &Ctx = M.getContext();
+
+  // The returned function's isDeclaration() matches F.isDeclaration(),
+  // but that property is not invariant throughout this function
+  const bool FunctionIsDefinition = !F.isDeclaration();
+
+  FunctionType *FTy = F.getFunctionType();
+  SmallVector<Type *> ArgTypes(FTy->param_begin(), FTy->param_end());
+  ArgTypes.push_back(ABI->vaListParameterType(M));
+
+  FunctionType *NFTy = inlinableVariadicFunctionType(M, FTy);
+  Function *NF = Function::Create(NFTy, F.getLinkage(), F.getAddressSpace());
+
+  // Note - same attribute handling as DeadArgumentElimination
+  NF->copyAttributesFrom(&F);
+  NF->setComdat(F.getComdat());
+  F.getParent()->getFunctionList().insert(F.getIterator(), NF);
+  NF->setName(F.getName() + ".valist");
+  NF->IsNewDbgInfoFormat = F.IsNewDbgInfoFormat;
+
+  AttrBuilder ParamAttrs(Ctx);
+
+  AttributeList Attrs = NF->getAttributes();
+  Attrs = Attrs.addParamAttributes(Ctx, NFTy->getNumParams() - 1, ParamAttrs);
+  NF->setAttributes(Attrs);
+
+  // Splice the implementation into the new function with minimal changes
+  if (FunctionIsDefinition) {
+    NF->splice(NF->begin(), &F);
+
+    auto NewArg = NF->arg_begin();
+    for (Argument &Arg : F.args()) {
+      Arg.replaceAllUsesWith(NewArg);
+      NewArg->setName(Arg.getName()); // takeName without killing the old one
+      ++NewArg;
+    }
+    NewArg->setName("varargs");
+  }
+
+  SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+  F.getAllMetadata(MDs);
+  for (auto [KindID, Node] : MDs)
+    NF->addMetadata(KindID, *Node);
+  F.clearMetadata();
+
+  return NF;
+}
+
+Function *
+ExpandVariadics::defineVariadicWrapper(Module &M, IRBuilder<> &Builder,
+                                       Function *VariadicWrapper,
+                                       Function *FixedArityReplacement) {
+  auto &Ctx = Builder.getContext();
+  const DataLayout &DL = M.getDataLayout();
+  assert(VariadicWrapper->isDeclaration());
+  Function &F = *VariadicWrapper;
+
+  assert(F.isDeclaration());
+  Type *VaListTy = ABI->vaListType(Ctx);
+
+  auto *BB = BasicBlock::Create(Ctx, "entry", &F);
+  Builder.SetInsertPoint(BB);
+
+  AllocaInst *VaListInstance =
+      Builder.CreateAlloca(VaListTy, nullptr, "va_start");
+
+  Builder.CreateLifetimeStart(VaListInstance,
+                              sizeOfAlloca(Ctx, DL, VaListInstance));
+
+  Builder.CreateIntrinsic(Intrinsic::vastart, {DL.getAllocaPtrType(Ctx)},
+                          {VaListInstance});
+
+  SmallVector<Value *> Args;
+  for (Argument &A : F.args())
+    Args.push_back(&A);
+
+  Type *ParameterType = ABI->vaListParameterType(M);
+  if (ABI->vaListPassedInSSARegister())
+    Args.push_back(Builder.CreateLoad(ParameterType, VaListInstance));
+  else
+    Args.push_back(Builder.CreateAddrSpaceCast(VaListInstance, ParameterType));
+
+  CallInst *Result = Builder.CreateCall(FixedArityReplacement, Args);
+
+  Builder.CreateIntrinsic(Intrinsic::vaend, {DL.getAllocaPtrType(Ctx)},
+                          {VaListInstance});
+  Builder.CreateLifetimeEnd(VaListInstance,
+                            sizeOfAlloca(Ctx, DL, VaListInstance));
+
+  if (Result->getType()->isVoidTy())
+    Builder.CreateRetVoid();
+  else
+    Builder.CreateRet(Result);
+
+  return VariadicWrapper;
+}
+
+bool ExpandVariadics::expandCall(Module &M, IRBuilder<> &Builder, CallBase *CB,
+                                 FunctionType *VarargFunctionType,
+                                 Function *NF) {
+  bool Changed = false;
+  const DataLayout &DL = M.getDataLayout();
+
+  if (!expansionApplicableToFunctionCall(CB)) {
+    if (rewriteABI())
+      report_fatal_error("Cannot lower callbase instruction");
+    return Changed;
+  }
+
+  // This is tricky. The call instruction's function type might not match
+  // the type of the callee. When optimising, it can be left unchanged.
+  // Webassembly detects that inconsistency and repairs it.
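+  // e.g. a call through an opaque pointer can carry a call site signature
+  // that differs from the variadic callee's; optimising mode leaves such
+  // calls alone, while ABI-rewriting mode forces them to the known type.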
+  FunctionType *FuncType = CB->getFunctionType();
+  if (FuncType != VarargFunctionType) {
+    if (!rewriteABI())
+      return Changed;
+    FuncType = VarargFunctionType;
+  }
+
+  auto &Ctx = CB->getContext();
+
+  Align MaxFieldAlign(1);
+
+  // The strategy is to allocate a call frame containing the variadic
+  // arguments, laid out such that a target-specific va_list can be
+  // initialized with it and target-specific va_arg instructions will
+  // correctly iterate over it. This means getting the alignment right and
+  // sometimes embedding a pointer to the value instead of the value itself.
+
+  Function *CBF = CB->getParent()->getParent();
+
+  ExpandedCallFrame Frame;
+
+  uint64_t CurrentOffset = 0;
+
+  for (unsigned I = FuncType->getNumParams(), E = CB->arg_size(); I < E; ++I) {
+    Value *ArgVal = CB->getArgOperand(I);
+    const bool IsByVal = CB->paramHasAttr(I, Attribute::ByVal);
+    const bool IsByRef = CB->paramHasAttr(I, Attribute::ByRef);
+
+    // The type of the value being passed, decoded from byval/byref attributes
+    // if required
+    Type *const UnderlyingType = IsByVal   ? CB->getParamByValType(I)
+                                 : IsByRef ? CB->getParamByRefType(I)
+                                           : ArgVal->getType();
+    const uint64_t UnderlyingSize =
+        DL.getTypeAllocSize(UnderlyingType).getFixedValue();
+
+    // The type to be written into the call frame
+    Type *FrameFieldType = UnderlyingType;
+
+    // The value to copy from when initialising the frame alloca
+    Value *SourceValue = ArgVal;
+
+    VariadicABIInfo::VAArgSlotInfo SlotInfo = ABI->slotInfo(DL, UnderlyingType);
+
+    if (SlotInfo.Indirect) {
+      // The va_arg lowering loads through a pointer. Set up an alloca to aim
+      // that pointer at.
+      Builder.SetInsertPointPastAllocas(CBF);
+      Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
+      Value *CallerCopy =
+          Builder.CreateAlloca(UnderlyingType, nullptr, "IndirectAlloca");
+
+      Builder.SetInsertPoint(CB);
+      if (IsByVal)
+        Builder.CreateMemCpy(CallerCopy, {}, ArgVal, {}, UnderlyingSize);
+      else
+        Builder.CreateStore(ArgVal, CallerCopy);
+
+      // Indirection now handled, pass the alloca ptr by value
+      FrameFieldType = DL.getAllocaPtrType(Ctx);
+      SourceValue = CallerCopy;
+    }
+
+    // Alignment of the value within the frame
+    // This probably needs to be controllable as a function of type
+    Align DataAlign = SlotInfo.DataAlign;
+
+    MaxFieldAlign = std::max(MaxFieldAlign, DataAlign);
+
+    uint64_t DataAlignV = DataAlign.value();
+    if (uint64_t Rem = CurrentOffset % DataAlignV) {
+      // Inject explicit padding to deal with alignment requirements
+      uint64_t Padding = DataAlignV - Rem;
+      Frame.padding(Ctx, Padding);
+      CurrentOffset += Padding;
+    }
+
+    if (SlotInfo.Indirect) {
+      Frame.store(Ctx, FrameFieldType, SourceValue);
+    } else {
+      if (IsByVal)
+        Frame.memcpy(Ctx, FrameFieldType, SourceValue, UnderlyingSize);
+      else
+        Frame.store(Ctx, FrameFieldType, SourceValue);
+    }
+
+    CurrentOffset += DL.getTypeAllocSize(FrameFieldType).getFixedValue();
+  }
+
+  if (Frame.empty()) {
+    // Not passing any arguments, hopefully va_arg won't try to read any.
+    // Create a single-byte frame containing nothing for the va_list instance
+    // to point at, as that is less of a special case in the compiler and
+    // probably easier to interpret in a debugger.
+    Frame.padding(Ctx, 1);
+  }
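+  // Worked example (wasm32 layout from the tests below): (i32, double)
+  // produces the frame <{ i32, [4 x i8], double }>, where [4 x i8] is the
+  // explicit padding injected above so the double lands 8-byte aligned.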
+
+  StructType *VarargsTy = Frame.asStruct(Ctx, CBF->getName());
+
+  // The struct instance needs to be at least MaxFieldAlign for the alignment
+  // of the fields to be correct at runtime. Use the native stack alignment
+  // instead if that's greater, as that tends to give better codegen.
+  // This is an awkward way to guess whether there is a known stack alignment
+  // without hitting an assert in DL.getStackAlignment; 1024 is an arbitrary
+  // number likely to be greater than the natural stack alignment.
+  // TODO: DL.getStackAlignment could return a MaybeAlign instead of asserting
+  Align AllocaAlign = MaxFieldAlign;
+  if (DL.exceedsNaturalStackAlignment(Align(1024)))
+    AllocaAlign = std::max(AllocaAlign, DL.getStackAlignment());
+
+  // Put the alloca to hold the variadic args in the entry basic block.
+  Builder.SetInsertPointPastAllocas(CBF);
+
+  // Set the debug location explicitly; SetInsertPointPastAllocas does not
+  Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
+
+  // The awkward construction here is to set the alignment on the instance
+  AllocaInst *Alloced = Builder.Insert(
+      new AllocaInst(VarargsTy, DL.getAllocaAddrSpace(), nullptr, AllocaAlign),
+      "vararg_buffer");
+  Changed = true;
+  assert(Alloced->getAllocatedType() == VarargsTy);
+
+  // Initialize the fields in the struct
+  Builder.SetInsertPoint(CB);
+  Builder.CreateLifetimeStart(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
+  Frame.initializeStructAlloca(DL, Builder, Alloced);
+
+  const unsigned NumArgs = FuncType->getNumParams();
+  SmallVector<Value *> Args(CB->arg_begin(), CB->arg_begin() + NumArgs);
+
+  // Initialize a va_list pointing to that struct and pass it as the last
+  // argument
+  AllocaInst *VaList = nullptr;
+  {
+    if (!ABI->vaListPassedInSSARegister()) {
+      Type *VaListTy = ABI->vaListType(Ctx);
+      Builder.SetInsertPointPastAllocas(CBF);
+      Builder.SetCurrentDebugLocation(CB->getStableDebugLoc());
+      VaList = Builder.CreateAlloca(VaListTy, nullptr, "va_argument");
+      Builder.SetInsertPoint(CB);
+      Builder.CreateLifetimeStart(VaList, sizeOfAlloca(Ctx, DL, VaList));
+    }
+    Builder.SetInsertPoint(CB);
+    Args.push_back(ABI->initializeVaList(M, Ctx, Builder, VaList, Alloced));
+  }
+
+  // Attributes excluding any on the vararg arguments
+  AttributeList PAL = CB->getAttributes();
+  if (!PAL.isEmpty()) {
+    SmallVector<AttributeSet> ArgAttrs;
+    for (unsigned ArgNo = 0; ArgNo < NumArgs; ArgNo++)
+      ArgAttrs.push_back(PAL.getParamAttrs(ArgNo));
+    PAL =
+        AttributeList::get(Ctx, PAL.getFnAttrs(), PAL.getRetAttrs(), ArgAttrs);
+  }
+
+  SmallVector<OperandBundleDef, 1> OpBundles;
+  CB->getOperandBundlesAsDefs(OpBundles);
+
+  CallBase *NewCB = nullptr;
+
+  if (CallInst *CI = dyn_cast<CallInst>(CB)) {
+    Value *Dst = NF ? NF : CI->getCalledOperand();
+    FunctionType *NFTy = inlinableVariadicFunctionType(M, VarargFunctionType);
+
+    NewCB = CallInst::Create(NFTy, Dst, Args, OpBundles, "", CI);
+
+    CallInst::TailCallKind TCK = CI->getTailCallKind();
+    assert(TCK != CallInst::TCK_MustTail);
+
+    // Can't tail call a function that is being passed a pointer to an alloca
+    if (TCK == CallInst::TCK_Tail)
+      TCK = CallInst::TCK_None;
+    CI->setTailCallKind(TCK);
+
+  } else {
+    llvm_unreachable("Unreachable when !expansionApplicableToFunctionCall()");
+  }
+
+  if (VaList)
+    Builder.CreateLifetimeEnd(VaList, sizeOfAlloca(Ctx, DL, VaList));
+
+  Builder.CreateLifetimeEnd(Alloced, sizeOfAlloca(Ctx, DL, Alloced));
+
+  NewCB->setAttributes(PAL);
+  NewCB->takeName(CB);
+  NewCB->setCallingConv(CB->getCallingConv());
+  NewCB->setDebugLoc(DebugLoc());
+
+  // DeadArgElim and ArgPromotion copy exactly this metadata
+  NewCB->copyMetadata(*CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
+
+  CB->replaceAllUsesWith(NewCB);
+  CB->eraseFromParent();
+  return Changed;
+}
+
+bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
+                                            const DataLayout &DL,
+                                            VAStartInst *Inst) {
+  // Only removing va_start instructions that are not in variadic functions;
+  // those would be rejected by the IR verifier before this pass.
+  // After splicing basic blocks from a variadic function into a fixed arity
+  // one, the va_start instructions that used to refer to the ... parameter
+  // still exist. There are also variadic functions that this pass did not
+  // change, and va_start instances in the created single block wrapper
+  // functions. Replace exactly the instances in non-variadic functions, as
+  // those are the ones to be fixed up to use the va_list passed as the
+  // final argument.
+
+  Function *ContainingFunction = Inst->getFunction();
+  if (ContainingFunction->isVarArg())
+    return false;
+
+  // The last argument is a vaListParameterType, either a va_list
+  // or a pointer to one depending on the target.
+  bool PassedByValue = ABI->vaListPassedInSSARegister();
+  Argument *PassedVaList =
+      ContainingFunction->getArg(ContainingFunction->arg_size() - 1);
+
+  // va_start takes a pointer to a va_list, e.g. one on the stack
+  Value *VaStartArg = Inst->getArgList();
+
+  Builder.SetInsertPoint(Inst);
+
+  if (PassedByValue) {
+    // The general thing to do is create an alloca, store the va_list argument
+    // to it, then create a va_copy. When vaCopyIsMemcpy(), this optimises to
+    // a store to the VaStartArg.
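+    // e.g. on targets where the va_list is a bare pointer (AMDGPU, wasm32),
+    // the va_start in the spliced body lowers to
+    //   store ptr %varargs, ptr %s
+    // as checked below for @start_once and @start_twice.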
+    assert(ABI->vaCopyIsMemcpy());
+    Builder.CreateStore(PassedVaList, VaStartArg);
+  } else {
+
+    // Otherwise emit a vacopy to pick up target-specific handling, if any
+    auto &Ctx = Builder.getContext();
+
+    Builder.CreateIntrinsic(Intrinsic::vacopy, {DL.getAllocaPtrType(Ctx)},
+                            {VaStartArg, PassedVaList});
+  }
+
+  Inst->eraseFromParent();
+  return true;
+}
+
+bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &, const DataLayout &,
+                                            VAEndInst *Inst) {
+  assert(ABI->vaEndIsNop());
+  Inst->eraseFromParent();
+  return true;
+}
+
+bool ExpandVariadics::expandVAIntrinsicCall(IRBuilder<> &Builder,
+                                            const DataLayout &DL,
+                                            VACopyInst *Inst) {
+  assert(ABI->vaCopyIsMemcpy());
+  Builder.SetInsertPoint(Inst);
+
+  auto &Ctx = Builder.getContext();
+  Type *VaListTy = ABI->vaListType(Ctx);
+  uint64_t Size = DL.getTypeAllocSize(VaListTy).getFixedValue();
+
+  Builder.CreateMemCpy(Inst->getDest(), {}, Inst->getSrc(), {},
+                       Builder.getInt32(Size));
+
+  Inst->eraseFromParent();
+  return true;
+}
+
+struct Amdgpu final : public VariadicABIInfo {
+
+  bool enableForTarget() override { return true; }
+
+  bool vaListPassedInSSARegister() override { return true; }
+
+  Type *vaListType(LLVMContext &Ctx) override {
+    return PointerType::getUnqual(Ctx);
+  }
+
+  Type *vaListParameterType(Module &M) override {
+    return PointerType::getUnqual(M.getContext());
+  }
+
+  Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
+                          AllocaInst * /*va_list*/, Value *Buffer) override {
+    // Given Buffer, which is an AllocaInst of vararg_buffer,
+    // return something usable as the va_list parameter type
+    return Builder.CreateAddrSpaceCast(Buffer, vaListParameterType(M));
+  }
+
+  VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
+    return {Align(4), false};
+  }
+};
+
+struct Wasm final : public VariadicABIInfo {
+
+  bool enableForTarget() override {
+    // Currently wasm is only used for testing.
+    return commandLineOverride();
+  }
+
+  bool vaListPassedInSSARegister() override { return true; }
+
+  Type *vaListType(LLVMContext &Ctx) override {
+    return PointerType::getUnqual(Ctx);
+  }
+
+  Type *vaListParameterType(Module &M) override {
+    return PointerType::getUnqual(M.getContext());
+  }
+
+  Value *initializeVaList(Module &M, LLVMContext &Ctx, IRBuilder<> &Builder,
+                          AllocaInst * /*va_list*/, Value *Buffer) override {
+    return Buffer;
+  }
+
+  VAArgSlotInfo slotInfo(const DataLayout &DL, Type *Parameter) override {
+    LLVMContext &Ctx = Parameter->getContext();
+    const unsigned MinAlign = 4;
+    Align A = DL.getABITypeAlign(Parameter);
+    if (A < MinAlign)
+      A = Align(MinAlign);
+
+    if (auto *S = dyn_cast<StructType>(Parameter)) {
+      if (S->getNumElements() > 1) {
+        return {DL.getABITypeAlign(PointerType::getUnqual(Ctx)), true};
+      }
+    }
+
+    return {A, false};
+  }
+};
+
+std::unique_ptr<VariadicABIInfo> VariadicABIInfo::create(const Triple &T) {
+  switch (T.getArch()) {
+  case Triple::r600:
+  case Triple::amdgcn: {
+    return std::make_unique<Amdgpu>();
+  }
+
+  case Triple::wasm32: {
+    return std::make_unique<Wasm>();
+  }
+
+  default:
+    return {};
+  }
+}
+
+} // namespace
+
+char ExpandVariadics::ID = 0;
+
+INITIALIZE_PASS(ExpandVariadics, DEBUG_TYPE, "Expand variadic functions", false,
+                false)
+
+ModulePass *llvm::createExpandVariadicsPass(ExpandVariadicsMode M) {
+  return new ExpandVariadics(M);
+}
+
+PreservedAnalyses ExpandVariadicsPass::run(Module &M, ModuleAnalysisManager &) {
+  return ExpandVariadics(Mode).runOnModule(M) ?
PreservedAnalyses::none() + : PreservedAnalyses::all(); +} + +ExpandVariadicsPass::ExpandVariadicsPass(ExpandVariadicsMode M) : Mode(M) {} diff --git a/llvm/test/CodeGen/AMDGPU/expand-variadic-call.ll b/llvm/test/CodeGen/AMDGPU/expand-variadic-call.ll new file mode 100644 index 0000000000000..ce55558dabaf1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/expand-variadic-call.ll @@ -0,0 +1,545 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature +; RUN: opt -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s +; REQUIRES: amdgpu-registered-target +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +target triple = "amdgcn-amd-amdhsa" + +; Check the variables are lowered to the locations this target expects + +; The types show the call frames +; CHECK: %single_i32.vararg = type <{ i32 }> +; CHECK: %single_double.vararg = type <{ double }> +; CHECK: %single_v4f32.vararg = type <{ <4 x float> }> +; CHECK: %single_v8f32.vararg = type <{ <8 x float> }> +; CHECK: %single_v16f32.vararg = type <{ <16 x float> }> +; CHECK: %single_v32f32.vararg = type <{ <32 x float> }> +; CHECK: %i32_double.vararg = type <{ i32, double }> +; CHECK: %double_i32.vararg = type <{ double, i32 }> +; CHECK: %i32_libcS.vararg = type <{ i32, %struct.libcS }> +; CHECK: %libcS_i32.vararg = type <{ %struct.libcS, i32 }> +; CHECK: %i32_v4f32.vararg = type <{ i32, <4 x float> }> +; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }> +; CHECK: %i32_v8f32.vararg = type <{ i32, <8 x float> }> +; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }> +; CHECK: %i32_v16f32.vararg = type <{ i32, <16 x float> }> +; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }> +; CHECK: %i32_v32f32.vararg = type <{ i32, <32 x float> }> +; CHECK: %v32f32_i32.vararg = type <{ <32 x float>, i32 }> +; CHECK: %fptr_single_i32.vararg = type <{ i32 }> +; CHECK: %fptr_libcS.vararg = type <{ %struct.libcS }> + +%struct.libcS = type { i8, i16, i32, i64, float, double } + +@vararg_ptr = hidden addrspace(1) global ptr @vararg, align 8 + +define hidden void @copy(ptr noundef %va) { +; CHECK-LABEL: define {{[^@]+}}@copy(ptr noundef %va) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %va.addr = alloca ptr, align 8, addrspace(5) +; CHECK-NEXT: %cp = alloca ptr, align 8, addrspace(5) +; CHECK-NEXT: %va.addr.ascast = addrspacecast ptr addrspace(5) %va.addr to ptr +; CHECK-NEXT: %cp.ascast = addrspacecast ptr addrspace(5) %cp to ptr +; CHECK-NEXT: store ptr %va, ptr addrspace(5) %va.addr, align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %cp) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr %cp.ascast, ptr %va.addr.ascast, i32 8, i1 false) +; CHECK-NEXT: %0 = load ptr, ptr addrspace(5) %cp, align 8 +; CHECK-NEXT: call void @valist(ptr noundef %0) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %cp) +; CHECK-NEXT: ret void +; +entry: + %va.addr = alloca ptr, align 8, addrspace(5) + %cp = alloca ptr, align 8, addrspace(5) + %va.addr.ascast = addrspacecast ptr addrspace(5) %va.addr to ptr + %cp.ascast = addrspacecast ptr addrspace(5) %cp to ptr + store ptr %va, ptr addrspace(5) %va.addr, align 8 + call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %cp) + call void @llvm.va_copy.p0(ptr %cp.ascast, ptr nonnull 
%va.addr.ascast) + %0 = load ptr, ptr addrspace(5) %cp, align 8 + call void @valist(ptr noundef %0) + call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %cp) + ret void +} + +declare void @llvm.lifetime.start.p5(i64 immarg, ptr addrspace(5) nocapture) + +declare void @llvm.va_copy.p0(ptr, ptr) + +declare hidden void @valist(ptr noundef) + +declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture) + +define hidden void @start_once(...) { +; CHECK-LABEL: define {{[^@]+}}@start_once(ptr %varargs) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %s = alloca ptr, align 8, addrspace(5) +; CHECK-NEXT: %s.ascast = addrspacecast ptr addrspace(5) %s to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s) +; CHECK-NEXT: store ptr %varargs, ptr %s.ascast, align 8 +; CHECK-NEXT: %0 = load ptr, ptr addrspace(5) %s, align 8 +; CHECK-NEXT: call void @valist(ptr noundef %0) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s) +; CHECK-NEXT: ret void +; +entry: + %s = alloca ptr, align 8, addrspace(5) + %s.ascast = addrspacecast ptr addrspace(5) %s to ptr + call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s) + call void @llvm.va_start.p0(ptr %s.ascast) + %0 = load ptr, ptr addrspace(5) %s, align 8 + call void @valist(ptr noundef %0) + call void @llvm.va_end.p0(ptr %s.ascast) + call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s) + ret void +} + +declare void @llvm.va_start.p0(ptr) + +declare void @llvm.va_end.p0(ptr) + +define hidden void @start_twice(...) { +; CHECK-LABEL: define {{[^@]+}}@start_twice(ptr %varargs) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %s0 = alloca ptr, align 8, addrspace(5) +; CHECK-NEXT: %s1 = alloca ptr, align 8, addrspace(5) +; CHECK-NEXT: %s0.ascast = addrspacecast ptr addrspace(5) %s0 to ptr +; CHECK-NEXT: %s1.ascast = addrspacecast ptr addrspace(5) %s1 to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s0) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s1) +; CHECK-NEXT: store ptr %varargs, ptr %s0.ascast, align 8 +; CHECK-NEXT: %0 = load ptr, ptr addrspace(5) %s0, align 8 +; CHECK-NEXT: call void @valist(ptr noundef %0) +; CHECK-NEXT: store ptr %varargs, ptr %s1.ascast, align 8 +; CHECK-NEXT: %1 = load ptr, ptr addrspace(5) %s1, align 8 +; CHECK-NEXT: call void @valist(ptr noundef %1) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s1) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s0) +; CHECK-NEXT: ret void +; +entry: + %s0 = alloca ptr, align 8, addrspace(5) + %s1 = alloca ptr, align 8, addrspace(5) + %s0.ascast = addrspacecast ptr addrspace(5) %s0 to ptr + %s1.ascast = addrspacecast ptr addrspace(5) %s1 to ptr + call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s0) + call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %s1) + call void @llvm.va_start.p0(ptr %s0.ascast) + %0 = load ptr, ptr addrspace(5) %s0, align 8 + call void @valist(ptr noundef %0) + call void @llvm.va_end.p0(ptr %s0.ascast) + call void @llvm.va_start.p0(ptr %s1.ascast) + %1 = load ptr, ptr addrspace(5) %s1, align 8 + call void @valist(ptr noundef %1) + call void @llvm.va_end.p0(ptr %s1.ascast) + call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s1) + call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %s0) + ret void +} + +define hidden void @single_i32(i32 noundef %x) { +; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca 
%single_i32.vararg, align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4 +; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %1) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x) + ret void +} + +declare hidden void @vararg(...) + +define hidden void @single_double(double noundef %x) { +; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store double %x, ptr addrspace(5) %0, align 8 +; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %1) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(double noundef %x) + ret void +} + +define hidden void @single_v4f32(<4 x float> noundef %x) { +; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> noundef %x) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 16, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <4 x float> %x, ptr addrspace(5) %0, align 16 +; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %1) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 16, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<4 x float> noundef %x) + ret void +} + +define hidden void @single_v8f32(<8 x float> noundef %x) { +; CHECK-LABEL: define {{[^@]+}}@single_v8f32(<8 x float> noundef %x) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 32, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %single_v8f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <8 x float> %x, ptr addrspace(5) %0, align 32 +; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %1) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 32, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<8 x float> noundef %x) + ret void +} + +define hidden void @single_v16f32(<16 x float> noundef %x) { +; CHECK-LABEL: define {{[^@]+}}@single_v16f32(<16 x float> noundef %x) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 64, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %single_v16f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <16 x float> %x, ptr addrspace(5) %0, align 64 +; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %1) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 64, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<16 x float> noundef %x) + ret void +} + +define hidden void @single_v32f32(<32 x float> noundef %x) { +; CHECK-LABEL: define {{[^@]+}}@single_v32f32(<32 x float> noundef %x) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 128, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %single_v32f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <32 x float> %x, ptr addrspace(5) %0, align 128 +; CHECK-NEXT: %1 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %1) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 128, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<32 x float> noundef %x) + ret void +} + +define hidden void @i32_double(i32 noundef %x, double noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 12, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store double %y, ptr addrspace(5) %1, align 8 +; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %2) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 12, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(i32 noundef %x, double noundef %y) + ret void +} + +define hidden void @double_i32(double noundef %x, i32 noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 12, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store double %x, ptr addrspace(5) %0, align 8 +; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4 +; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %2) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 12, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(double noundef %x, i32 noundef %y) + ret void +} + +define hidden void @i32_libcS(i32 noundef %x, i8 %y.coerce0, i16 %y.coerce1, i32 %y.coerce2, i64 %y.coerce3, float %y.coerce4, double %y.coerce5) { +; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, i8 %y.coerce0, i16 %y.coerce1, i32 %y.coerce2, i64 %y.coerce3, float %y.coerce4, double %y.coerce5) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 4, addrspace(5) +; CHECK-NEXT: %.fca.0.insert = insertvalue %struct.libcS poison, i8 %y.coerce0, 0 +; CHECK-NEXT: %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %y.coerce1, 1 +; CHECK-NEXT: %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %y.coerce2, 2 +; CHECK-NEXT: %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %y.coerce3, 3 +; CHECK-NEXT: %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %y.coerce4, 4 +; CHECK-NEXT: %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %y.coerce5, 5 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 36, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %i32_libcS.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store %struct.libcS %.fca.5.insert, ptr addrspace(5) %1, align 8 +; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %2) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 36, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + %.fca.0.insert = insertvalue %struct.libcS poison, i8 %y.coerce0, 0 + %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %y.coerce1, 1 + %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %y.coerce2, 2 + %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %y.coerce3, 3 + %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %y.coerce4, 4 + %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %y.coerce5, 5 + tail call void (...) 
@vararg(i32 noundef %x, %struct.libcS %.fca.5.insert) + ret void +} + +define hidden void @libcS_i32(i8 %x.coerce0, i16 %x.coerce1, i32 %x.coerce2, i64 %x.coerce3, float %x.coerce4, double %x.coerce5, i32 noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@libcS_i32(i8 %x.coerce0, i16 %x.coerce1, i32 %x.coerce2, i64 %x.coerce3, float %x.coerce4, double %x.coerce5, i32 noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 4, addrspace(5) +; CHECK-NEXT: %.fca.0.insert = insertvalue %struct.libcS poison, i8 %x.coerce0, 0 +; CHECK-NEXT: %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %x.coerce1, 1 +; CHECK-NEXT: %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %x.coerce2, 2 +; CHECK-NEXT: %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %x.coerce3, 3 +; CHECK-NEXT: %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %x.coerce4, 4 +; CHECK-NEXT: %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %x.coerce5, 5 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 36, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %libcS_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store %struct.libcS %.fca.5.insert, ptr addrspace(5) %0, align 8 +; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4 +; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %2) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 36, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + %.fca.0.insert = insertvalue %struct.libcS poison, i8 %x.coerce0, 0 + %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %x.coerce1, 1 + %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %x.coerce2, 2 + %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %x.coerce3, 3 + %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %x.coerce4, 4 + %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %x.coerce5, 5 + tail call void (...) @vararg(%struct.libcS %.fca.5.insert, i32 noundef %y) + ret void +} + +define hidden void @i32_v4f32(i32 noundef %x, <4 x float> noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 20, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store <4 x float> %y, ptr addrspace(5) %1, align 16 +; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %2) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 20, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(i32 noundef %x, <4 x float> noundef %y) + ret void +} + +define hidden void @v4f32_i32(<4 x float> noundef %x, i32 noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> noundef %x, i32 noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 20, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <4 x float> %x, ptr addrspace(5) %0, align 16 +; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4 +; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %2) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 20, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<4 x float> noundef %x, i32 noundef %y) + ret void +} + +define hidden void @i32_v8f32(i32 noundef %x, <8 x float> noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, <8 x float> noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 36, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v8f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store <8 x float> %y, ptr addrspace(5) %1, align 32 +; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %2) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 36, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, <8 x float> noundef %y) + ret void +} + +define hidden void @v8f32_i32(<8 x float> noundef %x, i32 noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(<8 x float> noundef %x, i32 noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 36, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %v8f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <8 x float> %x, ptr addrspace(5) %0, align 32 +; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4 +; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %2) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 36, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<8 x float> noundef %x, i32 noundef %y) + ret void +} + +define hidden void @i32_v16f32(i32 noundef %x, <16 x float> noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, <16 x float> noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 68, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v16f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store <16 x float> %y, ptr addrspace(5) %1, align 64 +; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %2) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 68, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, <16 x float> noundef %y) + ret void +} + +define hidden void @v16f32_i32(<16 x float> noundef %x, i32 noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(<16 x float> noundef %x, i32 noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 68, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %v16f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <16 x float> %x, ptr addrspace(5) %0, align 64 +; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4 +; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %2) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 68, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<16 x float> noundef %x, i32 noundef %y) + ret void +} + +define hidden void @i32_v32f32(i32 noundef %x, <32 x float> noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, <32 x float> noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 132, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v32f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr addrspace(5) %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store <32 x float> %y, ptr addrspace(5) %1, align 128 +; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %2) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 132, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(i32 noundef %x, <32 x float> noundef %y) + ret void +} + +define hidden void @v32f32_i32(<32 x float> noundef %x, i32 noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(<32 x float> noundef %x, i32 noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 132, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %v32f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <32 x float> %x, ptr addrspace(5) %0, align 128 +; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr addrspace(5) %1, align 4 +; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void @vararg(ptr %2) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 132, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<32 x float> noundef %x, i32 noundef %y) + ret void +} + +define hidden void @fptr_single_i32(i32 noundef %x) { +; CHECK-LABEL: define {{[^@]+}}@fptr_single_i32(i32 noundef %x) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %fptr_single_i32.vararg, align 4, addrspace(5) +; CHECK-NEXT: %0 = load volatile ptr, ptr addrspacecast (ptr addrspace(1) @vararg_ptr to ptr), align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %1 = getelementptr inbounds %fptr_single_i32.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr addrspace(5) %1, align 4 +; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void %0(ptr %2) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + %0 = load volatile ptr, ptr addrspacecast (ptr addrspace(1) @vararg_ptr to ptr), align 8 + tail call void (...) 
%0(i32 noundef %x) + ret void +} + +define hidden void @fptr_libcS(i8 %x.coerce0, i16 %x.coerce1, i32 %x.coerce2, i64 %x.coerce3, float %x.coerce4, double %x.coerce5) { +; CHECK-LABEL: define {{[^@]+}}@fptr_libcS(i8 %x.coerce0, i16 %x.coerce1, i32 %x.coerce2, i64 %x.coerce3, float %x.coerce4, double %x.coerce5) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %fptr_libcS.vararg, align 4, addrspace(5) +; CHECK-NEXT: %0 = load volatile ptr, ptr addrspacecast (ptr addrspace(1) @vararg_ptr to ptr), align 8 +; CHECK-NEXT: %.fca.0.insert = insertvalue %struct.libcS poison, i8 %x.coerce0, 0 +; CHECK-NEXT: %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %x.coerce1, 1 +; CHECK-NEXT: %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %x.coerce2, 2 +; CHECK-NEXT: %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %x.coerce3, 3 +; CHECK-NEXT: %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %x.coerce4, 4 +; CHECK-NEXT: %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %x.coerce5, 5 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 32, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: %1 = getelementptr inbounds %fptr_libcS.vararg, ptr addrspace(5) %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store %struct.libcS %.fca.5.insert, ptr addrspace(5) %1, align 8 +; CHECK-NEXT: %2 = addrspacecast ptr addrspace(5) %vararg_buffer to ptr +; CHECK-NEXT: call void %0(ptr %2) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 32, ptr addrspace(5) %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + %0 = load volatile ptr, ptr addrspacecast (ptr addrspace(1) @vararg_ptr to ptr), align 8 + %.fca.0.insert = insertvalue %struct.libcS poison, i8 %x.coerce0, 0 + %.fca.1.insert = insertvalue %struct.libcS %.fca.0.insert, i16 %x.coerce1, 1 + %.fca.2.insert = insertvalue %struct.libcS %.fca.1.insert, i32 %x.coerce2, 2 + %.fca.3.insert = insertvalue %struct.libcS %.fca.2.insert, i64 %x.coerce3, 3 + %.fca.4.insert = insertvalue %struct.libcS %.fca.3.insert, float %x.coerce4, 4 + %.fca.5.insert = insertvalue %struct.libcS %.fca.4.insert, double %x.coerce5, 5 + tail call void (...) 
%0(%struct.libcS %.fca.5.insert) + ret void +} + + diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 0db88d1c095d3..08cf83fd2bd0f 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -31,6 +31,7 @@ ; GCN-O0-NEXT: AMDGPU Remove Incompatible Functions ; GCN-O0-NEXT: AMDGPU Printf lowering ; GCN-O0-NEXT: Lower ctors and dtors for AMDGPU +; GCN-O0-NEXT: Expand variadic functions ; GCN-O0-NEXT: AMDGPU Inline All Functions ; GCN-O0-NEXT: Inliner for always_inline functions ; GCN-O0-NEXT: FunctionPass Manager @@ -178,6 +179,7 @@ ; GCN-O1-NEXT: AMDGPU Remove Incompatible Functions ; GCN-O1-NEXT: AMDGPU Printf lowering ; GCN-O1-NEXT: Lower ctors and dtors for AMDGPU +; GCN-O1-NEXT: Expand variadic functions ; GCN-O1-NEXT: AMDGPU Inline All Functions ; GCN-O1-NEXT: Inliner for always_inline functions ; GCN-O1-NEXT: FunctionPass Manager @@ -454,6 +456,7 @@ ; GCN-O1-OPTS-NEXT: AMDGPU Remove Incompatible Functions ; GCN-O1-OPTS-NEXT: AMDGPU Printf lowering ; GCN-O1-OPTS-NEXT: Lower ctors and dtors for AMDGPU +; GCN-O1-OPTS-NEXT: Expand variadic functions ; GCN-O1-OPTS-NEXT: AMDGPU Inline All Functions ; GCN-O1-OPTS-NEXT: Inliner for always_inline functions ; GCN-O1-OPTS-NEXT: FunctionPass Manager @@ -760,6 +763,7 @@ ; GCN-O2-NEXT: Lower ctors and dtors for AMDGPU ; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: AMDGPU Image Intrinsic Optimizer +; GCN-O2-NEXT: Expand variadic functions ; GCN-O2-NEXT: AMDGPU Inline All Functions ; GCN-O2-NEXT: Inliner for always_inline functions ; GCN-O2-NEXT: FunctionPass Manager @@ -1070,6 +1074,7 @@ ; GCN-O3-NEXT: Lower ctors and dtors for AMDGPU ; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: AMDGPU Image Intrinsic Optimizer +; GCN-O3-NEXT: Expand variadic functions ; GCN-O3-NEXT: AMDGPU Inline All Functions ; GCN-O3-NEXT: Inliner for always_inline functions ; GCN-O3-NEXT: FunctionPass Manager diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll index fc00937e6c8a2..721114ece56d1 100644 --- a/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/unsupported-calls.ll @@ -43,25 +43,6 @@ define i32 @test_tail_call(ptr addrspace(1) %out, ptr addrspace(1) %in) { ret i32 %c } -declare void @external.varargs(i32, double, i64, ...) - -; GCN: error: :0:0: in function test_call_varargs void (): unsupported call to variadic function external.varargs -; R600: in function test_call_varargs{{.*}}: unsupported call to function external.varargs -define void @test_call_varargs() { - call void (i32, double, i64, ...) @external.varargs(i32 42, double 1.0, i64 12, i8 3, i16 1, i32 4, float 1.0, double 2.0) - ret void -} - -declare i32 @extern_variadic(...) 
- -; GCN: in function test_tail_call_bitcast_extern_variadic{{.*}}: unsupported required tail call to function extern_variadic -; R600: in function test_tail_call_bitcast_extern_variadic{{.*}}: unsupported call to function extern_variadic -define i32 @test_tail_call_bitcast_extern_variadic(<4 x float> %arg0, <4 x float> %arg1, i32 %arg2) { - %add = fadd <4 x float> %arg0, %arg1 - %call = tail call i32 @extern_variadic(<4 x float> %add) - ret i32 %call -} - ; R600: in function test_c_call{{.*}}: unsupported call to function defined_function define amdgpu_ps i32 @test_c_call_from_shader() { %call = call i32 @defined_function(i32 0) diff --git a/llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll b/llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll new file mode 100644 index 0000000000000..80f3db0e52e0c --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll @@ -0,0 +1,484 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature +; RUN: opt -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s +; REQUIRES: webassembly-registered-target +target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20" +target triple = "wasm32-unknown-unknown" + +; Check the variables are lowered to the locations this target expects + +; The types show the call frames +; CHECK: %single_i32.vararg = type <{ i32 }> +; CHECK: %single_double.vararg = type <{ double }> +; CHECK: %single_v4f32.vararg = type <{ <4 x float> }> +; CHECK: %single_v8f32.vararg = type <{ <8 x float> }> +; CHECK: %single_v16f32.vararg = type <{ <16 x float> }> +; CHECK: %single_v32f32.vararg = type <{ <32 x float> }> +; CHECK: %i32_double.vararg = type <{ i32, [4 x i8], double }> +; CHECK: %double_i32.vararg = type <{ double, i32 }> +; CHECK: %i32_libcS.vararg = type <{ i32, ptr }> +; CHECK: %libcS_i32.vararg = type <{ ptr, i32 }> +; CHECK: %i32_v4f32.vararg = type <{ i32, [12 x i8], <4 x float> }> +; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }> +; CHECK: %i32_v8f32.vararg = type <{ i32, [28 x i8], <8 x float> }> +; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }> +; CHECK: %i32_v16f32.vararg = type <{ i32, [60 x i8], <16 x float> }> +; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }> +; CHECK: %i32_v32f32.vararg = type <{ i32, [124 x i8], <32 x float> }> +; CHECK: %v32f32_i32.vararg = type <{ <32 x float>, i32 }> +; CHECK: %fptr_single_i32.vararg = type <{ i32 }> +; CHECK: %fptr_libcS.vararg = type <{ ptr }> + +%struct.libcS = type { i8, i16, i32, i32, float, double } + +@vararg_ptr = hidden global ptr @vararg, align 4 + +define hidden void @copy(ptr noundef %va) { +; CHECK-LABEL: define {{[^@]+}}@copy(ptr noundef %va) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %va.addr = alloca ptr, align 4 +; CHECK-NEXT: %cp = alloca ptr, align 4 +; CHECK-NEXT: store ptr %va, ptr %va.addr, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr %cp, ptr %va.addr, i32 4, i1 false) +; CHECK-NEXT: %0 = load ptr, ptr %cp, align 4 +; CHECK-NEXT: call void @valist(ptr noundef %0) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp) +; CHECK-NEXT: ret void +; +entry: + %va.addr = alloca ptr, align 4 + %cp = alloca ptr, align 4 + store ptr %va, ptr %va.addr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp) + call void @llvm.va_copy.p0(ptr nonnull %cp, ptr nonnull %va.addr) + %0 = load ptr, ptr %cp, align 4 
+ call void @valist(ptr noundef %0) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp) + ret void +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + +declare void @llvm.va_copy.p0(ptr, ptr) + +declare void @valist(ptr noundef) + +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) + +define hidden void @start_once(...) { +; CHECK-LABEL: define {{[^@]+}}@start_once(ptr %varargs) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %s = alloca ptr, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s) +; CHECK-NEXT: store ptr %varargs, ptr %s, align 4 +; CHECK-NEXT: %0 = load ptr, ptr %s, align 4 +; CHECK-NEXT: call void @valist(ptr noundef %0) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s) +; CHECK-NEXT: ret void +; +entry: + %s = alloca ptr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s) + call void @llvm.va_start.p0(ptr nonnull %s) + %0 = load ptr, ptr %s, align 4 + call void @valist(ptr noundef %0) + call void @llvm.va_end.p0(ptr %s) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s) + ret void +} + +declare void @llvm.va_start.p0(ptr) + +declare void @llvm.va_end.p0(ptr) + +define hidden void @start_twice(...) { +; CHECK-LABEL: define {{[^@]+}}@start_twice(ptr %varargs) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %s0 = alloca ptr, align 4 +; CHECK-NEXT: %s1 = alloca ptr, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s0) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s1) +; CHECK-NEXT: store ptr %varargs, ptr %s0, align 4 +; CHECK-NEXT: %0 = load ptr, ptr %s0, align 4 +; CHECK-NEXT: call void @valist(ptr noundef %0) +; CHECK-NEXT: store ptr %varargs, ptr %s1, align 4 +; CHECK-NEXT: %1 = load ptr, ptr %s1, align 4 +; CHECK-NEXT: call void @valist(ptr noundef %1) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s1) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s0) +; CHECK-NEXT: ret void +; +entry: + %s0 = alloca ptr, align 4 + %s1 = alloca ptr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s0) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s1) + call void @llvm.va_start.p0(ptr nonnull %s0) + %0 = load ptr, ptr %s0, align 4 + call void @valist(ptr noundef %0) + call void @llvm.va_end.p0(ptr %s0) + call void @llvm.va_start.p0(ptr nonnull %s1) + %1 = load ptr, ptr %s1, align 4 + call void @valist(ptr noundef %1) + call void @llvm.va_end.p0(ptr %s1) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s1) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s0) + ret void +} + +define hidden void @single_i32(i32 noundef %x) { +; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x) + ret void +} + +declare void @vararg(...) 
+ +define hidden void @single_double(double noundef %x) { +; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store double %x, ptr %0, align 8 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(double noundef %x) + ret void +} + +define hidden void @single_v4f32(<4 x float> noundef %x) { +; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> noundef %x) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<4 x float> noundef %x) + ret void +} + +define hidden void @single_v8f32(<8 x float> noundef %x) { +; CHECK-LABEL: define {{[^@]+}}@single_v8f32(<8 x float> noundef %x) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 32 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %single_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<8 x float> noundef %x) + ret void +} + +define hidden void @single_v16f32(<16 x float> noundef %x) { +; CHECK-LABEL: define {{[^@]+}}@single_v16f32(<16 x float> noundef %x) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 64 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %single_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<16 x float> noundef %x) + ret void +} + +define hidden void @single_v32f32(<32 x float> noundef %x) { +; CHECK-LABEL: define {{[^@]+}}@single_v32f32(<32 x float> noundef %x) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 128 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 128, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %single_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 128, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<32 x float> noundef %x) + ret void +} + +define hidden void @i32_double(i32 noundef %x, double noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store double %y, ptr %1, align 8 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, double noundef %y) + ret void +} + +define hidden void @double_i32(double noundef %x, i32 noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store double %x, ptr %0, align 8 +; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(double noundef %x, i32 noundef %y) + ret void +} + +define hidden void @i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) { +; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %IndirectAlloca = alloca %struct.libcS, align 8 +; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 16 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %IndirectAlloca, ptr %y, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store ptr %IndirectAlloca, ptr %1, align 4 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(i32 noundef %x, ptr noundef nonnull byval(%struct.libcS) align 8 %y) + ret void +} + +define hidden void @libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %IndirectAlloca = alloca %struct.libcS, align 8 +; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 16 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %IndirectAlloca, ptr %x, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store ptr %IndirectAlloca, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(ptr noundef nonnull byval(%struct.libcS) align 8 %x, i32 noundef %y) + ret void +} + +define hidden void @i32_v4f32(i32 noundef %x, <4 x float> noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <4 x float> %y, ptr %1, align 16 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, <4 x float> noundef %y) + ret void +} + +define hidden void @v4f32_i32(<4 x float> noundef %x, i32 noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> noundef %x, i32 noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16 +; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 20, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<4 x float> noundef %x, i32 noundef %y) + ret void +} + +define hidden void @i32_v8f32(i32 noundef %x, <8 x float> noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, <8 x float> noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 32 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <8 x float> %y, ptr %1, align 32 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, <8 x float> noundef %y) + ret void +} + +define hidden void @v8f32_i32(<8 x float> noundef %x, i32 noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(<8 x float> noundef %x, i32 noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 32 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 36, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32 +; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 36, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<8 x float> noundef %x, i32 noundef %y) + ret void +} + +define hidden void @i32_v16f32(i32 noundef %x, <16 x float> noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, <16 x float> noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 64 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 128, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <16 x float> %y, ptr %1, align 64 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 128, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, <16 x float> noundef %y) + ret void +} + +define hidden void @v16f32_i32(<16 x float> noundef %x, i32 noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(<16 x float> noundef %x, i32 noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 64 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 68, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64 +; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 68, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<16 x float> noundef %x, i32 noundef %y) + ret void +} + +define hidden void @i32_v32f32(i32 noundef %x, <32 x float> noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, <32 x float> noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 128 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 256, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <32 x float> %y, ptr %1, align 128 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 256, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, <32 x float> noundef %y) + ret void +} + +define hidden void @v32f32_i32(<32 x float> noundef %x, i32 noundef %y) { +; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(<32 x float> noundef %x, i32 noundef %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 128 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 132, ptr %vararg_buffer) +; CHECK-NEXT: %0 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128 +; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @vararg(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 132, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<32 x float> noundef %x, i32 noundef %y) + ret void +} + +define hidden void @fptr_single_i32(i32 noundef %x) { +; CHECK-LABEL: define {{[^@]+}}@fptr_single_i32(i32 noundef %x) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %fptr_single_i32.vararg, align 16 +; CHECK-NEXT: %0 = load volatile ptr, ptr @vararg_ptr, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer) +; CHECK-NEXT: %1 = getelementptr inbounds %fptr_single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %1, align 4 +; CHECK-NEXT: call void %0(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + %0 = load volatile ptr, ptr @vararg_ptr, align 4 + tail call void (...) %0(i32 noundef %x) + ret void +} + +define hidden void @fptr_libcS(ptr noundef byval(%struct.libcS) align 8 %x) { +; CHECK-LABEL: define {{[^@]+}}@fptr_libcS(ptr noundef byval(%struct.libcS) align 8 %x) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %IndirectAlloca = alloca %struct.libcS, align 8 +; CHECK-NEXT: %vararg_buffer = alloca %fptr_libcS.vararg, align 16 +; CHECK-NEXT: %0 = load volatile ptr, ptr @vararg_ptr, align 4 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %IndirectAlloca, ptr %x, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %vararg_buffer) +; CHECK-NEXT: %1 = getelementptr inbounds %fptr_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store ptr %IndirectAlloca, ptr %1, align 4 +; CHECK-NEXT: call void %0(ptr %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %vararg_buffer) +; CHECK-NEXT: ret void +; +entry: + %0 = load volatile ptr, ptr @vararg_ptr, align 4 + tail call void (...) 
%0(ptr noundef nonnull byval(%struct.libcS) align 8 %x) + ret void +} + diff --git a/llvm/test/CodeGen/WebAssembly/vararg-frame.ll b/llvm/test/CodeGen/WebAssembly/vararg-frame.ll new file mode 100644 index 0000000000000..5c76040325cc9 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/vararg-frame.ll @@ -0,0 +1,526 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs | FileCheck %s +; REQUIRES: webassembly-registered-target +target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20" +target triple = "wasm32-unknown-unknown" + +; Function Attrs: nounwind +define void @pass_s0() { +; CHECK-LABEL: pass_s0: +; CHECK: .functype pass_s0 () -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: call sink +; CHECK-NEXT: # fallthrough-return +entry: + tail call void (...) @sink() + ret void +} + +declare void @sink(...) + +; Function Attrs: nounwind +define void @pass_s1(i8 %x.coerce) { +; CHECK-LABEL: pass_s1: +; CHECK: .functype pass_s1 (i32) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get __stack_pointer +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.sub +; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: call sink +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: # fallthrough-return +entry: + tail call void (...) @sink(i8 %x.coerce) + ret void +} + +; Function Attrs: nounwind +define void @pass_s2(i16 %x.coerce) { +; CHECK-LABEL: pass_s2: +; CHECK: .functype pass_s2 (i32) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get __stack_pointer +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.sub +; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: call sink +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: # fallthrough-return +entry: + tail call void (...) @sink(i16 %x.coerce) + ret void +} + +; Function Attrs: nounwind +define void @pass_s3(i32 %x.coerce) { +; CHECK-LABEL: pass_s3: +; CHECK: .functype pass_s3 (i32) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get __stack_pointer +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.sub +; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: call sink +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: # fallthrough-return +entry: + tail call void (...) 
@sink(i32 %x.coerce) + ret void +} + +; Function Attrs: nounwind +define void @pass_s4(i64 %x.coerce) { +; CHECK-LABEL: pass_s4: +; CHECK: .functype pass_s4 (i64) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get __stack_pointer +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.sub +; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64.store 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: call sink +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: # fallthrough-return +entry: + tail call void (...) @sink(i64 %x.coerce) + ret void +} + +; Function Attrs: nounwind +define void @pass_s5(<4 x i32> noundef %x) { +; CHECK-LABEL: pass_s5: +; CHECK: .functype pass_s5 (i32, i32, i32, i32) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get __stack_pointer +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.sub +; CHECK-NEXT: local.tee 4 +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: i32.store 12 +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i32.store 8 +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.store 4 +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: call sink +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: # fallthrough-return +entry: + tail call void (...) @sink(<4 x i32> noundef %x) + ret void +} + +; Function Attrs: nounwind +define void @pass_int_s0(i32 noundef %i) { +; CHECK-LABEL: pass_int_s0: +; CHECK: .functype pass_int_s0 (i32) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get __stack_pointer +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.sub +; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: call sink +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: # fallthrough-return +entry: + tail call void (...) @sink(i32 noundef %i) + ret void +} + +; Function Attrs: nounwind +define void @pass_int_s1(i32 noundef %i, i8 %x.coerce) { +; CHECK-LABEL: pass_int_s1: +; CHECK: .functype pass_int_s1 (i32, i32) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get __stack_pointer +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.sub +; CHECK-NEXT: local.tee 2 +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.store 4 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: call sink +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: # fallthrough-return +entry: + tail call void (...) 
@sink(i32 noundef %i, i8 %x.coerce) + ret void +} + +; Function Attrs: nounwind +define void @pass_int_s2(i32 noundef %i, i16 %x.coerce) { +; CHECK-LABEL: pass_int_s2: +; CHECK: .functype pass_int_s2 (i32, i32) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get __stack_pointer +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.sub +; CHECK-NEXT: local.tee 2 +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.store 4 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: call sink +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: # fallthrough-return +entry: + tail call void (...) @sink(i32 noundef %i, i16 %x.coerce) + ret void +} + +; Function Attrs: nounwind +define void @pass_int_s3(i32 noundef %i, i32 %x.coerce) { +; CHECK-LABEL: pass_int_s3: +; CHECK: .functype pass_int_s3 (i32, i32) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get __stack_pointer +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.sub +; CHECK-NEXT: local.tee 2 +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.store 4 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: call sink +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: # fallthrough-return +entry: + tail call void (...) @sink(i32 noundef %i, i32 %x.coerce) + ret void +} + +; Function Attrs: nounwind +define void @pass_int_s4(i32 noundef %i, i64 %x.coerce) { +; CHECK-LABEL: pass_int_s4: +; CHECK: .functype pass_int_s4 (i32, i64) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get __stack_pointer +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.sub +; CHECK-NEXT: local.tee 2 +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i64.store 8 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: call sink +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: # fallthrough-return +entry: + tail call void (...) 
@sink(i32 noundef %i, i64 %x.coerce) + ret void +} + +; Function Attrs: nounwind +define void @pass_int_s5(i32 noundef %i, <4 x i32> noundef %x) { +; CHECK-LABEL: pass_int_s5: +; CHECK: .functype pass_int_s5 (i32, i32, i32, i32, i32) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get __stack_pointer +; CHECK-NEXT: i32.const 32 +; CHECK-NEXT: i32.sub +; CHECK-NEXT: local.tee 5 +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: i32.const 28 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: i32.const 24 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: i32.const 20 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: call sink +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: i32.const 32 +; CHECK-NEXT: i32.add +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: # fallthrough-return +entry: + tail call void (...) @sink(i32 noundef %i, <4 x i32> noundef %x) + ret void +} + +; Function Attrs: nounwind +define void @pass_asc(i8 %x1.coerce, i16 %x2.coerce, i32 %x3.coerce, i64 %x4.coerce, <4 x i32> noundef %x5) { +; CHECK-LABEL: pass_asc: +; CHECK: .functype pass_asc (i32, i32, i32, i64, i32, i32, i32, i32) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get __stack_pointer +; CHECK-NEXT: i32.const 48 +; CHECK-NEXT: i32.sub +; CHECK-NEXT: local.tee 8 +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: i32.const 44 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 7 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: i32.const 40 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 6 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: i32.const 36 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: i32.const 32 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: i64.store 0 +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i32.store 8 +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.store 4 +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: call sink +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: i32.const 48 +; CHECK-NEXT: i32.add +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: # fallthrough-return +entry: + tail call void (...) 
@sink(i8 %x1.coerce, i16 %x2.coerce, i32 %x3.coerce, i64 %x4.coerce, <4 x i32> noundef %x5) + ret void +} + +; Function Attrs: nounwind +define void @pass_dsc(<4 x i32> noundef %x0, i64 %x1.coerce, i32 %x2.coerce, i16 %x3.coerce, i8 %x4.coerce) { +; CHECK-LABEL: pass_dsc: +; CHECK: .functype pass_dsc (i32, i32, i32, i32, i64, i32, i32, i32) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get __stack_pointer +; CHECK-NEXT: i32.const 48 +; CHECK-NEXT: i32.sub +; CHECK-NEXT: local.tee 8 +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: i32.const 32 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 7 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: i32.const 28 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 6 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: i32.const 24 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: i64.store 0 +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: i32.store 12 +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i32.store 8 +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.store 4 +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: call sink +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: i32.const 48 +; CHECK-NEXT: i32.add +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: # fallthrough-return +entry: + tail call void (...) @sink(<4 x i32> noundef %x0, i64 %x1.coerce, i32 %x2.coerce, i16 %x3.coerce, i8 %x4.coerce) + ret void +} + +; Function Attrs: nounwind +define void @pass_multiple(i32 noundef %i, i8 %x1.coerce, i16 %x2.coerce, i32 %x3.coerce, i64 %x4.coerce, <4 x i32> noundef %x5) { +; CHECK-LABEL: pass_multiple: +; CHECK: .functype pass_multiple (i32, i32, i32, i32, i64, i32, i32, i32, i32) -> () +; CHECK-NEXT: .local i32 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get __stack_pointer +; CHECK-NEXT: i32.const 48 +; CHECK-NEXT: i32.sub +; CHECK-NEXT: local.tee 9 +; CHECK-NEXT: global.set __stack_pointer +; CHECK-NEXT: local.get 9 +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: i64.store 40 +; CHECK-NEXT: local.get 9 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i32.store 36 +; CHECK-NEXT: local.get 9 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.store 32 +; CHECK-NEXT: local.get 9 +; CHECK-NEXT: i32.const 32 +; CHECK-NEXT: i32.add +; CHECK-NEXT: call sink +; CHECK-NEXT: local.get 9 +; CHECK-NEXT: i32.const 28 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 8 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 9 +; CHECK-NEXT: i32.const 24 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 7 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 9 +; CHECK-NEXT: i32.const 20 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 6 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 9 +; CHECK-NEXT: i32.const 16 +; CHECK-NEXT: i32.add +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 9 +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: i32.store 8 +; CHECK-NEXT: local.get 9 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32.store 4 +; CHECK-NEXT: local.get 9 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.store 0 +; CHECK-NEXT: local.get 9 +; CHECK-NEXT: call sink +; CHECK-NEXT: local.get 9 +; CHECK-NEXT: i32.const 48 +; CHECK-NEXT: i32.add +; CHECK-NEXT: global.set 
__stack_pointer
+; CHECK-NEXT: # fallthrough-return
+entry:
+  tail call void (...) @sink(i32 noundef %i, i16 %x2.coerce, i64 %x4.coerce)
+  tail call void (...) @sink(i32 noundef %i, i8 %x1.coerce, i32 %x3.coerce, <4 x i32> noundef %x5)
+  ret void
+}
+
diff --git a/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll
new file mode 100644
index 0000000000000..f7e21cd586e60
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll
@@ -0,0 +1,232 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s --check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s --check-prefixes=ABI
+; REQUIRES: webassembly-registered-target
+
+; Split each variadic function into two functions:
+; - one equivalent to the original, with the same symbol, linkage, etc.
+; - one implementing the contents of the original, but taking a va_list
+; The IR here is applicable to any target that uses a ptr for its va_list.
+;
+; Defines a function with each linkage (in the order of the LLVM documentation).
+; Where the split applies, the same transform is done to each.
+; Whether the split applies depends on whether the ABI is being changed - e.g. a weak
+; function is not normally useful to split, as its contents cannot be called from elsewhere.
+; If the ABI is being rewritten, the function is converted regardless. Call sites are tested elsewhere.
+
+; update_test_checks.py does not emit checks for declares
+
+declare void @sink_valist(ptr)
+declare void @llvm.va_start(ptr)
+declare void @llvm.va_end(ptr)
+
+declare void @decl_simple(...)
+define void @defn_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_simple(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_start = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start)
+; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4
+; OPT-NEXT: call void @defn_simple.valist(ptr %0)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+  %va = alloca ptr, align 4
+  call void @llvm.va_start(ptr %va)
+  call void @sink_valist(ptr %va)
+  call void @llvm.va_end(ptr %va)
+  ret void
+}
+
+; no declare for private
+define private void @defn_private_simple(...) {
+; OPT-LABEL: define {{[^@]+}}@defn_private_simple(...)
{ +; OPT-NEXT: entry: +; OPT-NEXT: %va_start = alloca ptr, align 4 +; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start) +; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start) +; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4 +; OPT-NEXT: call void @defn_private_simple.valist(ptr %0) +; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start) +; OPT-NEXT: ret void +; +; ABI-LABEL: define {{[^@]+}}@defn_private_simple(ptr %varargs) { +; ABI-NEXT: %va = alloca ptr, align 4 +; ABI-NEXT: store ptr %varargs, ptr %va, align 4 +; ABI-NEXT: call void @sink_valist(ptr %va) +; ABI-NEXT: ret void +; + %va = alloca ptr, align 4 + call void @llvm.va_start(ptr %va) + call void @sink_valist(ptr %va) + call void @llvm.va_end(ptr %va) + ret void +} + +; no declare for internal +define internal void @defn_internal_simple(...) { +; OPT-LABEL: define {{[^@]+}}@defn_internal_simple(...) { +; OPT-NEXT: entry: +; OPT-NEXT: %va_start = alloca ptr, align 4 +; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start) +; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start) +; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4 +; OPT-NEXT: call void @defn_internal_simple.valist(ptr %0) +; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start) +; OPT-NEXT: ret void +; +; ABI-LABEL: define {{[^@]+}}@defn_internal_simple(ptr %varargs) { +; ABI-NEXT: %va = alloca ptr, align 4 +; ABI-NEXT: store ptr %varargs, ptr %va, align 4 +; ABI-NEXT: call void @sink_valist(ptr %va) +; ABI-NEXT: ret void +; + %va = alloca ptr, align 4 + call void @llvm.va_start(ptr %va) + call void @sink_valist(ptr %va) + call void @llvm.va_end(ptr %va) + ret void +} + +; no declare for available_externally +define available_externally void @available_externally_simple(...) { +; OPT-LABEL: define {{[^@]+}}@available_externally_simple(...) { +; OPT-NEXT: %va = alloca ptr, align 4 +; OPT-NEXT: call void @llvm.va_start.p0(ptr %va) +; OPT-NEXT: call void @sink_valist(ptr %va) +; OPT-NEXT: ret void +; +; ABI-LABEL: define {{[^@]+}}@available_externally_simple(ptr %varargs) { +; ABI-NEXT: %va = alloca ptr, align 4 +; ABI-NEXT: store ptr %varargs, ptr %va, align 4 +; ABI-NEXT: call void @sink_valist(ptr %va) +; ABI-NEXT: ret void +; + %va = alloca ptr, align 4 + call void @llvm.va_start(ptr %va) + call void @sink_valist(ptr %va) + call void @llvm.va_end(ptr %va) + ret void +} + +; no declare for linkonce +define linkonce void @defn_linkonce_simple(...) { +; OPT-LABEL: define {{[^@]+}}@defn_linkonce_simple(...) { +; OPT-NEXT: %va = alloca ptr, align 4 +; OPT-NEXT: call void @llvm.va_start.p0(ptr %va) +; OPT-NEXT: call void @sink_valist(ptr %va) +; OPT-NEXT: ret void +; +; ABI-LABEL: define {{[^@]+}}@defn_linkonce_simple(ptr %varargs) { +; ABI-NEXT: %va = alloca ptr, align 4 +; ABI-NEXT: store ptr %varargs, ptr %va, align 4 +; ABI-NEXT: call void @sink_valist(ptr %va) +; ABI-NEXT: ret void +; + %va = alloca ptr, align 4 + call void @llvm.va_start(ptr %va) + call void @sink_valist(ptr %va) + call void @llvm.va_end(ptr %va) + ret void +} + +; no declare for weak +define weak void @defn_weak_simple(...) { +; OPT-LABEL: define {{[^@]+}}@defn_weak_simple(...) 
{ +; OPT-NEXT: %va = alloca ptr, align 4 +; OPT-NEXT: call void @llvm.va_start.p0(ptr %va) +; OPT-NEXT: call void @sink_valist(ptr %va) +; OPT-NEXT: ret void +; +; ABI-LABEL: define {{[^@]+}}@defn_weak_simple(ptr %varargs) { +; ABI-NEXT: %va = alloca ptr, align 4 +; ABI-NEXT: store ptr %varargs, ptr %va, align 4 +; ABI-NEXT: call void @sink_valist(ptr %va) +; ABI-NEXT: ret void +; + %va = alloca ptr, align 4 + call void @llvm.va_start(ptr %va) + call void @sink_valist(ptr %va) + call void @llvm.va_end(ptr %va) + ret void +} + +; common is not applicable to functions +; appending is not applicable to functions + +declare extern_weak void @decl_extern_weak_simple(...) +; no define for extern_weak + +; no declare for linkonce_odr +define linkonce_odr void @defn_linkonce_odr_simple(...) { +; OPT-LABEL: define {{[^@]+}}@defn_linkonce_odr_simple(...) { +; OPT-NEXT: %va = alloca ptr, align 4 +; OPT-NEXT: call void @llvm.va_start.p0(ptr %va) +; OPT-NEXT: call void @sink_valist(ptr %va) +; OPT-NEXT: ret void +; +; ABI-LABEL: define {{[^@]+}}@defn_linkonce_odr_simple(ptr %varargs) { +; ABI-NEXT: %va = alloca ptr, align 4 +; ABI-NEXT: store ptr %varargs, ptr %va, align 4 +; ABI-NEXT: call void @sink_valist(ptr %va) +; ABI-NEXT: ret void +; + %va = alloca ptr, align 4 + call void @llvm.va_start(ptr %va) + call void @sink_valist(ptr %va) + call void @llvm.va_end(ptr %va) + ret void +} + +; no declare for weak_odr +define weak_odr void @defn_weak_odr_simple(...) { +; OPT-LABEL: define {{[^@]+}}@defn_weak_odr_simple(...) { +; OPT-NEXT: %va = alloca ptr, align 4 +; OPT-NEXT: call void @llvm.va_start.p0(ptr %va) +; OPT-NEXT: call void @sink_valist(ptr %va) +; OPT-NEXT: ret void +; +; ABI-LABEL: define {{[^@]+}}@defn_weak_odr_simple(ptr %varargs) { +; ABI-NEXT: %va = alloca ptr, align 4 +; ABI-NEXT: store ptr %varargs, ptr %va, align 4 +; ABI-NEXT: call void @sink_valist(ptr %va) +; ABI-NEXT: ret void +; + %va = alloca ptr, align 4 + call void @llvm.va_start(ptr %va) + call void @sink_valist(ptr %va) + call void @llvm.va_end(ptr %va) + ret void +} + +declare external void @decl_external_simple(...) +define external void @defn_external_simple(...) { +; OPT-LABEL: define {{[^@]+}}@defn_external_simple(...) 
{
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_start = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start)
+; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4
+; OPT-NEXT: call void @defn_external_simple.valist(ptr %0)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start)
+; OPT-NEXT: ret void
+;
+; ABI-LABEL: define {{[^@]+}}@defn_external_simple(ptr %varargs) {
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: call void @sink_valist(ptr %va)
+; ABI-NEXT: ret void
+;
+  %va = alloca ptr, align 4
+  call void @llvm.va_start(ptr %va)
+  call void @sink_valist(ptr %va)
+  call void @llvm.va_end(ptr %va)
+  ret void
+}
diff --git a/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll
new file mode 100644
index 0000000000000..9a86540ba2d5b
--- /dev/null
+++ b/llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll
@@ -0,0 +1,214 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s --check-prefixes=OPT
+; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s --check-prefixes=ABI
+; REQUIRES: webassembly-registered-target
+
+; The examples are variadic functions that return either the first or the second of an int and a double.
+; Each is split into an internal equivalent that takes a va_list and an ABI-preserving wrapper.
+
+define i32 @variadic_int_double_get_firstz(...) {
+; OPT-LABEL: define {{[^@]+}}@variadic_int_double_get_firstz(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_start = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start)
+; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4
+; OPT-NEXT: %1 = call i32 @variadic_int_double_get_firstz.valist(ptr %0)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start)
+; OPT-NEXT: ret i32 %1
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_int_double_get_firstz(ptr %varargs) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; ABI-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; ABI-NEXT: store ptr %argp.next, ptr %va, align 4
+; ABI-NEXT: %0 = load i32, ptr %argp.cur, align 4
+; ABI-NEXT: ret i32 %0
+;
+entry:
+  %va = alloca ptr, align 4
+  call void @llvm.va_start.p0(ptr nonnull %va)
+  %argp.cur = load ptr, ptr %va, align 4
+  %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+  store ptr %argp.next, ptr %va, align 4
+  %0 = load i32, ptr %argp.cur, align 4
+  call void @llvm.va_end.p0(ptr %va)
+  ret i32 %0
+}
+
+define double @variadic_int_double_get_secondz(...) {
+; OPT-LABEL: define {{[^@]+}}@variadic_int_double_get_secondz(...) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %va_start = alloca ptr, align 4
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %va_start)
+; OPT-NEXT: call void @llvm.va_start.p0(ptr %va_start)
+; OPT-NEXT: %0 = load ptr, ptr %va_start, align 4
+; OPT-NEXT: %1 = call double @variadic_int_double_get_secondz.valist(ptr %0)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %va_start)
+; OPT-NEXT: ret double %1
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_int_double_get_secondz(ptr %varargs) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %va = alloca ptr, align 4
+; ABI-NEXT: store ptr %varargs, ptr %va, align 4
+; ABI-NEXT: %argp.cur = load ptr, ptr %va, align 4
+; ABI-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+; ABI-NEXT: %argp.next2 = getelementptr inbounds i8, ptr %argp.cur, i32 12
+; ABI-NEXT: store ptr %argp.next2, ptr %va, align 4
+; ABI-NEXT: %0 = load double, ptr %argp.next, align 4
+; ABI-NEXT: ret double %0
+;
+entry:
+  %va = alloca ptr, align 4
+  call void @llvm.va_start.p0(ptr nonnull %va)
+  %argp.cur = load ptr, ptr %va, align 4
+  %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4
+  %argp.next2 = getelementptr inbounds i8, ptr %argp.cur, i32 12
+  store ptr %argp.next2, ptr %va, align 4
+  %0 = load double, ptr %argp.next, align 4
+  call void @llvm.va_end.p0(ptr %va)
+  ret double %0
+}
+
+define zeroext i1 @variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; OPT-LABEL: define {{[^@]+}}@variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %vararg_buffer = alloca %variadic_can_get_firstIidEEbT_T0_.vararg, align 16
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %0 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; OPT-NEXT: store i32 %x, ptr %0, align 4
+; OPT-NEXT: %1 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; OPT-NEXT: store double %y, ptr %1, align 8
+; OPT-NEXT: %call = call i32 @variadic_int_double_get_firstz.valist(ptr %vararg_buffer)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %cmp.i = icmp eq i32 %call, %x
+; OPT-NEXT: ret i1 %cmp.i
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_can_get_firstIidEEbT_T0_(i32 %x, double %y) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %vararg_buffer = alloca %variadic_can_get_firstIidEEbT_T0_.vararg, align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %0 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; ABI-NEXT: store i32 %x, ptr %0, align 4
+; ABI-NEXT: %1 = getelementptr inbounds %variadic_can_get_firstIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; ABI-NEXT: store double %y, ptr %1, align 8
+; ABI-NEXT: %call = call i32 @variadic_int_double_get_firstz(ptr %vararg_buffer)
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %cmp.i = icmp eq i32 %call, %x
+; ABI-NEXT: ret i1 %cmp.i
+;
+entry:
+  %call = call i32 (...) @variadic_int_double_get_firstz(i32 %x, double %y)
+  %cmp.i = icmp eq i32 %call, %x
+  ret i1 %cmp.i
+}
+
+define zeroext i1 @variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; OPT-LABEL: define {{[^@]+}}@variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; OPT-NEXT: entry:
+; OPT-NEXT: %vararg_buffer = alloca %variadic_can_get_secondIidEEbT_T0_.vararg, align 16
+; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %0 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; OPT-NEXT: store i32 %x, ptr %0, align 4
+; OPT-NEXT: %1 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; OPT-NEXT: store double %y, ptr %1, align 8
+; OPT-NEXT: %call = call double @variadic_int_double_get_secondz.valist(ptr %vararg_buffer)
+; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; OPT-NEXT: %cmp.i = fcmp oeq double %call, %y
+; OPT-NEXT: ret i1 %cmp.i
+;
+; ABI-LABEL: define {{[^@]+}}@variadic_can_get_secondIidEEbT_T0_(i32 %x, double %y) {
+; ABI-NEXT: entry:
+; ABI-NEXT: %vararg_buffer = alloca %variadic_can_get_secondIidEEbT_T0_.vararg, align 16
+; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %0 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 0
+; ABI-NEXT: store i32 %x, ptr %0, align 4
+; ABI-NEXT: %1 = getelementptr inbounds %variadic_can_get_secondIidEEbT_T0_.vararg, ptr %vararg_buffer, i32 0, i32 2
+; ABI-NEXT: store double %y, ptr %1, align 8
+; ABI-NEXT: %call = call double @variadic_int_double_get_secondz(ptr %vararg_buffer)
+; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr %vararg_buffer)
+; ABI-NEXT: %cmp.i = fcmp oeq double %call, %y
+; ABI-NEXT: ret i1 %cmp.i
+;
+entry:
+  %call = call double (...) @variadic_int_double_get_secondz(i32 %x, double %y)
+  %cmp.i = fcmp oeq double %call, %y
+  ret i1 %cmp.i
+}
+
+; Declaration unchanged
+; CHECK: declare void @variadic_without_callers(...)
+declare void @variadic_without_callers(...)
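+
+; For reference, a comment-only sketch of the split produced by the optimize
+; mode above (lifetime markers omitted), assuming wasm32's ptr-sized va_list:
+; the original symbol becomes a thin ABI-compatible wrapper,
+;
+;   define i32 @variadic_int_double_get_firstz(...) {
+;     %va_start = alloca ptr, align 4
+;     call void @llvm.va_start.p0(ptr %va_start)
+;     %0 = load ptr, ptr %va_start, align 4
+;     %1 = call i32 @variadic_int_double_get_firstz.valist(ptr %0)
+;     ret i32 %1
+;   }
+;
+; while the original body moves into the .valist function, which known callers
+; such as @variadic_can_get_firstIidEEbT_T0_ then reach directly by packing
+; their fixed arguments into a stack-allocated buffer.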
+ +declare void @llvm.va_start.p0(ptr) +declare void @llvm.va_end.p0(ptr) diff --git a/llvm/test/Transforms/ExpandVariadics/indirect-calls.ll b/llvm/test/Transforms/ExpandVariadics/indirect-calls.ll new file mode 100644 index 0000000000000..de04c7235ad16 --- /dev/null +++ b/llvm/test/Transforms/ExpandVariadics/indirect-calls.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT +; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI +; REQUIRES: webassembly-registered-target + +declare void @vararg(...) +@vararg_ptr = hidden global ptr @vararg, align 4 + +%struct.libcS = type { i8, i16, i32, i32, float, double } + +define hidden void @fptr_single_i32(i32 noundef %x) { +; OPT-LABEL: @fptr_single_i32( +; OPT-NEXT: entry: +; OPT-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4 +; OPT-NEXT: tail call void (...) [[TMP0]](i32 noundef [[X:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @fptr_single_i32( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[FPTR_SINGLE_I32_VARARG:%.*]], align 16 +; ABI-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[FPTR_SINGLE_I32_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP1]], align 4 +; ABI-NEXT: call void [[TMP0]](ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + %0 = load volatile ptr, ptr @vararg_ptr, align 4 + tail call void (...) %0(i32 noundef %x) + ret void +} + +define hidden void @fptr_libcS(ptr noundef byval(%struct.libcS) align 8 %x) { +; OPT-LABEL: @fptr_libcS( +; OPT-NEXT: entry: +; OPT-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4 +; OPT-NEXT: tail call void (...) [[TMP0]](ptr noundef nonnull byval([[STRUCT_LIBCS:%.*]]) align 8 [[X:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @fptr_libcS( +; ABI-NEXT: entry: +; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8 +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[FPTR_LIBCS_VARARG:%.*]], align 16 +; ABI-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr @vararg_ptr, align 4 +; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[INDIRECTALLOCA]], ptr [[X:%.*]], i64 24, i1 false) +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[FPTR_LIBCS_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP1]], align 4 +; ABI-NEXT: call void [[TMP0]](ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + %0 = load volatile ptr, ptr @vararg_ptr, align 4 + tail call void (...) 
%0(ptr noundef nonnull byval(%struct.libcS) align 8 %x) + ret void +} diff --git a/llvm/test/Transforms/ExpandVariadics/intrinsics.ll b/llvm/test/Transforms/ExpandVariadics/intrinsics.ll new file mode 100644 index 0000000000000..1782c92295744 --- /dev/null +++ b/llvm/test/Transforms/ExpandVariadics/intrinsics.ll @@ -0,0 +1,120 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=CHECK,OPT +; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=CHECK,ABI +; REQUIRES: webassembly-registered-target + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + +declare void @llvm.va_copy.p0(ptr, ptr) + +declare void @valist(ptr noundef) + +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) + +declare void @llvm.va_start.p0(ptr) + +declare void @llvm.va_end.p0(ptr) + + +define void @start_once(...) { +; OPT-LABEL: @start_once( +; OPT-NEXT: entry: +; OPT-NEXT: [[VA_START:%.*]] = alloca ptr, align 4 +; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VA_START]]) +; OPT-NEXT: call void @llvm.va_start.p0(ptr [[VA_START]]) +; OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VA_START]], align 4 +; OPT-NEXT: call void @start_once.valist(ptr [[TMP0]]) +; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VA_START]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @start_once( +; ABI-NEXT: entry: +; ABI-NEXT: [[S:%.*]] = alloca ptr, align 4 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[S]]) +; ABI-NEXT: store ptr [[VARARGS:%.*]], ptr [[S]], align 4 +; ABI-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S]], align 4 +; ABI-NEXT: call void @valist(ptr noundef [[TMP0]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[S]]) +; ABI-NEXT: ret void +; +entry: + %s = alloca ptr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s) + call void @llvm.va_start.p0(ptr nonnull %s) + %0 = load ptr, ptr %s, align 4 + call void @valist(ptr noundef %0) + call void @llvm.va_end.p0(ptr %s) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s) + ret void +} + + +define void @start_twice(...) 
{ +; OPT-LABEL: @start_twice( +; OPT-NEXT: entry: +; OPT-NEXT: [[VA_START:%.*]] = alloca ptr, align 4 +; OPT-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VA_START]]) +; OPT-NEXT: call void @llvm.va_start.p0(ptr [[VA_START]]) +; OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VA_START]], align 4 +; OPT-NEXT: call void @start_twice.valist(ptr [[TMP0]]) +; OPT-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VA_START]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @start_twice( +; ABI-NEXT: entry: +; ABI-NEXT: [[S0:%.*]] = alloca ptr, align 4 +; ABI-NEXT: [[S1:%.*]] = alloca ptr, align 4 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[S0]]) +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[S1]]) +; ABI-NEXT: store ptr [[VARARGS:%.*]], ptr [[S0]], align 4 +; ABI-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S0]], align 4 +; ABI-NEXT: call void @valist(ptr noundef [[TMP0]]) +; ABI-NEXT: store ptr [[VARARGS]], ptr [[S1]], align 4 +; ABI-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S1]], align 4 +; ABI-NEXT: call void @valist(ptr noundef [[TMP1]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[S1]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[S0]]) +; ABI-NEXT: ret void +; +entry: + %s0 = alloca ptr, align 4 + %s1 = alloca ptr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s0) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %s1) + call void @llvm.va_start.p0(ptr nonnull %s0) + %0 = load ptr, ptr %s0, align 4 + call void @valist(ptr noundef %0) + call void @llvm.va_end.p0(ptr %s0) + call void @llvm.va_start.p0(ptr nonnull %s1) + %1 = load ptr, ptr %s1, align 4 + call void @valist(ptr noundef %1) + call void @llvm.va_end.p0(ptr %s1) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s1) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %s0) + ret void +} + +define void @copy(ptr noundef %va) { +; CHECK-LABEL: @copy( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VA_ADDR:%.*]] = alloca ptr, align 4 +; CHECK-NEXT: [[CP:%.*]] = alloca ptr, align 4 +; CHECK-NEXT: store ptr [[VA:%.*]], ptr [[VA_ADDR]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[CP]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[CP]], ptr [[VA_ADDR]], i32 4, i1 false) +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CP]], align 4 +; CHECK-NEXT: call void @valist(ptr noundef [[TMP0]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[CP]]) +; CHECK-NEXT: ret void +; +entry: + %va.addr = alloca ptr, align 4 + %cp = alloca ptr, align 4 + store ptr %va, ptr %va.addr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp) + call void @llvm.va_copy.p0(ptr nonnull %cp, ptr nonnull %va.addr) + %0 = load ptr, ptr %cp, align 4 + call void @valist(ptr noundef %0) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp) + ret void +} diff --git a/llvm/test/Transforms/ExpandVariadics/invoke.ll b/llvm/test/Transforms/ExpandVariadics/invoke.ll new file mode 100644 index 0000000000000..ced2edf9274fa --- /dev/null +++ b/llvm/test/Transforms/ExpandVariadics/invoke.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=CHECK +; RUN: not --crash opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s 2>&1 | FileCheck %s -check-prefixes=ERROR +; REQUIRES: 
webassembly-registered-target +target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20" + +; ERROR: LLVM ERROR: Cannot lower callbase instruction + +@_ZTIi = external constant ptr + +; Function Attrs: mustprogress +define hidden void @test0(i32 noundef %x) #0 personality ptr @__gxx_wasm_personality_v0 { +; CHECK-LABEL: @test0( +; CHECK-NEXT: entry: +; CHECK-NEXT: invoke void (...) @may_throw(i32 noundef [[X:%.*]]) +; CHECK-NEXT: to label [[TRY_CONT:%.*]] unwind label [[CATCH_DISPATCH:%.*]] +; CHECK: catch.dispatch: +; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch.start] unwind to caller +; CHECK: catch.start: +; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr @_ZTIi] +; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @llvm.wasm.get.exception(token [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.wasm.get.ehselector(token [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.eh.typeid.for.p0(ptr nonnull @_ZTIi) +; CHECK-NEXT: [[MATCHES:%.*]] = icmp eq i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: br i1 [[MATCHES]], label [[CATCH:%.*]], label [[RETHROW:%.*]] +; CHECK: catch: +; CHECK-NEXT: [[TMP5:%.*]] = call ptr @__cxa_begin_catch(ptr [[TMP2]]) [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: call void (...) @dont_throw(i32 noundef [[X]]) [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: call void @__cxa_end_catch() [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: catchret from [[TMP1]] to label [[TRY_CONT]] +; CHECK: rethrow: +; CHECK-NEXT: call void @llvm.wasm.rethrow() [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: unreachable +; CHECK: try.cont: +; CHECK-NEXT: ret void +; +entry: + invoke void (...) @may_throw(i32 noundef %x) + to label %try.cont unwind label %catch.dispatch + +catch.dispatch: ; preds = %entry + %0 = catchswitch within none [label %catch.start] unwind to caller + +catch.start: ; preds = %catch.dispatch + %1 = catchpad within %0 [ptr @_ZTIi] + %2 = tail call ptr @llvm.wasm.get.exception(token %1) + %3 = tail call i32 @llvm.wasm.get.ehselector(token %1) + %4 = tail call i32 @llvm.eh.typeid.for.p0(ptr nonnull @_ZTIi) + %matches = icmp eq i32 %3, %4 + br i1 %matches, label %catch, label %rethrow + +catch: ; preds = %catch.start + %5 = call ptr @__cxa_begin_catch(ptr %2) #6 [ "funclet"(token %1) ] + call void (...) @dont_throw(i32 noundef %x) #6 [ "funclet"(token %1) ] + call void @__cxa_end_catch() #6 [ "funclet"(token %1) ] + catchret from %1 to label %try.cont + +rethrow: ; preds = %catch.start + call void @llvm.wasm.rethrow() #5 [ "funclet"(token %1) ] + unreachable + +try.cont: ; preds = %entry, %catch + ret void +} + +declare void @may_throw(...) + +declare i32 @__gxx_wasm_personality_v0(...) + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn +declare ptr @llvm.wasm.get.exception(token) + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn +declare i32 @llvm.wasm.get.ehselector(token) + +; Function Attrs: nofree nosync nounwind memory(none) +declare i32 @llvm.eh.typeid.for.p0(ptr) + +declare ptr @__cxa_begin_catch(ptr) + +; Function Attrs: nounwind +declare void @dont_throw(...) 
+ +declare void @__cxa_end_catch() + +; Function Attrs: noreturn +declare void @llvm.wasm.rethrow() + + diff --git a/llvm/test/Transforms/ExpandVariadics/pass-byval-byref.ll b/llvm/test/Transforms/ExpandVariadics/pass-byval-byref.ll new file mode 100644 index 0000000000000..85fefda36a76e --- /dev/null +++ b/llvm/test/Transforms/ExpandVariadics/pass-byval-byref.ll @@ -0,0 +1,153 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT +; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI +; REQUIRES: webassembly-registered-target + +; CHECK: @sink +declare void @sink(...) + + +define void @pass_byval(ptr byval(i32) %b) { +; OPT-LABEL: @pass_byval( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) @sink(ptr byval(i32) [[B:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @pass_byval( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_BYVAL_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP0]], ptr [[B:%.*]], i64 4, i1 false) +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(ptr byval(i32) %b) + ret void +} + +%struct.libcS = type { i8, i16, i32, i32, float, double } + +define void @i32_libcS_byval(i32 %x, ptr noundef byval(%struct.libcS) align 8 %y) { +; OPT-LABEL: @i32_libcS_byval( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) @sink(i32 [[X:%.*]], ptr byval([[STRUCT_LIBCS:%.*]]) align 8 [[Y:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @i32_libcS_byval( +; ABI-NEXT: entry: +; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8 +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[I32_LIBCS_BYVAL_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[INDIRECTALLOCA]], ptr [[Y:%.*]], i64 24, i1 false) +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[I32_LIBCS_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP0]], align 4 +; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[I32_LIBCS_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1 +; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP1]], align 4 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(i32 %x, ptr byval(%struct.libcS) align 8 %y) + ret void +} + +define void @libcS_i32_byval(ptr byval(%struct.libcS) align 8 %x, i32 %y) { +; OPT-LABEL: @libcS_i32_byval( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) 
@sink(ptr byval([[STRUCT_LIBCS:%.*]]) align 8 [[X:%.*]], i32 [[Y:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @libcS_i32_byval( +; ABI-NEXT: entry: +; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8 +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[LIBCS_I32_BYVAL_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[INDIRECTALLOCA]], ptr [[X:%.*]], i64 24, i1 false) +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[LIBCS_I32_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP0]], align 4 +; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[LIBCS_I32_BYVAL_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1 +; ABI-NEXT: store i32 [[Y:%.*]], ptr [[TMP1]], align 4 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(ptr byval(%struct.libcS) align 8 %x, i32 %y) + ret void +} + + +define void @pass_byref(ptr byref(i32) %b) { +; OPT-LABEL: @pass_byref( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) @sink(ptr byref(i32) [[B:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @pass_byref( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_BYREF_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_BYREF_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store ptr [[B:%.*]], ptr [[TMP0]], align 4 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(ptr byref(i32) %b) + ret void +} + +define void @i32_libcS_byref(i32 %x, ptr noundef byref(%struct.libcS) align 8 %y) { +; OPT-LABEL: @i32_libcS_byref( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) @sink(i32 [[X:%.*]], ptr byref([[STRUCT_LIBCS:%.*]]) align 8 [[Y:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @i32_libcS_byref( +; ABI-NEXT: entry: +; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8 +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[I32_LIBCS_BYREF_VARARG:%.*]], align 16 +; ABI-NEXT: store ptr [[Y:%.*]], ptr [[INDIRECTALLOCA]], align 4 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[I32_LIBCS_BYREF_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP0]], align 4 +; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[I32_LIBCS_BYREF_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1 +; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP1]], align 4 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(i32 %x, ptr byref(%struct.libcS) align 8 %y) + ret void +} + +define void @libcS_i32_byref(ptr byref(%struct.libcS) align 8 %x, i32 %y) { +; OPT-LABEL: @libcS_i32_byref( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) 
@sink(ptr byref([[STRUCT_LIBCS:%.*]]) align 8 [[X:%.*]], i32 [[Y:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @libcS_i32_byref( +; ABI-NEXT: entry: +; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8 +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[LIBCS_I32_BYREF_VARARG:%.*]], align 16 +; ABI-NEXT: store ptr [[X:%.*]], ptr [[INDIRECTALLOCA]], align 4 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[LIBCS_I32_BYREF_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP0]], align 4 +; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[LIBCS_I32_BYREF_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1 +; ABI-NEXT: store i32 [[Y:%.*]], ptr [[TMP1]], align 4 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(ptr byref(%struct.libcS) align 8 %x, i32 %y) + ret void +} diff --git a/llvm/test/Transforms/ExpandVariadics/pass-indirect.ll b/llvm/test/Transforms/ExpandVariadics/pass-indirect.ll new file mode 100644 index 0000000000000..8dcbb86d02d6f --- /dev/null +++ b/llvm/test/Transforms/ExpandVariadics/pass-indirect.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT +; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI +; REQUIRES: webassembly-registered-target + +; CHECK: @sink +declare void @sink(...) + +%struct.libcS = type { i8, i16, i32, i32, float, double } + +define void @i32_libcS(i32 %x, %struct.libcS %y) { +; OPT-LABEL: @i32_libcS( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) @sink(i32 [[X:%.*]], [[STRUCT_LIBCS:%.*]] [[Y:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @i32_libcS( +; ABI-NEXT: entry: +; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8 +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[I32_LIBCS_VARARG:%.*]], align 16 +; ABI-NEXT: store [[STRUCT_LIBCS]] [[Y:%.*]], ptr [[INDIRECTALLOCA]], align 8 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[I32_LIBCS_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP0]], align 4 +; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[I32_LIBCS_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1 +; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP1]], align 4 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(i32 %x, %struct.libcS %y) + ret void +} + +define void @libcS_i32(%struct.libcS %x, i32 %y) { +; OPT-LABEL: @libcS_i32( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) 
@sink([[STRUCT_LIBCS:%.*]] [[X:%.*]], i32 [[Y:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @libcS_i32( +; ABI-NEXT: entry: +; ABI-NEXT: [[INDIRECTALLOCA:%.*]] = alloca [[STRUCT_LIBCS:%.*]], align 8 +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[LIBCS_I32_VARARG:%.*]], align 16 +; ABI-NEXT: store [[STRUCT_LIBCS]] [[X:%.*]], ptr [[INDIRECTALLOCA]], align 8 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[LIBCS_I32_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store ptr [[INDIRECTALLOCA]], ptr [[TMP0]], align 4 +; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[LIBCS_I32_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1 +; ABI-NEXT: store i32 [[Y:%.*]], ptr [[TMP1]], align 4 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(%struct.libcS %x, i32 %y) + ret void +} diff --git a/llvm/test/Transforms/ExpandVariadics/pass-integers.ll b/llvm/test/Transforms/ExpandVariadics/pass-integers.ll new file mode 100644 index 0000000000000..a1cb6811800c3 --- /dev/null +++ b/llvm/test/Transforms/ExpandVariadics/pass-integers.ll @@ -0,0 +1,345 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=optimize < %s | FileCheck %s -check-prefixes=OPT +; RUN: opt -mtriple=wasm32-unknown-unknown -S --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s -check-prefixes=ABI +; REQUIRES: webassembly-registered-target + +; Wasm passes struct {char} as an i8, so we can check that varargs passing works on integers smaller than the slot size + +declare void @sink(...) + + +define void @pass_nothing() { +; OPT-LABEL: @pass_nothing( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) @sink() +; OPT-NEXT: ret void +; +; ABI-LABEL: @pass_nothing( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_NOTHING_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink() + ret void +} + +define void @pass_s1(i8 %x) { +; OPT-LABEL: @pass_s1( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) @sink(i8 [[X:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @pass_s1( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S1_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S1_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store i8 [[X:%.*]], ptr [[TMP0]], align 1 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(i8 %x) + ret void +} + +define void @pass_s2(i16 %x) { +; OPT-LABEL: @pass_s2( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...)
@sink(i16 [[X:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @pass_s2( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S2_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 2, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S2_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store i16 [[X:%.*]], ptr [[TMP0]], align 2 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(i16 %x) + ret void +} + +define void @pass_s3(i32 %x) { +; OPT-LABEL: @pass_s3( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) @sink(i32 [[X:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @pass_s3( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S3_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S3_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP0]], align 4 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(i32 %x) + ret void +} + +define void @pass_s4(i64 %x) { +; OPT-LABEL: @pass_s4( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) @sink(i64 [[X:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @pass_s4( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S4_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S4_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store i64 [[X:%.*]], ptr [[TMP0]], align 8 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(i64 %x) + ret void +} + +define void @pass_s5(<4 x i32> %x) { +; OPT-LABEL: @pass_s5( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) @sink(<4 x i32> [[X:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @pass_s5( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_S5_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_S5_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store <4 x i32> [[X:%.*]], ptr [[TMP0]], align 16 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(<4 x i32> %x) + ret void +} + +define void @pass_int_s1(i32 %i, i8 %x) { +; OPT-LABEL: @pass_int_s1( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) 
@sink(i32 [[I:%.*]], i8 [[X:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @pass_int_s1( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S1_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 5, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S1_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4 +; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S1_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1 +; ABI-NEXT: store i8 [[X:%.*]], ptr [[TMP1]], align 1 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 5, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(i32 %i, i8 %x) + ret void +} + +define void @pass_int_s2(i32 %i, i16 %x) { +; OPT-LABEL: @pass_int_s2( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i16 [[X:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @pass_int_s2( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S2_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 6, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S2_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4 +; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S2_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1 +; ABI-NEXT: store i16 [[X:%.*]], ptr [[TMP1]], align 2 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 6, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(i32 %i, i16 %x) + ret void +} + +define void @pass_int_s3(i32 %i, i32 %x) { +; OPT-LABEL: @pass_int_s3( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i32 [[X:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @pass_int_s3( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S3_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S3_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4 +; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S3_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1 +; ABI-NEXT: store i32 [[X:%.*]], ptr [[TMP1]], align 4 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(i32 %i, i32 %x) + ret void +} + +define void @pass_int_s4(i32 %i, i64 %x) { +; OPT-LABEL: @pass_int_s4( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) 
@sink(i32 [[I:%.*]], i64 [[X:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @pass_int_s4( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S4_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S4_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4 +; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S4_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2 +; ABI-NEXT: store i64 [[X:%.*]], ptr [[TMP1]], align 8 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(i32 %i, i64 %x) + ret void +} + +define void @pass_int_s5(i32 %i, <4 x i32> %x) { +; OPT-LABEL: @pass_int_s5( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], <4 x i32> [[X:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @pass_int_s5( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_INT_S5_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_INT_S5_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4 +; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_INT_S5_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2 +; ABI-NEXT: store <4 x i32> [[X:%.*]], ptr [[TMP1]], align 16 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(i32 %i, <4 x i32> %x) + ret void +} + +define void @pass_asc(i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> %x5) { +; OPT-LABEL: @pass_asc( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) @sink(i8 [[X1:%.*]], i16 [[X2:%.*]], i32 [[X3:%.*]], i64 [[X4:%.*]], <4 x i32> [[X5:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @pass_asc( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_ASC_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 48, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store i8 [[X1:%.*]], ptr [[TMP0]], align 1 +; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2 +; ABI-NEXT: store i16 [[X2:%.*]], ptr [[TMP1]], align 2 +; ABI-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 4 +; ABI-NEXT: store i32 [[X3:%.*]], ptr [[TMP2]], align 4 +; ABI-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 6 +; ABI-NEXT: store i64 [[X4:%.*]], ptr [[TMP3]], align 8 +; ABI-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PASS_ASC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 8 +; ABI-NEXT: store <4 x i32> [[X5:%.*]], ptr [[TMP4]], align 16 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 48, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> %x5) + ret void +} + +define void @pass_dsc(<4 x i32> %x0, i64 %x1, i32 %x2, i16 %x3, i8 %x4) { +; OPT-LABEL: @pass_dsc( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) 
@sink(<4 x i32> [[X0:%.*]], i64 [[X1:%.*]], i32 [[X2:%.*]], i16 [[X3:%.*]], i8 [[X4:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @pass_dsc( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_DSC_VARARG:%.*]], align 16 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 33, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store <4 x i32> [[X0:%.*]], ptr [[TMP0]], align 16 +; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1 +; ABI-NEXT: store i64 [[X1:%.*]], ptr [[TMP1]], align 8 +; ABI-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 2 +; ABI-NEXT: store i32 [[X2:%.*]], ptr [[TMP2]], align 4 +; ABI-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 3 +; ABI-NEXT: store i16 [[X3:%.*]], ptr [[TMP3]], align 2 +; ABI-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PASS_DSC_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 5 +; ABI-NEXT: store i8 [[X4:%.*]], ptr [[TMP4]], align 1 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 33, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(<4 x i32> %x0, i64 %x1, i32 %x2, i16 %x3, i8 %x4) + ret void +} + +define void @pass_multiple(i32 %i, i8 %x1, i16 %x2, i32 %x3, i64 %x4, <4 x i32> %x5) { +; OPT-LABEL: @pass_multiple( +; OPT-NEXT: entry: +; OPT-NEXT: tail call void (...) @sink(i32 [[I:%.*]], i16 [[X2:%.*]], i64 [[X4:%.*]]) +; OPT-NEXT: tail call void (...) @sink(i32 [[I]], i8 [[X1:%.*]], i32 [[X3:%.*]], <4 x i32> [[X5:%.*]]) +; OPT-NEXT: ret void +; +; ABI-LABEL: @pass_multiple( +; ABI-NEXT: entry: +; ABI-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[PASS_MULTIPLE_VARARG:%.*]], align 16 +; ABI-NEXT: [[VARARG_BUFFER1:%.*]] = alloca [[PASS_MULTIPLE_VARARG_0:%.*]], align 16 +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0 +; ABI-NEXT: store i32 [[I:%.*]], ptr [[TMP0]], align 4 +; ABI-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 1 +; ABI-NEXT: store i16 [[X2:%.*]], ptr [[TMP1]], align 2 +; ABI-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 3 +; ABI-NEXT: store i64 [[X4:%.*]], ptr [[TMP2]], align 8 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]]) +; ABI-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr [[VARARG_BUFFER1]]) +; ABI-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 0 +; ABI-NEXT: store i32 [[I]], ptr [[TMP3]], align 4 +; ABI-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 1 +; ABI-NEXT: store i8 [[X1:%.*]], ptr [[TMP4]], align 1 +; ABI-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 3 +; ABI-NEXT: store i32 [[X3:%.*]], ptr [[TMP5]], align 4 +; ABI-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PASS_MULTIPLE_VARARG_0]], ptr [[VARARG_BUFFER1]], i32 0, i32 5 +; ABI-NEXT: store <4 x i32> [[X5:%.*]], ptr [[TMP6]], align 16 +; ABI-NEXT: call void @sink(ptr [[VARARG_BUFFER1]]) +; ABI-NEXT: call void 
@llvm.lifetime.end.p0(i64 32, ptr [[VARARG_BUFFER1]]) +; ABI-NEXT: ret void +; +entry: + tail call void (...) @sink(i32 %i, i16 %x2, i64 %x4) + tail call void (...) @sink(i32 %i, i8 %x1, i32 %x3, <4 x i32> %x5) + ret void +}
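
The ABI-mode checks in these tests all follow one shape: the would-be variadic arguments are stored in slot order into a frame struct on the stack, and the callee receives a single pointer to that frame. As a reading aid, the same rewrite for @pass_int_s3 is sketched below in C. This is a hedged illustration only: the struct and function names are invented for the example, whereas the pass derives a per-call-site frame type such as %pass_int_s3.vararg and keeps the callee's name.

  /* Sketch of what the lowering does to the variadic call sink(i, x):
     it becomes a fixed call taking a pointer to a stack frame that holds
     the promoted arguments in slot order. Names are illustrative. */
  #include <stdint.h>

  struct frame { int32_t i; int32_t x; };  /* stands in for %pass_int_s3.vararg */

  void sink_lowered(void *vararg_buffer);  /* stands in for the rewritten @sink */

  void pass_int_s3(int32_t i, int32_t x) {
    struct frame f = { i, x };  /* the alloca plus the two stores */
    sink_lowered(&f);           /* call void @sink(ptr %vararg_buffer) */
  }                             /* frame dies here: llvm.lifetime.end */

Aggregates take one extra step: a %struct.libcS value is first copied into its own alloca (a store in pass-indirect.ll, a memcpy in the byval tests) and only that pointer is placed in the frame, which is what the INDIRECTALLOCA lines in the checks above correspond to.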