@@ -176,16 +176,20 @@ TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
176
176
AND (SM_86, PTX72))
177
177
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16x2, " V2hV2hV2h" , " " ,
178
178
AND (SM_86, PTX72))
179
- TARGET_BUILTIN(__nvvm_fmin_bf16, " UsUsUs" , " " , AND(SM_80, PTX70))
180
- TARGET_BUILTIN(__nvvm_fmin_nan_bf16, " UsUsUs" , " " , AND(SM_80, PTX70))
181
- TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, " UsUsUs" , " " , AND(SM_86, PTX72))
182
- TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, " UsUsUs" , " " ,
179
+ TARGET_BUILTIN(__nvvm_fmin_bf16, " yyy" , " " , AND(SM_80, PTX70))
180
+ TARGET_BUILTIN(__nvvm_fmin_ftz_bf16, " yyy" , " " , AND(SM_80, PTX70))
181
+ TARGET_BUILTIN(__nvvm_fmin_nan_bf16, " yyy" , " " , AND(SM_80, PTX70))
182
+ TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16, " yyy" , " " , AND(SM_80, PTX70))
183
+ TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, " yyy" , " " , AND(SM_86, PTX72))
184
+ TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, " yyy" , " " ,
183
185
AND (SM_86, PTX72))
184
- TARGET_BUILTIN(__nvvm_fmin_bf16x2, " ZUiZUiZUi" , " " , AND(SM_80, PTX70))
185
- TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, " ZUiZUiZUi" , " " , AND(SM_80, PTX70))
186
- TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, " ZUiZUiZUi" , " " ,
186
+ TARGET_BUILTIN(__nvvm_fmin_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
187
+ TARGET_BUILTIN(__nvvm_fmin_ftz_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
188
+ TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
189
+ TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
190
+ TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, " V2yV2yV2y" , " " ,
187
191
AND (SM_86, PTX72))
188
- TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, " ZUiZUiZUi " , " " ,
192
+ TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, " V2yV2yV2y " , " " ,
189
193
AND (SM_86, PTX72))
190
194
BUILTIN(__nvvm_fmin_f, " fff" , " " )
191
195
BUILTIN(__nvvm_fmin_ftz_f, " fff" , " " )
@@ -218,16 +222,20 @@ TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
218
222
AND (SM_86, PTX72))
219
223
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16x2, " V2hV2hV2h" , " " ,
220
224
AND (SM_86, PTX72))
221
- TARGET_BUILTIN(__nvvm_fmax_bf16, " UsUsUs" , " " , AND(SM_80, PTX70))
222
- TARGET_BUILTIN(__nvvm_fmax_nan_bf16, " UsUsUs" , " " , AND(SM_80, PTX70))
223
- TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, " UsUsUs" , " " , AND(SM_86, PTX72))
224
- TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, " UsUsUs" , " " ,
225
+ TARGET_BUILTIN(__nvvm_fmax_bf16, " yyy" , " " , AND(SM_80, PTX70))
226
+ TARGET_BUILTIN(__nvvm_fmax_ftz_bf16, " yyy" , " " , AND(SM_80, PTX70))
227
+ TARGET_BUILTIN(__nvvm_fmax_nan_bf16, " yyy" , " " , AND(SM_80, PTX70))
228
+ TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16, " yyy" , " " , AND(SM_80, PTX70))
229
+ TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, " yyy" , " " , AND(SM_86, PTX72))
230
+ TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, " yyy" , " " ,
225
231
AND (SM_86, PTX72))
226
- TARGET_BUILTIN(__nvvm_fmax_bf16x2, " ZUiZUiZUi" , " " , AND(SM_80, PTX70))
227
- TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, " ZUiZUiZUi" , " " , AND(SM_80, PTX70))
228
- TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, " ZUiZUiZUi" , " " ,
232
+ TARGET_BUILTIN(__nvvm_fmax_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
233
+ TARGET_BUILTIN(__nvvm_fmax_ftz_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
234
+ TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
235
+ TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
236
+ TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, " V2yV2yV2y" , " " ,
229
237
AND (SM_86, PTX72))
230
- TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, " ZUiZUiZUi " , " " ,
238
+ TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, " V2yV2yV2y " , " " ,
231
239
AND (SM_86, PTX72))
232
240
BUILTIN(__nvvm_fmax_f, " fff" , " " )
233
241
BUILTIN(__nvvm_fmax_ftz_f, " fff" , " " )
@@ -361,10 +369,10 @@ TARGET_BUILTIN(__nvvm_fma_rn_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
361
369
TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16x2, " V2hV2hV2hV2h" , " " , AND(SM_53, PTX42))
362
370
TARGET_BUILTIN(__nvvm_fma_rn_relu_f16x2, " V2hV2hV2hV2h" , " " , AND(SM_80, PTX70))
363
371
TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16x2, " V2hV2hV2hV2h" , " " , AND(SM_80, PTX70))
364
- TARGET_BUILTIN(__nvvm_fma_rn_bf16, " UsUsUsUs " , " " , AND(SM_80, PTX70))
365
- TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, " UsUsUsUs " , " " , AND(SM_80, PTX70))
366
- TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, " ZUiZUiZUiZUi " , " " , AND(SM_80, PTX70))
367
- TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, " ZUiZUiZUiZUi " , " " , AND(SM_80, PTX70))
372
+ TARGET_BUILTIN(__nvvm_fma_rn_bf16, " yyyy " , " " , AND(SM_80, PTX70))
373
+ TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, " yyyy " , " " , AND(SM_80, PTX70))
374
+ TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, " V2yV2yV2yV2y " , " " , AND(SM_80, PTX70))
375
+ TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, " V2yV2yV2yV2y " , " " , AND(SM_80, PTX70))
368
376
BUILTIN(__nvvm_fma_rn_ftz_f, " ffff" , " " )
369
377
BUILTIN(__nvvm_fma_rn_f, " ffff" , " " )
370
378
BUILTIN(__nvvm_fma_rz_ftz_f, " ffff" , " " )
@@ -553,20 +561,20 @@ BUILTIN(__nvvm_ull2d_rp, "dULLi", "")
553
561
BUILTIN(__nvvm_f2h_rn_ftz, " Usf" , " " )
554
562
BUILTIN(__nvvm_f2h_rn, " Usf" , " " )
555
563
556
- TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, " ZUiff " , " " , AND(SM_80,PTX70))
557
- TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, " ZUiff " , " " , AND(SM_80,PTX70))
558
- TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, " ZUiff " , " " , AND(SM_80,PTX70))
559
- TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, " ZUiff " , " " , AND(SM_80,PTX70))
564
+ TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, " V2yff " , " " , AND(SM_80,PTX70))
565
+ TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, " V2yff " , " " , AND(SM_80,PTX70))
566
+ TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, " V2yff " , " " , AND(SM_80,PTX70))
567
+ TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, " V2yff " , " " , AND(SM_80,PTX70))
560
568
561
569
TARGET_BUILTIN(__nvvm_ff2f16x2_rn, " V2hff" , " " , AND(SM_80,PTX70))
562
570
TARGET_BUILTIN(__nvvm_ff2f16x2_rn_relu, " V2hff" , " " , AND(SM_80,PTX70))
563
571
TARGET_BUILTIN(__nvvm_ff2f16x2_rz, " V2hff" , " " , AND(SM_80,PTX70))
564
572
TARGET_BUILTIN(__nvvm_ff2f16x2_rz_relu, " V2hff" , " " , AND(SM_80,PTX70))
565
573
566
- TARGET_BUILTIN(__nvvm_f2bf16_rn, " ZUsf " , " " , AND(SM_80,PTX70))
567
- TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, " ZUsf " , " " , AND(SM_80,PTX70))
568
- TARGET_BUILTIN(__nvvm_f2bf16_rz, " ZUsf " , " " , AND(SM_80,PTX70))
569
- TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, " ZUsf " , " " , AND(SM_80,PTX70))
574
+ TARGET_BUILTIN(__nvvm_f2bf16_rn, " yf " , " " , AND(SM_80,PTX70))
575
+ TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, " yf " , " " , AND(SM_80,PTX70))
576
+ TARGET_BUILTIN(__nvvm_f2bf16_rz, " yf " , " " , AND(SM_80,PTX70))
577
+ TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, " yf " , " " , AND(SM_80,PTX70))
570
578
571
579
TARGET_BUILTIN(__nvvm_f2tf32_rna, " ZUif" , " " , AND(SM_80,PTX70))
572
580
@@ -2649,10 +2657,10 @@ TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70))
2649
2657
2650
2658
2651
2659
// bf16, bf16x2 abs, neg
2652
- TARGET_BUILTIN(__nvvm_abs_bf16, " UsUs " , " " , AND(SM_80,PTX70))
2653
- TARGET_BUILTIN(__nvvm_abs_bf16x2, " ZUiZUi " , " " , AND(SM_80,PTX70))
2654
- TARGET_BUILTIN(__nvvm_neg_bf16, " UsUs " , " " , AND(SM_80,PTX70))
2655
- TARGET_BUILTIN(__nvvm_neg_bf16x2, " ZUiZUi " , " " , AND(SM_80,PTX70))
2660
+ TARGET_BUILTIN(__nvvm_abs_bf16, " yy " , " " , AND(SM_80,PTX70))
2661
+ TARGET_BUILTIN(__nvvm_abs_bf16x2, " V2yV2y " , " " , AND(SM_80,PTX70))
2662
+ TARGET_BUILTIN(__nvvm_neg_bf16, " yy " , " " , AND(SM_80,PTX70))
2663
+ TARGET_BUILTIN(__nvvm_neg_bf16x2, " V2yV2y " , " " , AND(SM_80,PTX70))
2656
2664
2657
2665
TARGET_BUILTIN(__nvvm_mapa, " v*v*i" , " " , AND(SM_90, PTX78))
2658
2666
TARGET_BUILTIN(__nvvm_mapa_shared_cluster, " v*3v*3i" , " " , AND(SM_90, PTX78))
0 commit comments