@@ -176,20 +176,16 @@ TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
176
176
AND (SM_86, PTX72))
177
177
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16x2, " V2hV2hV2h" , " " ,
178
178
AND (SM_86, PTX72))
179
- TARGET_BUILTIN(__nvvm_fmin_bf16, " yyy" , " " , AND(SM_80, PTX70))
180
- TARGET_BUILTIN(__nvvm_fmin_ftz_bf16, " yyy" , " " , AND(SM_80, PTX70))
181
- TARGET_BUILTIN(__nvvm_fmin_nan_bf16, " yyy" , " " , AND(SM_80, PTX70))
182
- TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16, " yyy" , " " , AND(SM_80, PTX70))
183
- TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, " yyy" , " " , AND(SM_86, PTX72))
184
- TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, " yyy" , " " ,
179
+ TARGET_BUILTIN(__nvvm_fmin_bf16, " UsUsUs" , " " , AND(SM_80, PTX70))
180
+ TARGET_BUILTIN(__nvvm_fmin_nan_bf16, " UsUsUs" , " " , AND(SM_80, PTX70))
181
+ TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, " UsUsUs" , " " , AND(SM_86, PTX72))
182
+ TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, " UsUsUs" , " " ,
185
183
AND (SM_86, PTX72))
186
- TARGET_BUILTIN(__nvvm_fmin_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
187
- TARGET_BUILTIN(__nvvm_fmin_ftz_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
188
- TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
189
- TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
190
- TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, " V2yV2yV2y" , " " ,
184
+ TARGET_BUILTIN(__nvvm_fmin_bf16x2, " ZUiZUiZUi" , " " , AND(SM_80, PTX70))
185
+ TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, " ZUiZUiZUi" , " " , AND(SM_80, PTX70))
186
+ TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, " ZUiZUiZUi" , " " ,
191
187
AND (SM_86, PTX72))
192
- TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, " V2yV2yV2y " , " " ,
188
+ TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, " ZUiZUiZUi " , " " ,
193
189
AND (SM_86, PTX72))
194
190
BUILTIN(__nvvm_fmin_f, " fff" , " " )
195
191
BUILTIN(__nvvm_fmin_ftz_f, " fff" , " " )
@@ -222,20 +218,16 @@ TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
222
218
AND (SM_86, PTX72))
223
219
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16x2, " V2hV2hV2h" , " " ,
224
220
AND (SM_86, PTX72))
225
- TARGET_BUILTIN(__nvvm_fmax_bf16, " yyy" , " " , AND(SM_80, PTX70))
226
- TARGET_BUILTIN(__nvvm_fmax_ftz_bf16, " yyy" , " " , AND(SM_80, PTX70))
227
- TARGET_BUILTIN(__nvvm_fmax_nan_bf16, " yyy" , " " , AND(SM_80, PTX70))
228
- TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16, " yyy" , " " , AND(SM_80, PTX70))
229
- TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, " yyy" , " " , AND(SM_86, PTX72))
230
- TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, " yyy" , " " ,
221
+ TARGET_BUILTIN(__nvvm_fmax_bf16, " UsUsUs" , " " , AND(SM_80, PTX70))
222
+ TARGET_BUILTIN(__nvvm_fmax_nan_bf16, " UsUsUs" , " " , AND(SM_80, PTX70))
223
+ TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, " UsUsUs" , " " , AND(SM_86, PTX72))
224
+ TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, " UsUsUs" , " " ,
231
225
AND (SM_86, PTX72))
232
- TARGET_BUILTIN(__nvvm_fmax_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
233
- TARGET_BUILTIN(__nvvm_fmax_ftz_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
234
- TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
235
- TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16x2, " V2yV2yV2y" , " " , AND(SM_80, PTX70))
236
- TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, " V2yV2yV2y" , " " ,
226
+ TARGET_BUILTIN(__nvvm_fmax_bf16x2, " ZUiZUiZUi" , " " , AND(SM_80, PTX70))
227
+ TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, " ZUiZUiZUi" , " " , AND(SM_80, PTX70))
228
+ TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, " ZUiZUiZUi" , " " ,
237
229
AND (SM_86, PTX72))
238
- TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, " V2yV2yV2y " , " " ,
230
+ TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, " ZUiZUiZUi " , " " ,
239
231
AND (SM_86, PTX72))
240
232
BUILTIN(__nvvm_fmax_f, " fff" , " " )
241
233
BUILTIN(__nvvm_fmax_ftz_f, " fff" , " " )
@@ -369,10 +361,10 @@ TARGET_BUILTIN(__nvvm_fma_rn_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
369
361
TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16x2, " V2hV2hV2hV2h" , " " , AND(SM_53, PTX42))
370
362
TARGET_BUILTIN(__nvvm_fma_rn_relu_f16x2, " V2hV2hV2hV2h" , " " , AND(SM_80, PTX70))
371
363
TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16x2, " V2hV2hV2hV2h" , " " , AND(SM_80, PTX70))
372
- TARGET_BUILTIN(__nvvm_fma_rn_bf16, " yyyy " , " " , AND(SM_80, PTX70))
373
- TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, " yyyy " , " " , AND(SM_80, PTX70))
374
- TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, " V2yV2yV2yV2y " , " " , AND(SM_80, PTX70))
375
- TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, " V2yV2yV2yV2y " , " " , AND(SM_80, PTX70))
364
+ TARGET_BUILTIN(__nvvm_fma_rn_bf16, " UsUsUsUs " , " " , AND(SM_80, PTX70))
365
+ TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, " UsUsUsUs " , " " , AND(SM_80, PTX70))
366
+ TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, " ZUiZUiZUiZUi " , " " , AND(SM_80, PTX70))
367
+ TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, " ZUiZUiZUiZUi " , " " , AND(SM_80, PTX70))
376
368
BUILTIN(__nvvm_fma_rn_ftz_f, " ffff" , " " )
377
369
BUILTIN(__nvvm_fma_rn_f, " ffff" , " " )
378
370
BUILTIN(__nvvm_fma_rz_ftz_f, " ffff" , " " )
@@ -561,20 +553,20 @@ BUILTIN(__nvvm_ull2d_rp, "dULLi", "")
561
553
BUILTIN(__nvvm_f2h_rn_ftz, " Usf" , " " )
562
554
BUILTIN(__nvvm_f2h_rn, " Usf" , " " )
563
555
564
- TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, " V2yff " , " " , AND(SM_80,PTX70))
565
- TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, " V2yff " , " " , AND(SM_80,PTX70))
566
- TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, " V2yff " , " " , AND(SM_80,PTX70))
567
- TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, " V2yff " , " " , AND(SM_80,PTX70))
556
+ TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, " ZUiff " , " " , AND(SM_80,PTX70))
557
+ TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, " ZUiff " , " " , AND(SM_80,PTX70))
558
+ TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, " ZUiff " , " " , AND(SM_80,PTX70))
559
+ TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, " ZUiff " , " " , AND(SM_80,PTX70))
568
560
569
561
TARGET_BUILTIN(__nvvm_ff2f16x2_rn, " V2hff" , " " , AND(SM_80,PTX70))
570
562
TARGET_BUILTIN(__nvvm_ff2f16x2_rn_relu, " V2hff" , " " , AND(SM_80,PTX70))
571
563
TARGET_BUILTIN(__nvvm_ff2f16x2_rz, " V2hff" , " " , AND(SM_80,PTX70))
572
564
TARGET_BUILTIN(__nvvm_ff2f16x2_rz_relu, " V2hff" , " " , AND(SM_80,PTX70))
573
565
574
- TARGET_BUILTIN(__nvvm_f2bf16_rn, " yf " , " " , AND(SM_80,PTX70))
575
- TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, " yf " , " " , AND(SM_80,PTX70))
576
- TARGET_BUILTIN(__nvvm_f2bf16_rz, " yf " , " " , AND(SM_80,PTX70))
577
- TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, " yf " , " " , AND(SM_80,PTX70))
566
+ TARGET_BUILTIN(__nvvm_f2bf16_rn, " ZUsf " , " " , AND(SM_80,PTX70))
567
+ TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, " ZUsf " , " " , AND(SM_80,PTX70))
568
+ TARGET_BUILTIN(__nvvm_f2bf16_rz, " ZUsf " , " " , AND(SM_80,PTX70))
569
+ TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, " ZUsf " , " " , AND(SM_80,PTX70))
578
570
579
571
TARGET_BUILTIN(__nvvm_f2tf32_rna, " ZUif" , " " , AND(SM_80,PTX70))
580
572
@@ -2657,10 +2649,10 @@ TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70))
2657
2649
2658
2650
2659
2651
// bf16, bf16x2 abs, neg
2660
- TARGET_BUILTIN(__nvvm_abs_bf16, " yy " , " " , AND(SM_80,PTX70))
2661
- TARGET_BUILTIN(__nvvm_abs_bf16x2, " V2yV2y " , " " , AND(SM_80,PTX70))
2662
- TARGET_BUILTIN(__nvvm_neg_bf16, " yy " , " " , AND(SM_80,PTX70))
2663
- TARGET_BUILTIN(__nvvm_neg_bf16x2, " V2yV2y " , " " , AND(SM_80,PTX70))
2652
+ TARGET_BUILTIN(__nvvm_abs_bf16, " UsUs " , " " , AND(SM_80,PTX70))
2653
+ TARGET_BUILTIN(__nvvm_abs_bf16x2, " ZUiZUi " , " " , AND(SM_80,PTX70))
2654
+ TARGET_BUILTIN(__nvvm_neg_bf16, " UsUs " , " " , AND(SM_80,PTX70))
2655
+ TARGET_BUILTIN(__nvvm_neg_bf16x2, " ZUiZUi " , " " , AND(SM_80,PTX70))
2664
2656
2665
2657
TARGET_BUILTIN(__nvvm_mapa, " v*v*i" , " " , AND(SM_90, PTX78))
2666
2658
TARGET_BUILTIN(__nvvm_mapa_shared_cluster, " v*3v*3i" , " " , AND(SM_90, PTX78))
0 commit comments