Skip to content

Commit a50cb6c

Browse files
authored
[AMDGPU][True16][CodeGen] fix a predicate bug in VGPRImm with f16/bf16 (#144942)
Fixed a typo where the f16/bf16 VGPRImm pattern was not guarded by the True16Predicate scope: the closing curly brace was misplaced.
1 parent bb8c42e commit a50cb6c

13 files changed

+202
-189
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2200,17 +2200,17 @@ foreach pred = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in {
22002200
(VGPRImm<(i16 imm)>:$imm),
22012201
(V_MOV_B32_e32 imm:$imm)
22022202
>;
2203-
}
22042203

2205-
// FIXME: Workaround for ordering issue with peephole optimizer where
2206-
// a register class copy interferes with immediate folding. Should
2207-
// use s_mov_b32, which can be shrunk to s_movk_i32
2204+
// FIXME: Workaround for ordering issue with peephole optimizer where
2205+
// a register class copy interferes with immediate folding. Should
2206+
// use s_mov_b32, which can be shrunk to s_movk_i32
22082207

2209-
foreach vt = [f16, bf16] in {
2210-
def : GCNPat <
2211-
(VGPRImm<(vt fpimm)>:$imm),
2212-
(V_MOV_B32_e32 (vt (bitcast_fpimm_to_i32 $imm)))
2213-
>;
2208+
foreach vt = [f16, bf16] in {
2209+
def : GCNPat <
2210+
(VGPRImm<(vt fpimm)>:$imm),
2211+
(V_MOV_B32_e32 (vt (bitcast_fpimm_to_i32 $imm)))
2212+
>;
2213+
}
22142214
}
22152215
}
22162216

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.48bit.ll

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -150,32 +150,33 @@ define <3 x half> @bitcast_v3bf16_to_v3f16(<3 x bfloat> %a, i32 %b) {
150150
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
151151
; GFX11-TRUE16-NEXT: v_dual_add_f32 v1, 0x40c00000, v1 :: v_dual_lshlrev_b32 v2, 16, v2
152152
; GFX11-TRUE16-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
153-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
154-
; GFX11-TRUE16-NEXT: v_bfe_u32 v4, v1, 16, 1
155-
; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v1
153+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
154+
; GFX11-TRUE16-NEXT: v_bfe_u32 v6, v1, 16, 1
156155
; GFX11-TRUE16-NEXT: v_bfe_u32 v3, v2, 16, 1
157-
; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v2
156+
; GFX11-TRUE16-NEXT: v_or_b32_e32 v5, 0x400000, v2
158157
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
159-
; GFX11-TRUE16-NEXT: v_add3_u32 v4, v4, v1, 0x7fff
160-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
158+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
161159
; GFX11-TRUE16-NEXT: v_add3_u32 v3, v3, v2, 0x7fff
162-
; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v2, v3, v7 :: v_dual_mov_b32 v3, 0x7fc0
163-
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
160+
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc_lo
161+
; GFX11-TRUE16-NEXT: v_or_b32_e32 v3, 0x400000, v1
162+
; GFX11-TRUE16-NEXT: v_add3_u32 v5, v6, v1, 0x7fff
164163
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
165-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
164+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
166165
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v2.h
167-
; GFX11-TRUE16-NEXT: v_dual_cndmask_b32 v1, v4, v5 :: v_dual_add_f32 v0, 0x40c00000, v0
168-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
169-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
170-
; GFX11-TRUE16-NEXT: v_bfe_u32 v6, v0, 16, 1
171-
; GFX11-TRUE16-NEXT: v_or_b32_e32 v8, 0x400000, v0
166+
; GFX11-TRUE16-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
167+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
168+
; GFX11-TRUE16-NEXT: v_bfe_u32 v4, v0, 16, 1
169+
; GFX11-TRUE16-NEXT: v_or_b32_e32 v7, 0x400000, v0
172170
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
173-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v3.l
174-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
175-
; GFX11-TRUE16-NEXT: v_add3_u32 v6, v6, v0, 0x7fff
176-
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc_lo
177-
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
171+
; GFX11-TRUE16-NEXT: v_add3_u32 v4, v4, v0, 0x7fff
172+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
173+
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v4, v7, vcc_lo
174+
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1
175+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, 0x7fc0
178176
; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v2, v0
177+
; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc_lo
178+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
179+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.h
179180
; GFX11-TRUE16-NEXT: .LBB0_2: ; %end
180181
; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s0
181182
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]

llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1845,11 +1845,11 @@ define <2 x half> @fmul_select_v2f16_test3(<2 x half> %x, <2 x i32> %bool.arg1,
18451845
; GFX11-SDAG-TRUE16: ; %bb.0:
18461846
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18471847
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
1848-
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0x4000
1848+
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0x3c00
18491849
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, v1, v3
18501850
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1851-
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x3c00, v2.l, vcc_lo
1852-
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x3c00, v2.l, s0
1851+
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, v2.l, 0x4000, vcc_lo
1852+
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.h, v2.l, 0x4000, s0
18531853
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
18541854
; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v1.h, v1.l
18551855
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v1
@@ -2002,11 +2002,11 @@ define <2 x half> @fmul_select_v2f16_test4(<2 x half> %x, <2 x i32> %bool.arg1,
20022002
; GFX11-SDAG-TRUE16: ; %bb.0:
20032003
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20042004
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4
2005-
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0x3800
2005+
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0x3c00
20062006
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, v1, v3
20072007
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2008-
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x3c00, v2.l, vcc_lo
2009-
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x3c00, v2.l, s0
2008+
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, v2.l, 0x3800, vcc_lo
2009+
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.h, v2.l, 0x3800, s0
20102010
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
20112011
; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v1.h, v1.l
20122012
; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v1
@@ -2212,10 +2212,10 @@ define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) {
22122212
; GFX11-SDAG-TRUE16-LABEL: fmul_select_f16_test6:
22132213
; GFX11-SDAG-TRUE16: ; %bb.0:
22142214
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2215-
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xc800
2215+
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x4200
22162216
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
22172217
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
2218-
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x4200, v3.l, vcc_lo
2218+
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, 0xc800, vcc_lo
22192219
; GFX11-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
22202220
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
22212221
;
@@ -2320,10 +2320,10 @@ define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) {
23202320
; GFX11-SDAG-TRUE16-LABEL: fmul_select_f16_test7:
23212321
; GFX11-SDAG-TRUE16: ; %bb.0:
23222322
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2323-
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x4800
2323+
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xc400
23242324
; GFX11-SDAG-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
23252325
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
2326-
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, 0xc400, v3.l, vcc_lo
2326+
; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b16 v1.l, v3.l, 0x4800, vcc_lo
23272327
; GFX11-SDAG-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
23282328
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
23292329
;

llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -524,16 +524,16 @@ define <4 x half> @vec_8xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(1
524524
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
525525
; GFX11-TRUE16-NEXT: .LBB2_3: ; %exit
526526
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2.l
527-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3d00
527+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3900
528528
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e64 s0, 0.5, v2.h
529529
; GFX11-TRUE16-NEXT: v_cmp_nge_f16_e64 s1, 0.5, v3.l
530530
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e64 s2, 0.5, v3.l
531531
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
532-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3900, v0.l, vcc_lo
533-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x3900, v0.l, s0
532+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, 0x3d00, vcc_lo
533+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v0.l, 0x3d00, s0
534534
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
535-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v0.l, 0x3900, s1
536-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.l, 0x3900, v0.l, s2
535+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x3d00, v0.l, s1
536+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.l, v0.l, 0x3d00, s2
537537
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
538538
; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v1.l
539539
; GFX11-TRUE16-NEXT: v_pack_b32_f16 v1, v2.l, v1.h
@@ -1254,16 +1254,16 @@ define <4 x half> @vec_16xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(
12541254
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
12551255
; GFX11-TRUE16-NEXT: .LBB5_3: ; %exit
12561256
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2.l
1257-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3d00
1257+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3900
12581258
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e64 s0, 0.5, v2.h
12591259
; GFX11-TRUE16-NEXT: v_cmp_nge_f16_e64 s1, 0.5, v3.l
12601260
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e64 s2, 0.5, v3.l
12611261
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1262-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3900, v0.l, vcc_lo
1263-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x3900, v0.l, s0
1262+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, 0x3d00, vcc_lo
1263+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v0.l, 0x3d00, s0
12641264
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1265-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v0.l, 0x3900, s1
1266-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.l, 0x3900, v0.l, s2
1265+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x3d00, v0.l, s1
1266+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.l, v0.l, 0x3d00, s2
12671267
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
12681268
; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v1.l
12691269
; GFX11-TRUE16-NEXT: v_pack_b32_f16 v1, v2.l, v1.h
@@ -1984,22 +1984,22 @@ define amdgpu_gfx <8 x half> @vec_16xf16_extract_8xf16_0(i1 inreg %cond, ptr add
19841984
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
19851985
; GFX11-TRUE16-NEXT: .LBB8_4: ; %exit
19861986
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e32 vcc_lo, 0.5, v5.l
1987-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3d00
1987+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3900
19881988
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e64 s0, 0.5, v4.l
19891989
; GFX11-TRUE16-NEXT: v_cmp_nge_f16_e64 s1, 0.5, v5.h
19901990
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e64 s2, 0.5, v2.h
19911991
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e64 s3, 0.5, v3.h
1992-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.l, 0x3900, v0.l, vcc_lo
1993-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v5.l, 0x3900, v0.l, s0
1992+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.l, v0.l, 0x3d00, vcc_lo
1993+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v5.l, v0.l, 0x3d00, s0
19941994
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e32 vcc_lo, 0.5, v3.l
19951995
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e64 s0, 0.5, v2.l
19961996
; GFX11-TRUE16-NEXT: v_cmp_ge_f16_e64 s34, 0.5, v4.h
1997-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x3900, v0.l, s2
1998-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.l, 0x3900, v0.l, s3
1999-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x3900, v0.l, vcc_lo
2000-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3900, v0.l, s0
2001-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, 0x3900, v0.l, s34
2002-
; GFX11-TRUE16-NEXT: v_cndmask_b16 v3.l, v0.l, 0x3900, s1
1997+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v0.l, 0x3d00, s2
1998+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.l, v0.l, 0x3d00, s3
1999+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v0.l, 0x3d00, vcc_lo
2000+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, 0x3d00, s0
2001+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v0.l, 0x3d00, s34
2002+
; GFX11-TRUE16-NEXT: v_cndmask_b16 v3.l, 0x3d00, v0.l, s1
20032003
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
20042004
; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v1.h
20052005
; GFX11-TRUE16-NEXT: v_pack_b32_f16 v1, v1.l, v2.l

llvm/test/CodeGen/AMDGPU/fmaximum3.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2018,9 +2018,9 @@ define half @v_fmaximum3_f16_const1_const2(half %a) {
20182018
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
20192019
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
20202020
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
2021-
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0x4c00
2021+
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0x4800
20222022
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
2023-
; GFX12-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, 0x4800, v0.h
2023+
; GFX12-TRUE16-NEXT: v_maximum3_f16 v0.l, v0.l, v1.l, 0x4c00
20242024
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
20252025
;
20262026
; GFX12-FAKE16-LABEL: v_fmaximum3_f16_const1_const2:

llvm/test/CodeGen/AMDGPU/fminimum3.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2018,9 +2018,9 @@ define half @v_fminimum3_f16_const1_const2(half %a) {
20182018
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
20192019
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
20202020
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
2021-
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0x4c00
2021+
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0x4800
20222022
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
2023-
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, 0x4800, v0.h
2023+
; GFX12-TRUE16-NEXT: v_minimum3_f16 v0.l, v0.l, v1.l, 0x4c00
20242024
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
20252025
;
20262026
; GFX12-FAKE16-LABEL: v_fminimum3_f16_const1_const2:

0 commit comments

Comments
 (0)