Syntax of gfx908 Instructions

Introduction

This document describes the syntax of instructions specific to gfx908.

For a description of other gfx908 instructions, see "Syntax of Core GFX9 Instructions".

Notation

The notation used in this document is explained in the "AMDGPU Instructions Notation" document.

Overview

An overview of generic syntax and other features of AMDGPU instructions may be found in the "AMDGPU Instruction Syntax" document.

Instructions

FLAT

INSTRUCTION                    DST             SRC0      SRC1         SRC2           MODIFIERS
———————————————————————————————————————————————————————————————————————————————————————————————————
global_atomic_add_f32          vdst:opt,       vaddr,    vdata,       saddr          offset13s slc
global_atomic_pk_add_f16       vdst:opt:f16x2, vaddr,    vdata:f16x2, saddr          offset13s slc

MUBUF

INSTRUCTION                    SRC0             SRC1      SRC2      SRC3           MODIFIERS
————————————————————————————————————————————————————————————————————————————————————————————————————————————
buffer_atomic_add_f32          vdata:dst,       vaddr,    srsrc,    soffset        idxen offen offset12 slc
buffer_atomic_pk_add_f16       vdata:dst:f16x2, vaddr,    srsrc,    soffset        idxen offen offset12 slc

VOP2

INSTRUCTION           DST         SRC0         SRC1          MODIFIERS
————————————————————————————————————————————————————————————————————————————————————————————————————
v_dot2c_f32_f16       vdst,       src0:f16x2,  vsrc1:f16x2
v_dot2c_f32_f16_dpp   vdst,       vsrc0:f16x2, vsrc1:f16x2   dpp_ctrl row_mask bank_mask bound_ctrl
v_dot2c_i32_i16       vdst,       src0:i16x2,  vsrc1:i16x2
v_dot2c_i32_i16_dpp   vdst,       vsrc0:i16x2, vsrc1:i16x2   dpp_ctrl row_mask bank_mask bound_ctrl
v_dot4c_i32_i8        vdst,       src0:i8x4,   vsrc1:i8x4
v_dot4c_i32_i8_dpp    vdst,       vsrc0:i8x4,  vsrc1:i8x4    dpp_ctrl row_mask bank_mask bound_ctrl
v_dot8c_i32_i4        vdst,       src0:i4x8,   vsrc1:i4x8
v_dot8c_i32_i4_dpp    vdst,       vsrc0:i4x8,  vsrc1:i4x8    dpp_ctrl row_mask bank_mask bound_ctrl
v_fmac_f32            vdst,       src0,        vsrc1
v_fmac_f32_dpp        vdst,       vsrc0:m,     vsrc1:m       dpp_ctrl row_mask bank_mask bound_ctrl
v_pk_fmac_f16         vdst:f16x2, src0:f16x2,  vsrc1:f16x2
v_xnor_b32            vdst,       src0,        vsrc1
v_xnor_b32_dpp        vdst,       vsrc0,       vsrc1         dpp_ctrl row_mask bank_mask bound_ctrl
v_xnor_b32_sdwa       vdst,       src0:m,      vsrc1:m       dst_sel dst_unused src0_sel src1_sel

VOP3

INSTRUCTION                    DST       SRC0      SRC1           MODIFIERS
—————————————————————————————————————————————————————————————————————————————
v_fmac_f32_e64                 vdst,     src0:m,   src1:m         clamp omod
v_xnor_b32_e64                 vdst,     src0,     src1

VOP3P

INSTRUCTION            DST          SRC0           SRC1           SRC2           MODIFIERS
————————————————————————————————————————————————————————————————————————————————————————————————————————————
v_accvgpr_read_b32     vdst,        asrc
v_accvgpr_write_b32    adst,        src
v_dot2_f32_f16         vdst,        src0:f16x2,    src1:f16x2,    src2:f32       neg_lo neg_hi clamp
v_dot2_i32_i16         vdst,        src0:i16x2,    src1:i16x2,    src2:i32       clamp
v_dot2_u32_u16         vdst,        src0:u16x2,    src1:u16x2,    src2:u32       clamp
v_dot4_i32_i8          vdst,        src0:i8x4,     src1:i8x4,     src2:i32       clamp
v_dot4_u32_u8          vdst,        src0:u8x4,     src1:u8x4,     src2:u32       clamp
v_dot8_i32_i4          vdst,        src0:i4x8,     src1:i4x8,     src2:i32       clamp
v_dot8_u32_u4          vdst,        src0:u4x8,     src1:u4x8,     src2:u32       clamp
v_fma_mix_f32          vdst,        src0:m:fx,     src1:m:fx,     src2:m:fx      m_op_sel m_op_sel_hi clamp
v_fma_mixhi_f16        vdst,        src0:m:fx,     src1:m:fx,     src2:m:fx      m_op_sel m_op_sel_hi clamp
v_fma_mixlo_f16        vdst,        src0:m:fx,     src1:m:fx,     src2:m:fx      m_op_sel m_op_sel_hi clamp
v_mfma_f32_16x16x16f16 adst:f32x4,  vasrc0:f16x4,  vasrc1:f16x4,  asrc2:f32x4    cbsz abid blgp
v_mfma_f32_16x16x1f32  adst:f32x16, vasrc0:f32,    vasrc1:f32,    asrc2:f32x16   cbsz abid blgp
v_mfma_f32_16x16x2bf16 adst:f32x16, vasrc0:bf16x2, vasrc1:bf16x2, asrc2:f32x16   cbsz abid blgp
v_mfma_f32_16x16x4f16  adst:f32x16, vasrc0:f16x4,  vasrc1:f16x4,  asrc2:f32x16   cbsz abid blgp
v_mfma_f32_16x16x4f32  adst:f32x4,  vasrc0:f32,    vasrc1:f32,    asrc2:f32x4    cbsz abid blgp
v_mfma_f32_16x16x8bf16 adst:f32x4,  vasrc0:bf16x2, vasrc1:bf16x2, asrc2:f32x4    cbsz abid blgp
v_mfma_f32_32x32x1f32  adst:f32x32, vasrc0:f32,    vasrc1:f32,    asrc2:f32x32   cbsz abid blgp
v_mfma_f32_32x32x2bf16 adst:f32x32, vasrc0:bf16x2, vasrc1:bf16x2, asrc2:f32x32   cbsz abid blgp
v_mfma_f32_32x32x2f32  adst:f32x16, vasrc0:f32,    vasrc1:f32,    asrc2:f32x16   cbsz abid blgp
v_mfma_f32_32x32x4bf16 adst:f32x16, vasrc0:bf16x2, vasrc1:bf16x2, asrc2:f32x16   cbsz abid blgp
v_mfma_f32_32x32x4f16  adst:f32x32, vasrc0:f16x4,  vasrc1:f16x4,  asrc2:f32x32   cbsz abid blgp
v_mfma_f32_32x32x8f16  adst:f32x16, vasrc0:f16x4,  vasrc1:f16x4,  asrc2:f32x16   cbsz abid blgp
v_mfma_f32_4x4x1f32    adst:f32x4,  vasrc0:f32,    vasrc1:f32,    asrc2:f32x4    cbsz abid blgp
v_mfma_f32_4x4x2bf16   adst:f32x4,  vasrc0:bf16x2, vasrc1:bf16x2, asrc2:f32x4    cbsz abid blgp
v_mfma_f32_4x4x4f16    adst:f32x4,  vasrc0:f16x4,  vasrc1:f16x4,  asrc2:f32x4    cbsz abid blgp
v_mfma_i32_16x16x16i8  adst:i32x4,  vasrc0:i8x4,   vasrc1:i8x4,   asrc2:i32x4    cbsz abid blgp
v_mfma_i32_16x16x4i8   adst:i32x16, vasrc0:i8x4,   vasrc1:i8x4,   asrc2:i32x16   cbsz abid blgp
v_mfma_i32_32x32x4i8   adst:i32x32, vasrc0:i8x4,   vasrc1:i8x4,   asrc2:i32x32   cbsz abid blgp
v_mfma_i32_32x32x8i8   adst:i32x16, vasrc0:i8x4,   vasrc1:i8x4,   asrc2:i32x16   cbsz abid blgp
v_mfma_i32_4x4x4i8     adst:i32x4,  vasrc0:i8x4,   vasrc1:i8x4,   asrc2:i32x4    cbsz abid blgp