Syntax of gfx908 Instructions

Introduction

This document describes the syntax of instructions specific to gfx908.

For a description of other gfx908 instructions, see Syntax of Core GFX9 Instructions.

Notation

The notation used in this document is explained in the separate AMDGPU instruction notation reference.

Overview

An overview of generic syntax and other features of AMDGPU instructions may be found in the separate, general AMDGPU instruction syntax documentation.

Instructions

FLAT

INSTRUCTION                    SRC0      SRC1      SRC2           MODIFIERS
———————————————————————————————————————————————————————————————————————————————
global_atomic_add_f32          vaddr,    vdata,    saddr          offset13s slc
global_atomic_pk_add_f16       vaddr,    vdata,    saddr          offset13s slc

MUBUF

INSTRUCTION                    SRC0      SRC1      SRC2      SRC3           MODIFIERS
————————————————————————————————————————————————————————————————————————————————————————————————————
buffer_atomic_add_f32          vdata,    vaddr,    srsrc,    soffset        idxen offen offset12 slc
buffer_atomic_pk_add_f16       vdata,    vaddr,    srsrc,    soffset        idxen offen offset12 slc

VOP2

INSTRUCTION             DST      SRC0         SRC1             MODIFIERS
—————————————————————————————————————————————————————————————————————————————————————————————————————
v_dot2c_f32_f16         vdst,    src0:f16x2,  vsrc1:f16x2
v_dot2c_f32_f16_dpp     vdst,    vsrc0:f16x2, vsrc1:f16x2      dpp_ctrl row_mask bank_mask bound_ctrl
v_dot2c_i32_i16         vdst,    src0:i16x2,  vsrc1:i16x2
v_dot2c_i32_i16_dpp     vdst,    vsrc0:i16x2, vsrc1:i16x2      dpp_ctrl row_mask bank_mask bound_ctrl
v_dot4c_i32_i8          vdst,    src0:i8x4,   vsrc1:i8x4
v_dot4c_i32_i8_dpp      vdst,    vsrc0:i8x4,  vsrc1:i8x4       dpp_ctrl row_mask bank_mask bound_ctrl
v_dot8c_i32_i4          vdst,    src0:i4x8,   vsrc1:i4x8
v_dot8c_i32_i4_dpp      vdst,    vsrc0:i4x8,  vsrc1:i4x8       dpp_ctrl row_mask bank_mask bound_ctrl
v_fmac_f32              vdst,    src0,        vsrc1
v_fmac_f32_dpp          vdst,    vsrc0:m,     vsrc1:m          dpp_ctrl row_mask bank_mask bound_ctrl
v_pk_fmac_f16           vdst,    src0,        vsrc1
v_xnor_b32              vdst,    src0,        vsrc1
v_xnor_b32_dpp          vdst,    vsrc0,       vsrc1            dpp_ctrl row_mask bank_mask bound_ctrl
v_xnor_b32_sdwa         vdst,    src0:m,      src1:m           dst_sel dst_unused src0_sel src1_sel

VOP3

INSTRUCTION                    DST       SRC0      SRC1           MODIFIERS
————————————————————————————————————————————————————————————————————————————
v_fmac_f32_e64                 vdst,     src0:m,   src1:m         clamp omod
v_xnor_b32_e64                 vdst,     src0,     src1

VOP3P

INSTRUCTION             DST          SRC0          SRC1          SRC2          MODIFIERS
—————————————————————————————————————————————————————————————————————————————————————————————————————————
v_accvgpr_read_b32      vdst,        vsrc
v_accvgpr_write_b32     vdst,        src
v_dot2_f32_f16          vdst,        src0:f16x2,   src1:f16x2,   src2:f32      neg_lo neg_hi clamp
v_dot2_i32_i16          vdst,        src0:i16x2,   src1:i16x2,   src2:i32      clamp
v_dot2_u32_u16          vdst,        src0:u16x2,   src1:u16x2,   src2:u32      clamp
v_dot4_i32_i8           vdst,        src0:i8x4,    src1:i8x4,    src2:i32      clamp
v_dot4_u32_u8           vdst,        src0:u8x4,    src1:u8x4,    src2:u32      clamp
v_dot8_i32_i4           vdst,        src0:i4x8,    src1:i4x8,    src2:i32      clamp
v_dot8_u32_u4           vdst,        src0:u4x8,    src1:u4x8,    src2:u32      clamp
v_fma_mix_f32           vdst,        src0:m:fx,    src1:m:fx,    src2:m:fx     m_op_sel m_op_sel_hi clamp
v_fma_mixhi_f16         vdst,        src0:m:fx,    src1:m:fx,    src2:m:fx     m_op_sel m_op_sel_hi clamp
v_fma_mixlo_f16         vdst,        src0:m:fx,    src1:m:fx,    src2:m:fx     m_op_sel m_op_sel_hi clamp
v_mfma_f32_16x16x16f16  vdst:f32x4,  vsrc0:f16x4,  vsrc1:f16x4,  vsrc2:f32x4   cbsz abid blgp
v_mfma_f32_16x16x1f32   vdst:f32x16, vsrc0:f32,    vsrc1:f32,    vsrc2:f32x16  cbsz abid blgp
v_mfma_f32_16x16x2bf16  vdst:f32x16, vsrc0:bf16x2, vsrc1:bf16x2, vsrc2:f32x16  cbsz abid blgp
v_mfma_f32_16x16x4f16   vdst:f32x16, vsrc0:f16x4,  vsrc1:f16x4,  vsrc2:f32x16  cbsz abid blgp
v_mfma_f32_16x16x4f32   vdst:f32x4,  vsrc0:f32,    vsrc1:f32,    vsrc2:f32x4   cbsz abid blgp
v_mfma_f32_16x16x8bf16  vdst:f32x4,  vsrc0:bf16x2, vsrc1:bf16x2, vsrc2:f32x4   cbsz abid blgp
v_mfma_f32_32x32x1f32   vdst:f32x32, vsrc0:f32,    vsrc1:f32,    vsrc2:f32x32  cbsz abid blgp
v_mfma_f32_32x32x2bf16  vdst:f32x32, vsrc0:bf16x2, vsrc1:bf16x2, vsrc2:f32x32  cbsz abid blgp
v_mfma_f32_32x32x2f32   vdst:f32x16, vsrc0:f32,    vsrc1:f32,    vsrc2:f32x16  cbsz abid blgp
v_mfma_f32_32x32x4bf16  vdst:f32x16, vsrc0:bf16x2, vsrc1:bf16x2, vsrc2:f32x16  cbsz abid blgp
v_mfma_f32_32x32x4f16   vdst:f32x32, vsrc0:f16x4,  vsrc1:f16x4,  vsrc2:f32x32  cbsz abid blgp
v_mfma_f32_32x32x8f16   vdst:f32x16, vsrc0:f16x4,  vsrc1:f16x4,  vsrc2:f32x16  cbsz abid blgp
v_mfma_f32_4x4x1f32     vdst:f32x4,  vsrc0:f32,    vsrc1:f32,    vsrc2:f32x4   cbsz abid blgp
v_mfma_f32_4x4x2bf16    vdst:f32x4,  vsrc0:bf16x2, vsrc1:bf16x2, vsrc2:f32x4   cbsz abid blgp
v_mfma_f32_4x4x4f16     vdst:f32x4,  vsrc0:f16x4,  vsrc1:f16x4,  vsrc2:f32x4   cbsz abid blgp
v_mfma_i32_16x16x16i8   vdst:i32x4,  vsrc0:i8x4,   vsrc1:i8x4,   vsrc2:i32x4   cbsz abid blgp
v_mfma_i32_16x16x4i8    vdst:i32x16, vsrc0:i8x4,   vsrc1:i8x4,   vsrc2:i32x16  cbsz abid blgp
v_mfma_i32_32x32x4i8    vdst:i32x32, vsrc0:i8x4,   vsrc1:i8x4,   vsrc2:i32x32  cbsz abid blgp
v_mfma_i32_32x32x8i8    vdst:i32x16, vsrc0:i8x4,   vsrc1:i8x4,   vsrc2:i32x16  cbsz abid blgp
v_mfma_i32_4x4x4i8      vdst:i32x4,  vsrc0:i8x4,   vsrc1:i8x4,   vsrc2:i32x4   cbsz abid blgp