diff --git a/sycl/doc/extensions/SPIRV/SPV_INTEL_joint_matrix.asciidoc b/sycl/doc/extensions/SPIRV/SPV_INTEL_joint_matrix.asciidoc new file mode 100644 index 0000000000000..1f819ecab5a71 --- /dev/null +++ b/sycl/doc/extensions/SPIRV/SPV_INTEL_joint_matrix.asciidoc @@ -0,0 +1,480 @@ +:extension_name: SPV_INTEL_joint_matrix +:capability_name: JointMatrixINTEL +:capability_token: 6118 +:OpTypeJointMatrixINTEL_token: 6119 +:OpJointMatrixLoadINTEL_token: 6120 +:OpJointMatrixStoreINTEL_token: 6121 +:OpJointMatrixMadINTEL_token: 6122 +:OpJointMatrixSUMadINTEL_token: 6128 +:OpJointMatrixUSMadINTEL_token: 6129 +:OpJointMatrixUUMadINTEL_token: 6130 + +{extension_name} +================ + + +== Name Strings + +{extension_name} + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm + +== Contributors + +- Alexey Sotkin, Intel + +- Dounia Khaldi, Intel + +- Mateusz, Belicki Intel + +- Dmitry Sidorov, Intel + + +== Notice + +Copyright (c) 2021 Intel Corporation. All rights reserved. + +== Status + +Working Draft + +This is a preview extension specification, intended to provide early access to a +feature for review and community feedback. When the feature matures, this +specification may be released as a formal extension. + + +Because the interfaces defined by this specification are not final and are +subject to change they are not intended to be used by shipping software +products. If you are interested in using this feature in your software product, +please let us know! + +== Version + +[width="40%",cols="25,25"] +|======================================== +| Last Modified Date | {docdate} +| Revision | A +|======================================== + +== Dependencies + +This extension is written against the SPIR-V Specification, +Version 1.5 Revision 5. + +This extension requires SPIR-V 1.0. + +== Overview + +This extension adds a type and instructions for joint matrices. Such matrices +are shared among a group of work items and is not private to each work item. +The type introduced with this extension allows to specify memory scope and +layout of the matrix, including layouts optimized for particular hardware(AMX) . +New instructions also allow to specify synchronization scope. + +== Extension Name + + +To use this extension within a SPIR-V module, the appropriate *OpExtension* must +be present in the module: + +[subs="attributes"] +---- +OpExtension "{extension_name}" +---- + +== New Capabilities + +This extension introduces new capabilities: + +[subs="attributes"] +---- +{capability_name} +---- + +== New Instructions + +Instructions added under the *{capability_name}* capability: + +---- + +OpTypeJointMatrixINTEL +OpJointMatrixLoadINTEL +OpJointMatrixStoreINTEL +OpJointMatrixMADINTEL +OpJointMatrixSUMADINTEL +OpJointMatrixUSMADINTEL +OpJointMatrixUUMADINTEL + +---- + +== Token Number Assignments + +[width="40%"] +[cols="70%,30%"] +[grid="rows"] +|==== +|*{capability_name}* | {capability_token} +|*OpTypeJointMatrixINTEL* | {OpTypeJointMatrixINTEL_token} +|*OpJointMatrixLoadINTEL* | {OpJointMatrixLoadINTEL_token} +|*OpJointMatrixStoreINTEL* | {OpJointMatrixStoreINTEL_token} +|*OpJointMatrixMadINTEL* | {OpJointMatrixMadINTEL_token} +|*OpJointMatrixSUMadINTEL* | {OpJointMatrixSUMadINTEL_token} +|*OpJointMatrixUSMadINTEL* | {OpJointMatrixUSMadINTEL_token} +|*OpJointMatrixUUMadINTEL* | {OpJointMatrixUUMadINTEL_token} +|==== + +== Modifications to the SPIR-V Specification, Version 1.5 + +=== Matrix layout + +Add section 3.XX, Matrix layout. + +[options="header"] +|==== +2+^| Layout ^| Enabling capability +| 0 | *ColumnMajor* | *{capability_name}* +| 1 | *RowMajor* | *{capability_name}* +| 2 | *PackedA* + +Suitable for VNNI instructions | *{capability_name}* +| 3 | *PackedB* + +Suitable for VNNI instructions | *{capability_name}* +|==== + +=== Capabilities + +Modify Section 3.31, Capability, adding rows to the Capability table: + +-- +[options="header"] +|==== +2+^| Capability ^| Implicitly Declares +| {capability_token} | *{capability_name}* +| Reserved. + + + +See also extension: *{extension_name}* +|==== +-- + +=== Instructions + +==== 3.37.6 Type-Declaration Instructions + +[cols="1,1,6*3",width="100%"] +|===== +7+|[[OpTypeJointMatrixINTEL]]*OpTypeJointMatrixINTEL* + + + +Declare a matrix type. + + + +'Component Type' is the type of each component in the resulting type. It must be +a scalar 'numerical type'. + + + +'Row Count' is the number of rows in the matrix type. It must be a constant +unsigned 32-bit integer. Behavior is undefined when 'Row Count' is 0 or +<>. + + + +'Column Count' is the number of columns in the matrix type. It must be a +constant unsigned 32-bit integer. Behavior is undefined when 'Column Count' is +0 or <>. + + + +'Layout' indicates how the values are arranged internally in the matrix type. +It must be the result of a constant instruction. + + + +'Scope' is memory scope for operations on the matrix. It must be the +result of a constant instruction with scalar 'integer type'. + + + +1+|Capability: + +*{capability_name}* +1+| 7 | {OpTypeJointMatrixINTEL_token} +| 'Result ' +| '' + +'Component Type' +| '' + +'Row Count' +| '' + +'Column Count' +| '' + +'Layout' +| '' + +'Scope' +|===== + + +==== 3.37.8. Memory Instructions + +[cols="1,1,7*3",width="100%"] +|===== +8+|[[OpJointMatrixLoadINTEL]]*OpJointMatrixLoadINTEL* + + + +Load a matrix through a pointer. + + + +'Result Type' is the type of the loaded matrix. It must be +<>. + + + +'Pointer' is the pointer to load through. It specifies start of memory region +where elements of the matrix are stored and arranged according to 'Layout'. + + + +'Stride' is the number of elements in memory between beginnings of successive +rows, columns (or words) in the result. It must be a scalar integer type. + + + +'Layout' indicates how the values loaded from memory are arranged. +It must be the result of a constant instruction. + + + +'Scope' is syncronization scope for operation on the matrix. It must be the +result of a constant instruction with scalar 'integer type'. + + + +If present, any 'Memory Operands' must begin with a +<> literal. If not present, it is the same as +specifying the <> *None*. + + +1+|Capability: + +*{capability_name}* +1+| 7 + variable | {OpJointMatrixLoadINTEL_token} +| '' + +'Result Type' +|'Result ' +| '' + +'Pointer' +| '' + +'Stride' +| '' + +'<>' +| '' + +'Scope' +| Optional + +'Memory Access' +|===== + + +[cols="1,1,6*3",width="100%"] +|===== +7+|[[OpJointMatrixStoreINTEL]]*OpJointMatrixStoreINTEL* + + + +Store a matrix through a pointer. + + + +'Pointer' is the pointer to store through. It specifies start of memory region +where elements of the matrix must be stored and arranged according to 'Layout'. + + + +'Object' is the matrix to store. It must be +<>. + + + +'Stride' is the number of elements in memory between beginnings of successive +rows, columns (or words) of the 'Object'. It must be a scalar integer type. + + + +'Layout' indicates how the values stored to memory are arranged. It must be the +result of a constant instruction. + + + +'Scope' is syncronization scope for operation on the matrix. It must be the +result of a constant instruction with scalar 'integer type'. + + + +If present, any 'Memory Operands' must begin with a +<> literal. If not present, it is the same as +specifying the <> *None*. + + +1+|Capability: + +*{capability_name}* +1+| 6 + variable | {OpJointMatrixStoreINTEL_token} +| '' + +'Pointer' +| '' + +'Object' +| '' + +'Stride' +| '' + +'<>' +| '' + +'Scope' +| Optional + +'Memory Access' +|===== + + +==== 3.37.13. Arithmetic Instructions + +[cols="1,1,6*3",width="100%"] +|===== +7+|[[OpJointMatrixMadINTEL]]*OpJointMatrixMadINTEL* + + + +Multiply matrix 'A' by matrix 'B' and add matrix 'C' to the result of the +multiplication: `A*B+C`. Here 'A' is a `M x K` matrix, 'B' is a `K x N` +matrix and 'C' is a `M x N` matrix. + + + +Behavior is undefined if sizes of operands do not meet the conditions above. +All operands and the 'Result Type' must be +<>. + + + +'A' must be a <> whose +'Component Type' is a signed 'numerical type', 'Row Count' equals to 'M' and +'Column Count' equals to 'K' + + + +'B' must be a <> whose +'Component Type' is a signed 'numerical type', 'Row Count' equals to 'K' and +'Column Count' equals to 'N' + + + +'C' and 'Result Type' must be a +<> with 'Row Count' equals to +'M' and 'Column Count' equals to 'N' + + + +'Scope' is syncronization scope for operation on the matrix. It must be the +result of a constant instruction with scalar 'integer type'. + + +1+|Capability: + +*{capability_name}* +1+| 7 | {OpJointMatrixMadINTEL_token} +| '' + +'Result Type' +|'Result ' +| '' + +'A' +| '' + +'B' +| '' + +'C' +| '' + +'Scope' +|===== + +[cols="1,1,6*3",width="100%"] +|===== +7+|[[OpJointMatrixSUMadINTEL]]*OpJointMatrixSUMadINTEL* + + + +Multiply matrix 'A' by matrix 'B' and add matrix 'C' to the result of the +multiplication: `A*B+C`. Here 'A' is a `M x K` matrix, 'B' is a `K x N` +matrix and 'C' is a `M x N` matrix. + + + +Behavior is undefined if sizes of operands do not meet the conditions above. +All operands and the 'Result Type' must be +<>. + + + +'A' must be a <> whose +'Component Type' is a signed 'numerical type', 'Row Count' equals to 'M' and +'Column Count' equals to 'K' + + + +'B' must be a <> whose +'Component Type' is an unsigned 'numerical type', 'Row Count' equals to 'K' and +'Column Count' equals to 'N' + + + +'C' and 'Result Type' must be a +<> with 'Row Count' equals to +'M' and 'Column Count' equals to 'N' + + + +'Scope' is syncronization scope for operation on the matrix. It must be the +result of a constant instruction with scalar 'integer type'. + + +1+|Capability: + +*{capability_name}* +1+| 7 | {OpJointMatrixSUMadINTEL_token} +| '' + +'Result Type' +|'Result ' +| '' + +'A' +| '' + +'B' +| '' + +'C' +| '' + +'Scope' +|===== + +[cols="1,1,6*3",width="100%"] +|===== +7+|[[OpJointMatrixUSMadINTEL]]*OpJointMatrixUSMadINTEL* + + + +Multiply matrix 'A' by matrix 'B' and add matrix 'C' to the result of the +multiplication: `A*B+C`. Here 'A' is a `M x K` matrix, 'B' is a `K x N` +matrix and 'C' is a `M x N` matrix. + + + +Behavior is undefined if sizes of operands do not meet the conditions above. +All operands and the 'Result Type' must be +<>. + + + +'A' must be a <> whose +'Component Type' is an unsigned 'numerical type', 'Row Count' equals to 'M' and +'Column Count' equals to 'K' + + + +'B' must be a <> whose +'Component Type' is a signed 'numerical type', 'Row Count' equals to 'K' and +'Column Count' equals to 'N' + + + +'C' and 'Result Type' must be a +<> with 'Row Count' equals to +'M' and 'Column Count' equals to 'N' + + + +'Scope' is syncronization scope for operation on the matrix. It must be the +result of a constant instruction with scalar 'integer type'. + + +1+|Capability: + +*{capability_name}* +1+| 7 | {OpJointMatrixUSMadINTEL_token} +| '' + +'Result Type' +|'Result ' +| '' + +'A' +| '' + +'B' +| '' + +'C' +| '' + +'Scope' +|===== + +[cols="1,1,6*3",width="100%"] +|===== +7+|[[OpJointMatrixUUMadINTEL]]*OpJointMatrixUUMadINTEL* + + + +Multiply matrix 'A' by matrix 'B' and add matrix 'C' to the result of the +multiplication: `A*B+C`. Here 'A' is a `M x K` matrix, 'B' is a `K x N` +matrix and 'C' is a `M x N` matrix. + + + +Behavior is undefined if sizes of operands do not meet the conditions above. +All operands and the 'Result Type' must be +<>. + + + +'A' must be a <> whose +'Component Type' is an unsigned 'numerical type', 'Row Count' equals to 'M' and +'Column Count' equals to 'K' + + + +'B' must be a <> whose +'Component Type' is an unsigned 'numerical type', 'Row Count' equals to 'K' and +'Column Count' equals to 'N' + + + +'C' and 'Result Type' must be a +<> with 'Row Count' equals to +'M' and 'Column Count' equals to 'N' + + + +'Scope' is syncronization scope for operation on the matrix. It must be the +result of a constant instruction with scalar 'integer type'. + + +1+|Capability: + +*{capability_name}* +1+| 7 | {OpJointMatrixUUMadINTEL_token} +| '' + +'Result Type' +|'Result ' +| '' + +'A' +| '' + +'B' +| '' + +'C' +| '' + +'Scope' +|===== + +=== Issues + +None + +Revision History +---------------- + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|======================================== +|Rev|Date|Author|Changes +|1|2021-02-16|Alexey Sotkin|Initial revision +|1|2021-09-06|Dmitry Sidorov|Split OpJointMatrixMadINTEL instruction into 4 +|========================================