#pragma once

#include <ATen/core/Tensor.h>
#include <ATen/Config.h>
#include <ATen/native/LinearAlgebraUtils.h> // For TransposeType

namespace at { namespace native {

// result = beta * result + alpha * gemm(mat1, mat2)
TORCH_API void mkldnn_matmul(
    const Tensor &mat1,
    const Tensor &mat2,
    const Tensor &result,
    float beta=1,
    float alpha=1);

bool use_mkldnn_bf16_matmul(
    const Tensor& mat1,
    const Tensor& mat2,
    const Tensor& result_opt);

bool use_mkldnn_fp16_matmul(
    const Tensor& mat1,
    const Tensor& mat2,
    const Tensor& result_opt);

bool use_mkldnn_bf32_matmul(
    const Tensor& mat1,
    const Tensor& mat2,
    const Tensor& result_opt);

// Try to run the oneDNN-optimized GEMM; returns false if a naive GEMM
// would be faster, in which case the caller should fall back to it.
bool mkldnn_bf16_gemm(
    TransposeType transa, TransposeType transb,
    int64_t m, int64_t n, int64_t k,
    float alpha,
    const c10::BFloat16 *a, int64_t lda,
    const c10::BFloat16 *b, int64_t ldb,
    float beta,
    c10::BFloat16 *c, int64_t ldc);

bool mkldnn_fp16_gemm(
    TransposeType transa, TransposeType transb,
    int64_t m, int64_t n, int64_t k,
    float alpha,
    const c10::Half *a, int64_t lda,
    const c10::Half *b, int64_t ldb,
    float beta,
    c10::Half *c, int64_t ldc);

/*
 * Uses oneDNN's implicit reduced-precision arithmetic feature
 * https://github.com/mgouicem/oneDNN/tree/mgouicem/rfcs/implicit_downconvert/rfcs/20210301-computation-datatype
 * to allow the computation data type to be implicitly down-converted
 * from FP32 to BF16 inside oneDNN compute primitives.
 */
bool mkldnn_bf32_gemm(
    TransposeType transa, TransposeType transb,
    int64_t m, int64_t n, int64_t k,
    float alpha,
    const float *a, int64_t lda,
    const float *b, int64_t ldb,
    float beta,
    float *c, int64_t ldc);

bool use_mkldnn_matmul(
    const Tensor& mat1,
    const Tensor& mat2,
    const Tensor& result);

// x:s8 * w:s8 -> y:s32
TORCH_API void mkldnn_matmul_i8i8i32(
    const Tensor &mat1,
    const Tensor &mat2,
    const Tensor &result);

}} // namespace at::native
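
// ---------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of the API surface; `mat1`,
// `mat2`, and `result` below are hypothetical caller-side tensors). The
// intended pattern is to gate the oneDNN path behind the matching
// use_mkldnn_*_matmul() predicate and fall back otherwise:
//
//   if (at::native::use_mkldnn_bf16_matmul(mat1, mat2, result)) {
//     // result = 0 * result + 1 * gemm(mat1, mat2)
//     at::native::mkldnn_matmul(mat1, mat2, result, /*beta=*/0.f, /*alpha=*/1.f);
//   } else {
//     // fall back to the default (e.g. BLAS) matmul path
//   }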
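//
// Raw GEMM sketch (BLAS-style, column-major arguments assumed here; `a`, `b`,
// `c`, the m/n/k sizes, and the leading dimensions are hypothetical and must
// be kept consistent by the caller). Per the comment above the declarations,
// a false return means a naive GEMM would be faster, so run it instead:
//
//   bool ran = at::native::mkldnn_bf16_gemm(
//       at::native::TransposeType::NoTranspose,
//       at::native::TransposeType::NoTranspose,
//       m, n, k,
//       /*alpha=*/1.f,
//       a, /*lda=*/m,
//       b, /*ldb=*/k,
//       /*beta=*/0.f,
//       c, /*ldc=*/m);
//   if (!ran) {
//     // dispatch to the reference bf16 gemm
//   }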
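//
// Int8 sketch mirroring the x:s8 * w:s8 -> y:s32 contract above (the dtypes
// are the point here; the shapes are hypothetical):
//
//   // mat1: kChar [m, k], mat2: kChar [k, n], result: kInt [m, n]
//   at::native::mkldnn_matmul_i8i8i32(mat1, mat2, result);
// ---------------------------------------------------------------------------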