#pragma once

#include <ATen/core/Tensor.h>
#include <ATen/Config.h>
#include <ATen/native/LinearAlgebraUtils.h>  // For TransposeType

namespace at { namespace native {

// result = beta * result + alpha * gemm(mat1, mat2)
TORCH_API void mkldnn_matmul(
    const Tensor &mat1,
    const Tensor &mat2,
    const Tensor &result,
    float beta=1,
    float alpha=1);
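
// Illustrative usage (a sketch, not part of this header; assumes an
// MKLDNN-enabled build and operands that pass use_mkldnn_matmul below):
//
//   at::Tensor mat1 = at::randn({64, 32}).to(at::kBFloat16);
//   at::Tensor mat2 = at::randn({32, 16}).to(at::kBFloat16);
//   at::Tensor result = at::zeros({64, 16}, at::kBFloat16);
//   // result = 0 * result + 1 * (mat1 @ mat2)
//   at::native::mkldnn_matmul(mat1, mat2, result, /*beta=*/0.f, /*alpha=*/1.f);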

bool use_mkldnn_bf16_matmul(
    const Tensor& mat1,
    const Tensor& mat2,
    const Tensor& result_opt);

bool use_mkldnn_fp16_matmul(
    const Tensor& mat1,
    const Tensor& mat2,
    const Tensor& result_opt);

bool use_mkldnn_bf32_matmul(
    const Tensor& mat1,
    const Tensor& mat2,
    const Tensor& result_opt);

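// The use_mkldnn_*_matmul predicates report whether the MKLDNN path is
// supported and expected to be profitable for the given operands (dtype,
// shapes, strides, build flags). A typical caller-side pattern, sketched
// here with `fallback_matmul` as a hypothetical stand-in for the caller's
// reference path:
//
//   if (at::native::use_mkldnn_bf16_matmul(mat1, mat2, result)) {
//     at::native::mkldnn_matmul(mat1, mat2, result, beta, alpha);
//   } else {
//     fallback_matmul(mat1, mat2, result, beta, alpha);
//   }
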
// Tries to run an MKLDNN-optimized gemm; returns false if a naive gemm
// would be faster.
bool mkldnn_bf16_gemm(
    TransposeType transa, TransposeType transb,
    int64_t m, int64_t n, int64_t k,
    float alpha,
    const c10::BFloat16 *a, int64_t lda,
    const c10::BFloat16 *b, int64_t ldb,
    float beta,
    c10::BFloat16 *c, int64_t ldc);

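// Illustrative call (a sketch assuming column-major storage, as in BLAS;
// TransposeType is the enum pulled in from LinearAlgebraUtils.h). A false
// return means the MKLDNN path declined and the caller should run its own
// gemm:
//
//   std::vector<c10::BFloat16> a(m * k), b(k * n), c(m * n);
//   bool handled = at::native::mkldnn_bf16_gemm(
//       at::native::TransposeType::NoTranspose,
//       at::native::TransposeType::NoTranspose,
//       m, n, k, /*alpha=*/1.f,
//       a.data(), /*lda=*/m,
//       b.data(), /*ldb=*/k,
//       /*beta=*/0.f,
//       c.data(), /*ldc=*/m);
//   if (!handled) { /* fall back to a reference gemm */ }
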
bool mkldnn_fp16_gemm(
    TransposeType transa, TransposeType transb,
    int64_t m, int64_t n, int64_t k,
    float alpha,
    const c10::Half *a, int64_t lda,
    const c10::Half *b, int64_t ldb,
    float beta,
    c10::Half *c, int64_t ldc);

/*
Uses oneDNN's implicit reduced-precision arithmetic feature
https://github.com/mgouicem/oneDNN/tree/mgouicem/rfcs/implicit_downconvert/rfcs/20210301-computation-datatype
to implicitly downconvert data from FP32 to BF16 inside oneDNN compute
primitives.
*/
bool mkldnn_bf32_gemm(
    TransposeType transa, TransposeType transb,
    int64_t m, int64_t n, int64_t k,
    float alpha,
    const float *a, int64_t lda,
    const float *b, int64_t ldb,
    float beta,
    float *c, int64_t ldc);

bool use_mkldnn_matmul(
    const Tensor& mat1,
    const Tensor& mat2,
    const Tensor& result);

// x:s8 * w:s8 -> y:s32
TORCH_API void mkldnn_matmul_i8i8i32(
    const Tensor &mat1,
    const Tensor &mat2,
    const Tensor &result);

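// Usage sketch (illustrative; assumes contiguous 2D CPU tensors and an
// MKLDNN-enabled build). s8 here is signed int8 (at::kChar) and s32 is
// int32 (at::kInt):
//
//   at::Tensor x = at::randint(-128, 128, {4, 8}, at::kChar);
//   at::Tensor w = at::randint(-128, 128, {8, 16}, at::kChar);
//   at::Tensor y = at::zeros({4, 16}, at::kInt);
//   at::native::mkldnn_matmul_i8i8i32(x, w, y);
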
} // namespace native
} // namespace at