1 /* 2 * Copyright (c) 2017-2021 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #pragma once 25 26 #ifdef __aarch64__ 27 28 #include "../std_transforms_fixed.hpp" 29 #include "../performance_parameters.hpp" 30 31 #include "../bfloat.hpp" 32 33 namespace arm_gemm { 34 35 // Actual kernel implementations 36 void a64_sgemm_asimd_8x12(const float *, const float *, float *, int, int, int); 37 void a64_sgemm_asimd_8x12_a53(const float *, const float *, float *, int, int, int); 38 void a64_sgemm_asimd_8x12_a55(const float *, const float *, float *, int, int, int); 39 void a64_sgemm_asimd_8x12_a55r1(const float *, const float *, float *, int, int, int); 40 void a64_sgemm_asimd_8x12_x1(const float *, const float *, float *, int, int, int); 41 42 // 8x12 SGEMM "strategy" class. 43 // 44 // This describes the characteristics of a family of kernels, in terms of 45 // the required interleave properties and the output block size. 46 // 47 // All kernels in the family must share these characteristics. The actual 48 // kernel to be used can be chosen at runtime, based on the CPU_type 49 // structure. 50 class cls_a64_sgemm_8x12 { 51 public: 52 typedef float operand_type; 53 typedef float result_type; 54 55 typedef void (*kern_type)(const float *, const float *, float *, int, int, int); 56 57 /* Kernel blocking parameters */ out_width()58 static unsigned int out_width() { 59 return 12; 60 } 61 out_height()62 static unsigned int out_height() { 63 return 8; 64 } 65 k_unroll()66 static unsigned int k_unroll() { 67 return 1; 68 } 69 70 // Use the standard fixed size transforms. 71 StdTransformsFixed<operand_type, result_type, 8, 12> transforms = {}; 72 73 template<typename T> get_performance_parameters(const CPUInfo * ci)74 static PerformanceParameters get_performance_parameters(const CPUInfo *ci) { 75 if (std::is_same<T, float>::value) { 76 switch (ci->get_cpu_model()) { 77 case CPUModel::A55r1: 78 return { 3.954, 1.252, 1.141 }; 79 80 case CPUModel::A53: 81 return { 2.777, 0.987, 0.898 }; 82 83 case CPUModel::A73: 84 return { 2.885, 1.429, 1.163 }; 85 86 case CPUModel::V1: 87 return { 14.95, 9.95, 5.28 }; 88 89 default: 90 return { 7.2307, 3.876, 2.932 }; 91 } 92 } 93 94 if (std::is_same<T, bfloat16>::value) { 95 switch(ci->get_cpu_model()) { 96 case CPUModel::A510: 97 return { 4.98, 2.27, 3.05 }; 98 99 default: 100 return { 7.99, 5.06, 7.32 }; 101 } 102 } 103 } 104 105 kern_type kernel=a64_sgemm_asimd_8x12; 106 cls_a64_sgemm_8x12(const CPUInfo * ci)107 cls_a64_sgemm_8x12(const CPUInfo *ci) { 108 // Select specific kernel if available 109 switch(ci->get_cpu_model()) { 110 case CPUModel::A53: 111 kernel = a64_sgemm_asimd_8x12_a53; 112 break; 113 114 case CPUModel::A55r0: 115 kernel = a64_sgemm_asimd_8x12_a55; 116 break; 117 118 case CPUModel::A55r1: 119 kernel = a64_sgemm_asimd_8x12_a55r1; 120 break; 121 122 case CPUModel::X1: 123 kernel = a64_sgemm_asimd_8x12_x1; 124 break; 125 126 default: 127 /* Generic kernel is initialized by default. */ 128 break; 129 } 130 } 131 }; 132 133 } // namespace arm_gemm 134 135 #endif // __aarch64__ 136