1 /* 2 * Copyright (c) 2020 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #pragma once 25 26 #include "convolution_parameters.hpp" 27 28 #include <algorithm> 29 #include <cstddef> 30 #include <tuple> 31 #include <vector> 32 33 namespace arm_gemm { 34 35 // Class to assist with convolution calculations. 36 // 37 // This is framed as a hierarchy of objects: 38 // 39 // - Top level object which depends only on convolution parameters. This sets up std::vectors for the padding and 40 // kernel offset arrays. From this you can request: 41 // 42 // - Mid level object (e.g. instantiated at start of 'ConvolutionInterleave'). This holds specifics about the 43 // input tensor, and the desired column range. Calculations specific to this can be done once when this is set 44 // up. From this you can request: 45 // 46 // - Low level object (instantiated for each range of rows). This contains methods to actually populate a row 47 // pointer array. 48 49 50 template<typename T> 51 class convolver { 52 private: 53 const ConvolutionParameters m_params; 54 55 // Vector of padding data 56 const std::vector<T> m_pad_row; 57 58 // X/Y offsets for each kernel position 59 std::vector<int> m_kernel_y; 60 std::vector<int> m_kernel_x; 61 62 class column_handler { 63 private: 64 const convolver<T> &m_parent; 65 66 // Base/stride of input image 67 const T * const m_input_base; 68 const size_t m_input_stride; 69 70 // Starting kernel point and channel offset within that point 71 const unsigned int m_start_pos; 72 const unsigned int m_start_offset; 73 74 // Total length to process, rounded length of each input channel block. 75 const unsigned int m_length; 76 const unsigned int m_rounded_stringlen; 77 78 class row_handler { 79 private: 80 const convolver<T> &m_convolver; 81 const column_handler &m_parent; 82 83 // These variables track progress through the current block of rows 84 unsigned int m_start_output_y=0; 85 unsigned int m_start_output_x=0; 86 87 unsigned int m_length_remaining=0; 88 unsigned int m_current_pos=0; 89 90 unsigned int m_active_height=0; 91 92 public: row_handler(const column_handler & parent,unsigned int start_row,unsigned int active_height)93 row_handler(const column_handler &parent, unsigned int start_row, unsigned int active_height) : 94 m_convolver(parent.m_parent), 95 m_parent(parent), 96 m_start_output_y(start_row / m_convolver.m_params.output_width), 97 m_start_output_x(start_row % m_convolver.m_params.output_width), 98 m_length_remaining(m_parent.m_length), 99 m_current_pos(m_parent.m_start_pos), 100 m_active_height(active_height) { } 101 finished() const102 bool finished() const { 103 return (m_length_remaining == 0); 104 } 105 next_block(const T ** const row_ptr)106 std::tuple<unsigned int, unsigned int> next_block(const T ** const row_ptr) { 107 if (finished()) { 108 return std::make_tuple(0, 0); 109 } 110 111 // "in_width" in the amount of data that will be read in (copied) 112 // "out_width" is the total amount of data that will be produced (including padding) 113 unsigned int offset = (m_current_pos == m_parent.m_start_pos) ? m_parent.m_start_offset : 0; 114 unsigned int in_width = std::min(m_length_remaining, static_cast<unsigned int>(m_convolver.m_params.input_channels) - offset); 115 unsigned int out_width = std::min(m_length_remaining, m_parent.m_rounded_stringlen - offset); 116 117 unsigned int output_y = m_start_output_y; 118 unsigned int output_x = m_start_output_x; 119 120 for (unsigned int row=0; row<m_active_height; row++) { 121 int input_y = (output_y * m_convolver.m_params.output_stride_h) + m_convolver.m_kernel_y[m_current_pos]; 122 int input_x = (output_x * m_convolver.m_params.output_stride_w) + m_convolver.m_kernel_x[m_current_pos]; 123 124 // Out-of-bounds points will read the padding data, 125 // otherwise find the correct address in the input image. 126 if (input_y < 0 || input_y >= m_convolver.m_params.input_height || input_x < 0 || input_x >= m_convolver.m_params.input_width) { 127 row_ptr[row] = m_convolver.m_pad_row.data(); 128 } else { 129 row_ptr[row] = m_parent.m_input_base + ((input_y * m_convolver.m_params.input_width) + input_x) * m_parent.m_input_stride; 130 } 131 132 output_x++; 133 if (output_x == m_convolver.m_params.output_width) { 134 output_y++; 135 output_x=0; 136 } 137 } 138 139 m_current_pos++; 140 m_length_remaining-=out_width; 141 142 return std::make_tuple(in_width, offset); 143 } 144 }; // end of "row handler" class 145 146 public: column_handler(const convolver<T> & parent,const T * input_base,size_t input_stride,unsigned int k_start,unsigned int k_end,unsigned int rounded_stringlen)147 column_handler(const convolver<T> &parent, const T *input_base, size_t input_stride, 148 unsigned int k_start, unsigned int k_end, unsigned int rounded_stringlen) 149 : m_parent(parent), m_input_base(input_base), m_input_stride(input_stride), 150 m_start_pos(k_start / rounded_stringlen), 151 m_start_offset(k_start % rounded_stringlen), 152 m_length(k_end - k_start), 153 m_rounded_stringlen(rounded_stringlen) { } 154 process_rows(unsigned int start_row,unsigned int active_height) const155 row_handler process_rows(unsigned int start_row, unsigned int active_height) const { 156 return row_handler(*this, start_row, active_height); 157 } 158 }; // end of "column handler" class 159 160 public: convolver(ConvolutionParameters params)161 convolver(ConvolutionParameters params) : 162 m_params (params), m_pad_row(params.input_channels, static_cast<T>(params.padding_value)), 163 m_kernel_y(params.kernel_width * params.kernel_height, 0), 164 m_kernel_x(params.kernel_width * params.kernel_height, 0) { 165 166 // Kernel points are addressed across, then down (assumed weight layout is WHIO) 167 for (unsigned int ky=0; ky<params.kernel_height; ky++) { 168 for (unsigned int kx=0; kx<params.kernel_width; kx++) { 169 unsigned int n = (ky * params.kernel_width) + kx; 170 m_kernel_y[n] = ky - params.padding_top; 171 m_kernel_x[n] = kx - params.padding_left; 172 } 173 } 174 } 175 process_columns(const T * input_base,size_t input_stride,unsigned int k_start,unsigned int k_end,unsigned int rounded_stringlen) const176 column_handler process_columns(const T *input_base, size_t input_stride, 177 unsigned int k_start, unsigned int k_end, unsigned int rounded_stringlen) const { 178 return column_handler(*this, input_base, input_stride, k_start, k_end, rounded_stringlen); 179 } 180 }; 181 182 } // namespace arm_gemm 183