xref: /aosp_15_r20/external/libaom/av1/encoder/arm/cnn_neon.c (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2023, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <arm_neon.h>
13 #include <assert.h>
14 #include <math.h>
15 #include <stdbool.h>
16 
17 #include "config/aom_config.h"
18 #include "config/av1_rtcd.h"
19 
20 #include "aom_dsp/aom_dsp_common.h"
21 #include "aom_dsp/arm/sum_neon.h"
22 #include "av1/common/av1_common_int.h"
23 #include "av1/encoder/cnn.h"
24 #include "av1/encoder/partition_cnn_weights.h"
25 
26 // The CNN weights used in av1_cnn_convolve_no_maxpool_padding_valid are
27 // declared (av1_intra_mode_cnn_partition_cnn_layer_[01234]_kernel) in
28 // partition_cnn_weights.h. However, to enable linear memory access, rearrange
29 // the weight tables here.
30 static const float weights_layer_1[] = {
31   0.228403f,  0.031690f,  -0.251710f, -0.046230f, 0.413294f,  -0.236732f,
32   -0.038291f, 0.210766f,  0.427196f,  -0.384319f, -0.439463f, 0.366015f,
33   0.112263f,  -0.144168f, -0.075017f, 0.119629f,  0.325200f,  -0.678246f,
34   -0.370826f, -0.341362f, -0.503392f, 0.400884f,  0.465214f,  -0.360847f,
35   0.187100f,  -0.190757f, -0.131906f, 0.121492f,  -0.303556f, -0.007658f,
36   0.380077f,  -0.066394f, -0.016043f, -1.490730f, -0.120682f, 0.132062f,
37   0.086185f,  -0.042766f, -0.087069f, 0.029426f,  0.309583f,  -0.029985f,
38   -0.297429f, -0.018139f, -0.688828f, 0.756607f,  0.706410f,  -0.696826f,
39   -0.087793f, -0.023304f, -0.012332f, -0.018043f, -0.410268f, 0.352143f,
40   0.391284f,  -0.363178f, -0.295034f, 0.160246f,  -0.149446f, 0.260145f,
41   -0.252249f, 0.190826f,  0.251206f,  -0.270796f, -0.979219f, 0.884880f,
42   0.962057f,  -0.847601f, -0.011053f, 0.118765f,  -0.028428f, -0.020138f,
43   0.400274f,  -0.382845f, -0.462766f, 0.390654f,  0.361223f,  -0.320068f,
44   -0.372084f, 0.313196f,  0.241933f,  -0.416614f, -0.008722f, -0.255078f,
45   0.078730f,  -0.381935f, -0.204577f, 0.159768f,  0.071853f,  -0.126294f,
46   -0.036186f, -0.007900f, 0.380071f,  -0.298882f, 0.387941f,  -0.267350f,
47   -0.586802f, 0.477785f,  -0.000013f, 0.197296f,  -0.079154f, -0.005811f,
48   -0.044300f, -0.021192f, -0.020879f, -0.005265f, 0.082277f,  -0.139132f,
49   -0.239237f, 0.440234f,  -0.542342f, 0.378360f,  -0.070974f, 0.272702f,
50   -0.278939f, -0.044948f, -0.134197f, -0.007172f, -0.353628f, -0.128091f,
51   0.357458f,  -0.037614f, -0.144983f, 0.220623f,  -0.003394f, -0.070166f,
52   0.200370f,  -0.166037f, 0.224448f,  -0.012990f, -0.098853f, 0.008613f,
53   -0.017669f, 0.070641f,  0.174530f,  -0.119822f, -0.065096f, 0.118487f,
54   -0.024764f, -0.050466f, 0.066631f,  -0.075896f, -0.062363f, 0.212604f,
55   -0.377322f, 0.306306f,  -0.399733f, 0.238624f,  0.233571f,  -0.344080f,
56   0.462491f,  -0.565210f, -0.035074f, -0.010459f, 0.084382f,  0.052294f,
57   0.065714f,  0.013716f,  0.135036f,  0.000588f,  0.181079f,  -0.566344f,
58   0.395561f,  -0.398509f, 0.450017f,  -1.462710f, 1.138280f,  -0.447774f,
59   0.247936f,  -0.417067f, 0.165997f,  -0.458632f, -0.018527f, 0.308461f,
60   0.541266f,  0.162257f,  0.601786f,  -1.275840f, -0.373404f, -0.589747f,
61   0.026539f,  -0.219327f, 0.142972f,  -0.018496f, 0.075204f,  -0.775190f,
62   0.237307f,  -0.348252f, 0.117792f,  -0.094332f, 0.363101f,  -0.065025f,
63   0.816662f,  0.590110f,  0.752202f,  -0.308599f, 0.258337f,  -0.842085f,
64   0.695788f,  -0.205615f, 0.093930f,  -0.392536f, 0.463093f,  -0.432456f,
65   0.041660f,  -0.827264f, 0.309128f,  -0.354658f, 0.451957f,  -1.406640f,
66   0.773192f,  -0.892943f, 0.134856f,  -0.467808f, 0.306003f,  -0.226560f,
67   0.086865f,  -0.104102f, 0.148098f,  -0.082658f, 0.316655f,  -1.028310f,
68   0.741566f,  -0.345326f, 0.052379f,  -0.275613f, 0.191765f,  -0.162391f,
69   0.000976f,  0.093061f,  0.068649f,  0.033582f,  0.239727f,  -0.647769f,
70   0.218493f,  -0.397120f, 0.268229f,  -0.303424f, 0.185393f,  -0.314189f,
71   0.101728f,  -0.163083f, -0.084989f, 0.136783f,  -0.264346f, 0.465914f,
72   0.220395f,  -0.252968f, -0.326661f, 0.271483f,  0.374717f,  -0.311570f,
73   -0.082119f, 0.020870f,  0.091975f,  -0.030582f, -0.487148f, 0.198912f,
74   0.024554f,  -0.749363f, -0.102267f, 0.097787f,  0.141459f,  -0.110706f,
75   0.079467f,  -0.082570f, -0.347567f, 0.341043f,  -0.137871f, 0.112319f,
76   0.064733f,  -0.082869f, 0.269999f,  -0.408184f, -0.183443f, 0.180608f,
77   0.223345f,  -0.357376f, -0.244593f, 0.355348f,  -0.072701f, -0.034311f,
78   0.096544f,  0.016407f,  0.417550f,  -0.367772f, -0.484535f, 0.405977f,
79   0.314243f,  -0.099622f, -0.192218f, -0.012780f, 0.434551f,  -0.399047f,
80   -0.531499f, 0.484513f,  -0.691352f, 0.872823f,  1.207720f,  -1.377490f,
81   0.006872f,  -0.041453f, 0.007845f,  0.007463f,  0.467299f,  -0.476372f,
82   -0.452606f, 0.452357f,  0.447332f,  -0.365632f, -0.332435f, 0.300284f,
83   -0.290504f, 0.255410f,  0.310921f,  -0.293717f, -0.616299f, 0.594207f,
84   0.461347f,  -0.449439f, 0.278455f,  0.285085f,  -1.201340f, -0.016463f,
85   0.549095f,  0.610375f,  -4.608530f, -1.727390f, 0.150404f,  -0.012846f,
86   -0.481148f, -0.182257f, 0.918796f,  0.213872f,  1.050410f,  0.681526f,
87   -0.458777f, -0.710395f, -2.347200f, -0.277197f, 0.213294f,  0.337551f,
88   -0.177710f, -0.152136f, 0.167666f,  0.308403f,  -1.248500f, -0.565367f,
89   0.122054f,  0.087874f,  -0.476556f, -0.083548f, -0.358734f, -0.073131f,
90   -0.146320f, -2.241960f, 0.697639f,  0.545581f,  -1.889700f, -0.267725f,
91   0.433045f,  0.298224f,  -0.338508f, 0.250226f,  0.405675f,  0.447201f,
92   -1.184690f, -0.473447f, 0.307403f,  0.711236f,  -3.191560f, -1.663980f,
93   0.165201f,  0.101360f,  -0.624451f, -0.173269f, 0.089795f,  0.227478f,
94   -0.136664f, 0.007907f,  0.131079f,  0.605374f,  -2.991620f, -1.723790f,
95   0.082428f,  0.006781f,  -0.348732f, -0.019271f, -0.032040f, -0.067078f,
96   -0.437166f, -0.144472f, 0.069844f,  0.194625f,  -0.162284f, -0.374656f,
97   0.056472f,  -0.236524f, -0.114241f, -0.029161f, -0.222078f, -0.053435f,
98   -0.313938f, -0.555472f, 1.037550f,  0.689968f,  0.575694f,  0.065826f,
99   -0.659979f, -0.881351f, -0.626417f, -0.953975f, -0.576106f, -0.258708f,
100   0.263004f,  -0.229847f, 0.463835f,  1.390960f,  -2.614480f, -1.272910f,
101   0.065780f,  -0.058603f, 0.015612f,  0.104703f,  0.198028f,  0.262792f,
102   0.253616f,  -0.079126f, -0.587381f, -0.739021f, -0.822676f, -0.795512f,
103   0.193644f,  0.234643f,  -0.034407f, 0.421478f,  -0.572610f, -0.290714f,
104   -0.257803f, -0.644835f, -0.536938f, -0.375899f, -0.651077f, -0.522576f,
105   0.562564f,  0.834616f,  0.513893f,  0.649689f,  0.356530f,  0.400716f,
106   0.300606f,  0.290505f,  0.584608f,  0.671574f,  0.564584f,  0.419870f,
107   0.062061f,  0.018263f,  0.009831f,  0.084103f,  -0.128281f, -0.018818f,
108   -0.187244f, 0.067210f,  0.437147f,  0.442029f,  0.444939f,  0.226661f,
109   0.541609f,  0.444280f,  0.302795f,  0.633026f,  -0.180374f, 0.265197f,
110   0.210404f,  -0.118916f, -0.294013f, -0.692627f, -0.402347f, -0.356287f,
111   0.387578f,  0.385496f,  0.789542f,  0.690396f,  -0.203542f, -0.688546f,
112   0.045319f,  -0.448747f, -0.157148f, 0.152581f,  0.022360f,  0.058358f,
113   0.593007f,  1.131860f,  0.289006f,  1.015560f,  0.144942f,  -0.411577f,
114   0.264794f,  -0.085791f, 0.156996f,  0.200340f,  0.169264f,  0.267615f,
115   -0.361015f, -0.601842f, -0.442217f, -0.781086f, 0.112938f,  0.385305f,
116   0.482454f,  0.470268f,  1.193390f,  0.589642f,  0.127638f,  -0.640946f,
117   0.540310f,  0.741498f,  0.686937f,  0.435879f,  0.534523f,  0.693119f,
118   0.817577f,  0.783109f,  0.021681f,  -0.004973f, 0.201236f,  -0.086311f,
119   0.028628f,  0.227871f,  0.462751f,  0.126832f,  -0.389997f, -0.553965f,
120   -0.343953f, -0.448517f, 0.053129f,  -0.115083f, 0.018138f,  -0.067131f,
121   -0.293468f, -0.220700f, 0.074348f,  -0.273153f, 0.263637f,  0.122049f,
122   0.153025f,  0.076292f,  0.142320f,  0.286734f,  0.100542f,  0.308660f,
123   -0.759591f, -0.750938f, -0.788799f, -0.853076f, -0.588019f, -0.990063f,
124   -0.692327f, -0.722904f, 0.084736f,  0.151068f,  0.159606f,  0.147715f,
125   1.610180f,  1.950330f,  1.765670f,  2.265110f,  0.008262f,  0.185584f,
126   0.039337f,  0.164721f,  0.479446f,  0.314083f,  0.043969f,  0.291320f,
127   0.003400f,  -0.551190f, 0.060158f,  -0.147591f, 0.089117f,  0.042994f,
128   0.042802f,  0.127392f,  -0.066172f, 0.078370f,  0.051408f,  0.014004f,
129   0.086726f,  0.133334f,  -0.046733f, 0.155100f,  -0.118223f, -0.100778f,
130   -0.225245f, -0.460397f, 0.892644f,  1.003770f,  0.405155f,  0.517477f,
131   0.184585f,  0.279090f,  -0.036477f, 0.198703f,  0.027139f,  -0.055728f,
132   -0.022396f, -0.147319f, 2.275540f,  2.014990f,  2.296800f,  2.081730f,
133   -0.088713f, 0.105729f,  -0.027871f, -0.095047f, 0.012429f,  0.014244f,
134   -0.014755f, -0.003017f, 1.332700f,  1.300040f,  1.464250f,  1.305030f,
135   0.032568f,  0.118042f,  0.079632f,  -0.089405f, 0.163905f,  0.146608f,
136   0.026502f,  0.065307f,  -0.056909f, -0.065052f, 0.069851f,  -0.082958f,
137   0.023419f,  -0.026293f, 0.037616f,  -0.048096f, -0.073701f, -0.208295f,
138   -0.782095f, 0.000523f,  0.374131f,  0.420946f,  0.466151f,  0.349651f,
139   -0.679275f, -0.745827f, -0.379918f, -0.900107f, 0.044070f,  -0.347536f,
140   -1.224390f, 0.740113f,  -0.779966f, 0.510920f,  -0.968597f, -0.095630f,
141   0.120805f,  0.676803f,  -0.164827f, 0.172996f,  -0.106720f, 0.197527f,
142   0.337561f,  0.571094f,  -0.279090f, -0.396697f, -0.253083f, -0.690170f,
143   -0.363291f, 0.516921f,  0.489391f,  -0.920628f, 0.497572f,  0.483864f,
144   -0.125696f, -0.338123f, -0.041517f, -0.534630f, -0.388465f, -0.784554f,
145   0.215227f,  0.055088f,  0.179638f,  0.086997f,  0.569313f,  0.572926f,
146   0.137182f,  -0.045485f, 0.118087f,  0.210383f,  0.212664f,  0.482443f,
147   0.151921f,  0.307947f,  -0.084656f, -0.386206f, 0.542277f,  -0.207005f,
148   0.073792f,  -1.013240f, 0.303581f,  0.270527f,  0.265985f,  0.332702f,
149   0.848609f,  0.686757f,  0.767212f,  0.316901f,  -0.502460f, -0.567092f,
150   -0.484799f, -0.173350f, -0.426863f, 0.222375f,  -0.200267f, -0.523758f,
151   0.265180f,  -0.175648f, -0.229754f, 0.148740f,  0.402515f,  0.028243f,
152   -0.366109f, 0.157232f,  -0.131564f, 0.055136f,  0.211046f,  -0.115542f,
153   0.322379f,  -0.137768f, -0.247832f, 0.070394f,  0.058530f,  -0.295023f,
154   -0.196022f, -0.109097f, 0.261285f,  -0.273585f, -0.240632f, 0.258326f,
155   -0.077364f, 0.071405f,  -0.014766f, -0.008751f, -0.203622f, 0.177818f,
156   0.116726f,  -0.116735f, -0.723616f, -0.700154f, 0.145082f,  -0.184949f,
157   -0.287076f, 0.150405f,  0.258075f,  -0.157764f, -0.120909f, 0.105459f,
158   0.113288f,  -0.092963f, 0.328183f,  -0.300115f, -0.361289f, 0.319792f,
159   -0.048875f, 0.135673f,  0.132539f,  -0.162481f, 0.002109f,  0.065048f,
160   -0.135969f, 0.061558f,  1.510670f,  -0.884925f, -0.827022f, 0.190311f,
161   -0.060088f, -0.033362f, 0.013354f,  0.002847f,  0.353479f,  -0.462538f,
162   -0.319638f, 0.424484f,  0.199540f,  -0.073843f, -0.140621f, 0.072133f,
163   -0.098662f, 0.070613f,  0.031150f,  -0.021869f, -0.511253f, 0.503412f,
164   0.565963f,  -0.576146f, -1.081700f, 0.047670f,  0.266687f,  0.524804f,
165   -2.361150f, 0.147823f,  0.594717f,  0.956842f,  -1.048220f, 0.127083f,
166   0.079581f,  0.065419f,  0.176783f,  0.653953f,  0.260967f,  0.537892f,
167   -1.207580f, 0.245983f,  -0.727067f, 0.071755f,  -0.343025f, -0.173435f,
168   0.215289f,  0.268578f,  -1.158560f, 0.039263f,  -0.132888f, 0.217132f,
169   -0.622195f, -0.071256f, 0.317333f,  0.157614f,  -1.588250f, 0.316432f,
170   -0.736720f, -0.041698f, -1.959280f, 0.083451f,  0.570584f,  0.327620f,
171   -1.262200f, -0.026738f, 0.231198f,  0.326861f,  -1.644200f, -0.143833f,
172   -0.079495f, 0.493026f,  -2.488090f, -0.034046f, 0.165884f,  1.074260f,
173   -1.076980f, 0.248198f,  -0.017987f, 0.421900f,  -0.105860f, 0.076710f,
174   0.002072f,  0.070264f,  -1.734750f, 0.227145f,  0.209220f,  0.851459f,
175   -0.142369f, 0.066502f,  0.027816f,  0.044321f,  -0.186591f, -0.100340f,
176   0.115580f,  0.192252f,  -0.892114f, 0.209531f,  -0.308243f, 0.367968f,
177   -0.721770f, 0.220224f,  -0.062744f, 0.133754f,  0.040416f,  0.190428f,
178   -0.035428f, 0.162974f,  0.116427f,  0.669393f,  0.278891f,  0.856676f,
179   1.060390f,  0.936983f,  0.863355f,  0.990560f,  -0.147111f, -0.217883f,
180   0.355794f,  -0.186530f, -0.275614f, -0.095719f, 0.167346f,  0.359078f,
181   -0.079223f, -0.581596f, -0.213134f, -0.431123f, -0.516443f, -0.388628f,
182   -0.643821f, -0.202345f, 0.426230f,  0.516923f,  0.548131f,  0.555973f,
183   0.022286f,  0.361170f,  0.980065f,  0.648400f,  -0.056813f, -0.100310f,
184   -0.439481f, -0.166454f, 0.412449f,  0.509400f,  0.316208f,  0.470293f,
185   -0.827838f, -1.078380f, -1.047040f, -1.074560f, 0.274555f,  -0.316736f,
186   0.128818f,  0.228566f,  -0.520967f, -0.731674f, -0.687887f, -0.536388f,
187   -0.031187f, 0.041404f,  0.047821f,  0.064397f,  0.054230f,  0.105059f,
188   -0.178671f, 0.176847f,  -0.394797f, -0.260255f, -0.333734f, -0.162345f,
189   -0.444650f, -0.928438f, -0.705840f, -0.833162f, 0.306737f,  0.429699f,
190   0.417298f,  0.478469f,  0.420903f,  0.676871f,  0.429677f,  0.616921f,
191   -0.805199f, -0.643391f, -0.304100f, 0.797599f,  -0.172157f, 0.429085f,
192   -0.750676f, 0.149227f,  -0.207898f, -0.022534f, -0.341448f, -0.247976f,
193   0.095325f,  -0.561120f, 0.599694f,  -0.025236f, 0.292346f,  -0.312001f,
194   0.517478f,  0.301457f,  -0.106415f, 0.226263f,  -0.184163f, -0.114419f,
195   -0.322702f, 0.172541f,  0.445573f,  0.157213f,  0.670704f,  0.102174f,
196   -0.234667f, -0.293311f, 0.769852f,  0.038028f,  -0.036741f, -0.228060f,
197   -0.253335f, 0.424054f,  -0.597980f, 0.221007f,  -0.114741f, -0.411557f,
198   -0.592201f, 0.442684f,  0.115491f,  -0.106896f, -0.028110f, 0.354751f,
199   -0.248375f, 0.242570f,  -0.155856f, 0.280528f,  -0.198742f, 0.588725f,
200   0.371065f,  0.078197f,  0.114706f,  -0.448021f, 0.065255f,  0.133741f,
201   -0.227522f, -0.047339f, -0.052849f, 0.309480f,  0.597185f,  0.209182f,
202   0.226108f,  -0.601036f, -0.431672f, -0.172601f, -0.000174f, 0.194292f,
203   -0.133937f, 0.130676f,  0.059372f,  0.091381f,  0.098751f,  -0.150996f,
204   0.170514f,  -0.085494f, 0.336576f,  0.484004f,  0.033862f,  0.277473f,
205   -0.231482f, -0.328385f, -0.332739f, -0.626957f, 0.510167f,  0.575861f,
206   0.421494f,  0.482540f,  -0.636377f, -0.864661f, -0.694180f, -0.420014f,
207   -0.132781f, 0.017599f,  0.003538f,  0.486934f,  0.133878f,  -0.094622f,
208   0.016132f,  0.010117f,  0.156680f,  -0.022201f, -0.014621f, 0.228445f,
209   0.190826f,  0.171580f,  0.579923f,  0.245428f,  0.322713f,  0.480101f,
210   0.406320f,  0.412229f,  0.002334f,  -0.022349f, 0.074571f,  -0.043828f,
211   0.290453f,  0.451749f,  0.530376f,  0.271879f,  0.095144f,  0.169450f,
212   0.049482f,  0.114605f,  -0.635634f, -0.700768f, -0.558538f, -0.537625f,
213   0.190255f,  -0.308237f, -0.053703f, 0.212489f,  0.056520f,  -0.040019f,
214   0.089822f,  -0.014155f, -0.376004f, -0.448752f, -0.526717f, -0.571440f,
215   0.116482f,  0.162321f,  0.147895f,  0.280527f,  0.159037f,  -0.095958f,
216   0.007931f,  -0.086630f, 0.285625f,  0.514914f,  0.208908f,  0.519251f,
217   0.309368f,  0.379777f,  0.350565f,  0.487487f,  -0.541494f, -0.421836f,
218   -0.390001f, -0.500696f, -0.905736f, -0.150439f, -0.942304f, -0.566771f,
219   0.484233f,  0.767417f,  0.410477f,  0.670196f,  0.070210f,  0.488836f,
220   0.372805f,  0.197631f,  0.337892f,  0.524423f,  0.777219f,  -0.260955f,
221   -0.112981f, -0.060088f, -0.200250f, -0.195671f, 0.007584f,  0.252096f,
222   0.235511f,  0.366612f,  -0.304979f, -0.211068f, -0.420683f, -0.085370f,
223   0.085762f,  -0.097549f, -0.802509f, -0.468079f, -0.192787f, -0.069670f,
224   -0.235162f, -0.077772f, -0.441671f, -0.348479f, -0.431434f, -0.108256f,
225   -0.133779f, 0.017032f,  0.001964f,  -0.120647f, -0.187663f, -0.194985f,
226   -0.231742f, -0.175288f, -0.162639f, 0.245110f,  0.049951f,  0.104229f,
227   -0.159634f, -0.076545f, -0.022496f, -0.036532f, -0.147028f, -0.034215f,
228   0.028213f,  -0.059669f, -0.078259f, 0.062993f,  -0.124066f, -0.137362f,
229   -0.129977f, -0.010532f, -0.049090f, -0.189401f, 0.495471f,  0.615778f,
230   0.451437f,  0.803526f,  0.523532f,  0.841339f,  0.699528f,  0.745129f,
231   0.246264f,  -0.198290f, -0.283620f, 0.189917f,  -0.018306f, -0.419097f,
232   0.280363f,  -0.098085f, 0.138972f,  -0.140867f, -0.117025f, 0.098585f,
233   0.130979f,  0.268133f,  -0.161731f, -0.176629f, -0.357677f, -0.126379f,
234   0.553128f,  -0.126821f, -0.001511f, -0.010081f, -0.031162f, 0.079203f,
235   -0.157731f, 0.072865f,  0.535830f,  -0.529989f, -0.570075f, 0.295795f,
236   0.595613f,  -0.449278f, -0.669756f, 0.941452f,  0.356897f,  -0.723720f,
237   -0.115203f, -0.134479f, 0.133048f,  0.109860f,  -0.024250f, -0.049732f,
238   0.020098f,  0.048356f,  -0.048293f, 0.108754f,  0.062548f,  -0.238315f,
239   0.182700f,  0.312011f,  -0.244377f, -0.118012f, 0.012276f,  0.006089f,
240   0.098068f,  -0.079280f, -0.423987f, -0.411931f, -0.027425f, 0.870280f,
241   0.022825f,  -0.024481f, -0.036320f, -0.111189f, 0.364539f,  -0.244896f,
242   -0.373060f, 0.266345f,  -0.141778f, 0.277549f,  0.059834f,  -0.178242f,
243   -0.686222f, 0.594535f,  0.354546f,  -0.272516f, 1.060730f,  -1.059810f,
244   -0.948126f, 0.993267f,  0.116597f,  -0.227574f, -0.436144f, -0.333309f,
245   -0.575746f, -0.828102f, 0.284561f,  0.351668f,  -0.080164f, -0.762518f,
246   -0.511108f, -0.212855f, 0.293892f,  -0.548664f, 0.072057f,  0.006748f,
247   1.485110f,  0.124687f,  0.727211f,  1.557560f,  -0.064383f, -0.022242f,
248   0.002921f,  -0.151505f, 0.270926f,  0.173632f,  -0.640644f, 0.422410f,
249   -0.240699f, -0.361980f, -0.279864f, -0.055165f, -1.084140f, 0.231705f,
250   0.366172f,  -0.347698f, -0.097565f, -0.747227f, -0.243033f, 0.941545f,
251   -0.207460f, -0.353913f, 0.104303f,  -0.403151f, 0.203177f,  0.335893f,
252   -0.229033f, 0.029096f,  -0.409634f, -0.179599f, -0.442397f, 0.649114f,
253   0.460774f,  0.170906f,  -0.043857f, 0.402066f,  -0.226896f, -0.199624f,
254   0.016650f,  0.207894f,  0.056954f,  0.220329f,  0.374060f,  0.130361f,
255   -0.303960f, -0.078863f, 0.195410f,  0.729438f,  0.246818f,  0.287730f,
256   0.484876f,  0.111488f,  -0.168647f, -0.087878f, -0.070089f, -0.341329f,
257   -0.330280f, 0.259943f,  -0.364205f, 0.256555f,  -0.756804f, -0.086915f,
258   0.777351f,  0.006136f,  0.110348f,  0.248743f,  0.209326f,  -0.362741f,
259   -0.184416f, 0.422446f,  0.565193f,  0.310072f,  -0.011212f, -0.765226f,
260   0.039466f,  0.301288f,  0.172907f,  -1.539450f, 0.606202f,  0.477469f,
261   0.045894f,  -0.222180f, -0.013192f, -0.064077f, -0.241551f, 0.192914f,
262   0.028004f,  -0.540538f, 0.437440f,  0.179087f,  -0.753204f, -0.001374f,
263   1.185930f,  -0.151182f, 1.238580f,  -1.389900f, 0.277954f,  0.422208f,
264   0.041553f,  -0.542284f, 0.139019f,  -0.148580f, -0.130705f, 0.361830f,
265   0.322953f,  -0.092371f, 0.120180f,  -0.355299f, -0.028057f, 0.128114f,
266   0.250947f,  -0.349926f, -0.684633f, 0.246175f,  0.186731f,  -0.676313f,
267   0.060535f,  0.333371f,  -0.021172f, -0.421266f, -0.079650f, 0.031359f,
268   -0.303658f, -0.298286f, 0.119016f,  0.655585f,  0.200175f,  -0.887182f,
269   -0.197539f, -0.318883f, -0.130250f, 0.522487f,  -0.092616f, 0.405930f,
270   -0.281678f, 0.089728f,  0.081814f,  -0.781745f, 0.348878f,  0.082274f,
271   -0.914136f, 1.098810f,  0.855321f,  -1.078170f, -0.268018f, 0.246440f,
272   0.238347f,  -0.027228f, 0.074111f,  -0.061197f, -0.063582f, 0.089462f,
273   -0.040347f, 0.117082f,  0.122772f,  -0.162816f, -0.148668f, -0.342856f,
274   -0.495604f, -1.453630f, -0.045273f, -0.030463f, 0.043766f,  0.047978f,
275   0.016910f,  -0.009700f, 0.006288f,  -0.042556f, 0.632896f,  -0.845744f,
276   -0.516844f, 0.709439f,  0.486166f,  -1.203050f, -0.978381f, 0.631876f,
277   0.000705f,  0.123858f,  -0.001187f, -0.172312f, -0.422668f, 0.241838f,
278   0.437400f,  -0.268186f, -0.513259f, 0.450209f,  0.542629f,  -0.453810f,
279   -0.207119f, 0.072598f,  0.085066f,  -0.018986f, -0.149512f, 0.149521f,
280   0.182105f,  -0.227200f, -0.363240f, 0.172670f,  -0.502932f, 0.689256f,
281   0.093760f,  -0.090207f, -0.066803f, 0.056759f,  -0.002243f, -0.050662f,
282   -0.059324f, 0.152943f,  -0.701150f, 0.712540f,  0.660349f,  -0.654970f,
283   0.351772f,  -0.303383f, -0.311177f, 0.247653f,  0.013035f,  0.034648f,
284   -0.137832f, 0.041197f,  0.410265f,  0.345129f,  0.653338f,  0.047050f,
285   0.140399f,  0.018613f,  -0.012431f, -0.113632f, -0.029928f, 0.051564f,
286   -0.031349f, 0.151944f,  -0.160340f, 0.326798f,  -0.458067f, 0.636235f,
287   0.243184f,  0.514072f,  2.414450f,  1.421980f,  -0.001474f, -0.141389f,
288   -0.104817f, -0.141882f, -0.026395f, 0.053014f,  0.143885f,  -0.207774f,
289   -0.563846f, -0.242514f, -0.436574f, -0.456796f, -0.520646f, 0.282550f,
290   -0.684924f, 0.061105f,  -0.315884f, -0.392624f, 0.009805f,  -0.256597f,
291   -0.146732f, 0.331039f,  0.362342f,  0.270851f,  0.067679f,  -0.071331f,
292   -0.222423f, 0.081286f,  -0.208192f, -0.193816f, -0.008201f, -0.309340f,
293   0.167556f,  0.106071f,  0.172254f,  -0.163790f, -0.142205f, -0.043182f,
294   0.096145f,  0.145037f,  -0.066015f, -0.073194f, 0.132237f,  -0.088522f,
295   -0.044292f, -0.487128f, 0.033389f,  -0.573548f, 0.185449f,  0.273593f,
296   0.147503f,  0.457049f,  -0.021539f, 0.090786f,  0.009147f,  0.000899f,
297   0.018088f,  0.115791f,  -0.079165f, 0.139388f,
298 };
299 
300 static const float weights_layer_2[] = {
301   0.153048f,  0.112901f,  0.136781f,  0.154580f,  0.091610f,  0.045165f,
302   0.088490f,  0.116991f,  -0.463766f, -0.596567f, -0.567008f, -0.630565f,
303   0.141874f,  0.095726f,  0.175427f,  0.145027f,  -0.969824f, -1.018190f,
304   -1.073300f, -1.041130f, -0.070545f, -0.123600f, -0.114967f, -0.169453f,
305   -0.267458f, -0.147730f, -0.161419f, -0.164894f, -0.117508f, -0.204389f,
306   -0.122695f, -0.163107f, -0.003903f, -0.030470f, -0.037433f, -0.059568f,
307   0.138243f,  0.091019f,  0.160372f,  0.141650f,  -0.544565f, -0.620004f,
308   -0.504503f, -0.429979f, -0.099491f, -0.096384f, -0.155265f, -0.188536f,
309   0.084923f,  0.038345f,  0.066706f,  0.122083f,  0.267087f,  0.184419f,
310   0.261478f,  0.255746f,  -0.245894f, -0.114980f, -0.193880f, -0.227785f,
311   0.087536f,  0.095712f,  0.106105f,  0.099353f,  -0.059473f, -0.173247f,
312   -0.202386f, -0.076010f, 0.125928f,  0.100793f,  0.119638f,  0.129623f,
313   0.136593f,  0.102984f,  0.156550f,  0.140558f,  0.122524f,  0.051596f,
314   0.084164f,  0.123630f,  0.072542f,  0.096063f,  0.083236f,  0.087630f,
315   0.025900f,  0.023738f,  0.036385f,  0.053077f,  -0.029501f, 0.010544f,
316   -0.010026f, -0.051268f, 0.086302f,  0.109909f,  0.101385f,  0.127513f,
317   -0.031869f, 0.005340f,  -0.056267f, -0.032955f, 0.032748f,  0.023162f,
318   0.092118f,  -0.001780f, -0.123612f, -0.183433f, -0.202377f, -0.317516f,
319   0.129052f,  0.208112f,  0.145582f,  0.175502f,  0.018476f,  0.036349f,
320   0.072417f,  0.061194f,  0.086985f,  0.117086f,  0.072465f,  0.129068f,
321   0.020182f,  0.052114f,  0.017878f,  0.010478f,  -0.001381f, -0.034644f,
322   0.025135f,  -0.037748f, 0.004973f,  0.024778f,  0.041816f,  0.032111f,
323   0.080268f,  0.124998f,  0.105719f,  0.177047f,  -0.072114f, -0.011864f,
324   -0.076846f, -0.089840f, 0.069993f,  0.089362f,  0.088035f,  0.120621f,
325   0.065916f,  0.100946f,  -0.006784f, -0.007751f, 0.122039f,  0.126482f,
326   0.078629f,  0.140299f,  0.074034f,  0.092464f,  0.089798f,  0.108968f,
327   0.075729f,  0.057128f,  0.013570f,  0.021195f,  0.068901f,  0.054022f,
328   0.029781f,  0.031404f,  -0.209998f, -0.208731f, -0.198310f, -0.212454f,
329   -0.579168f, -0.490190f, -0.607567f, -0.520541f, 0.083863f,  0.056612f,
330   0.030366f,  0.061790f,  -0.004874f, -0.057203f, -0.060429f, -0.049145f,
331   0.080086f,  0.138602f,  0.223796f,  0.133279f,  -0.495954f, -0.612093f,
332   -0.545393f, -0.562310f, 0.070672f,  0.037702f,  0.139013f,  0.080192f,
333   -0.111387f, -0.048165f, 0.074359f,  -0.042125f, 0.113633f,  0.106579f,
334   0.042633f,  0.102734f,  -0.068220f, 0.128423f,  -0.181821f, -0.013260f,
335   -0.108563f, -0.138667f, -0.109304f, -0.131909f, -0.168667f, -0.126870f,
336   -0.132533f, -0.167096f, -0.184741f, -0.140890f, -0.125361f, -0.150632f,
337   0.309013f,  0.364376f,  0.361102f,  0.271566f,  0.116552f,  0.091160f,
338   0.096846f,  0.095954f,  0.046972f,  0.080489f,  0.028766f,  -0.012223f,
339   0.071379f,  0.041535f,  -0.000668f, 0.033698f,  -0.013493f, -0.027535f,
340   -0.025804f, -0.012267f, -0.097465f, -0.099232f, -0.208863f, -0.225201f,
341   -0.475608f, 0.077358f,  -0.002872f, 0.163890f,  -0.420298f, 0.072114f,
342   0.121601f,  -0.016727f, 0.573853f,  -0.080196f, 0.193053f,  0.053012f,
343   -0.454179f, 0.058563f,  0.067265f,  0.141154f,  0.412541f,  0.086933f,
344   0.030407f,  -0.030413f, 0.478757f,  -0.097731f, 0.277072f,  -0.086393f,
345   0.552604f,  -0.334201f, 0.091765f,  -0.270262f, -1.395060f, 0.271837f,
346   -0.005335f, 0.240499f,  0.175442f,  -0.326329f, -0.019353f, -0.270338f,
347   -0.459273f, 0.096183f,  0.153046f,  0.135818f,  0.759028f,  -0.177673f,
348   -0.099966f, 0.103363f,  0.697289f,  -0.234184f, -0.048706f, -0.116099f,
349   -0.282575f, 0.025655f,  -0.184759f, 0.040658f,  -0.558267f, 0.214087f,
350   -0.095620f, 0.200522f,  0.278996f,  0.031959f,  0.122936f,  -0.209196f,
351   -0.308217f, 0.092917f,  0.113269f,  0.136274f,  -0.037046f, 0.017263f,
352   -0.194183f, 0.089133f,  -0.161244f, 0.042799f,  0.030557f,  0.153545f,
353   -0.355048f, 0.070928f,  -0.152852f, 0.102875f,  -0.193649f, 0.007916f,
354   -0.062952f, 0.050602f,  0.073671f,  0.143045f,  -5.978970f, -7.013850f,
355   0.058713f,  0.076116f,  0.026445f,  -0.056599f, -0.005966f, 0.032234f,
356   0.006753f,  -0.024528f, 0.120308f,  0.179939f,  -6.624630f, -7.638680f,
357   0.026359f,  0.020758f,  0.194274f,  0.051489f,  -0.008491f, -0.028248f,
358   -0.061328f, -0.134423f, -0.103951f, -0.110877f, 0.042263f,  0.127016f,
359   0.012473f,  -0.008595f, 0.031357f,  0.087476f,  -0.084022f, -0.015590f,
360   -0.313546f, 0.120072f,  0.123880f,  0.162148f,  -6.596560f, -7.358830f,
361   0.004797f,  -0.003415f, 0.048455f,  0.026737f,  -0.103702f, 0.034416f,
362   -0.003475f, -0.236827f, 0.005378f,  0.048413f,  0.054612f,  -0.079359f,
363   0.043707f,  0.001085f,  0.023380f,  0.007785f,  0.025938f,  -0.052856f,
364   -0.033421f, 0.022643f,  0.034161f,  0.127681f,  -5.019490f, -5.233580f,
365   -0.128630f, 0.087741f,  -0.239834f, -0.377876f, 0.128082f,  0.142730f,
366   -0.086819f, -0.350927f, 0.089849f,  0.155776f,  -6.155120f, -5.721720f,
367   0.056110f,  0.008761f,  0.045579f,  0.016762f,  -0.134076f, -0.101551f,
368   -0.096058f, -0.117146f, 0.003527f,  -0.056942f, -0.005578f, 0.071287f,
369   0.023776f,  -0.028003f, -0.075390f, -0.191160f, -0.089672f, -0.104372f,
370   -0.104750f, -0.080813f, -0.249824f, -0.124479f, -0.243593f, -0.244284f,
371   -0.554911f, -0.549095f, -0.564693f, -0.475107f, -0.121771f, -0.143441f,
372   -0.171170f, -0.120920f, 0.109831f,  0.079708f,  0.327295f,  0.308907f,
373   -0.178785f, -0.428316f, -0.418882f, -0.366750f, -0.139296f, -0.129645f,
374   -0.081237f, -0.101533f, -0.006256f, -0.146756f, -0.322110f, -0.338865f,
375   -0.306085f, -0.319592f, -0.454803f, -0.363560f, -0.018557f, 0.006605f,
376   -0.131198f, -0.077708f, 0.138160f,  0.119611f,  0.271098f,  0.232168f,
377   0.027812f,  0.035390f,  -0.202503f, -0.091172f, -0.142020f, -0.159929f,
378   -0.106404f, -0.107433f, -0.381743f, -0.353222f, -0.484159f, -0.469926f,
379   -0.234659f, -0.315674f, -0.178327f, -0.213485f, -0.096207f, -0.190944f,
380   -0.118917f, -0.161288f, 0.015996f,  0.060737f,  0.051390f,  0.060876f,
381   0.229289f,  0.282418f,  0.250945f,  0.197273f,  0.045131f,  -0.008305f,
382   0.072024f,  0.044547f,  -0.050010f, 0.055504f,  0.001343f,  -0.014445f,
383   0.254909f,  0.309091f,  0.228249f,  0.274843f,  0.089778f,  -0.046581f,
384   0.072714f,  0.126814f,  -0.048931f, -0.045743f, -0.151333f, -0.004490f,
385   0.179966f,  0.058150f,  -0.178622f, -0.088159f, -0.074416f, -0.005821f,
386   -0.011799f, -0.002225f, -0.069361f, -0.098937f, -0.081575f, -0.034796f,
387   0.253792f,  0.301039f,  0.219163f,  0.256027f,  0.058007f,  -0.041431f,
388   0.040674f,  0.009019f,  -0.099670f, -0.099077f, -0.039437f, 0.017946f,
389   0.060717f,  0.045796f,  0.109664f,  0.032138f,  -0.071094f, 0.023697f,
390   0.011335f,  -0.030465f, 0.068677f,  0.039345f,  -0.045078f, 0.084037f,
391   0.135517f,  0.190417f,  0.175578f,  0.155286f,  -0.044505f, 0.010826f,
392   0.006717f,  -0.134715f, 0.068022f,  0.110095f,  0.079966f,  0.034481f,
393   0.185804f,  0.188273f,  0.227283f,  0.135935f,  0.033447f,  0.031571f,
394   -0.014766f, -0.024565f, 0.021792f,  0.017675f,  -0.001333f, -0.040069f,
395   -0.049384f, -0.045256f, -0.014013f, -0.000107f, -0.096928f, -0.111495f,
396   -0.051225f, -0.060449f, 0.071446f,  0.017294f,  -0.004822f, 0.006932f,
397   0.020884f,  0.089425f,  0.061097f,  -0.038708f, -0.184029f, -0.089541f,
398   -0.158035f, -0.214607f, -0.377947f, -0.318586f, -0.336977f, -0.323908f,
399   0.181612f,  0.140018f,  0.233524f,  0.193366f,  -0.254507f, -0.271902f,
400   -0.197144f, -0.119539f, 0.042162f,  0.000320f,  0.014708f,  -0.014228f,
401   -0.081119f, -0.089326f, 0.001763f,  0.081009f,  -0.142618f, -0.160650f,
402   -0.214597f, -0.202143f, -0.053495f, -0.012819f, -0.071468f, -0.010883f,
403   0.072570f,  0.071507f,  0.091045f,  0.083155f,  -0.271237f, -0.289211f,
404   -0.272345f, -0.299411f, 0.031697f,  -0.029795f, -0.030045f, -0.013604f,
405   -0.106843f, -0.045212f, -0.122459f, -0.096936f, 0.059793f,  0.006157f,
406   0.028092f,  0.040589f,  -0.014560f, -0.008975f, -0.051404f, -0.014309f,
407   -0.016883f, 0.018332f,  0.040114f,  0.050348f,  0.044921f,  -0.002445f,
408   -0.112396f, 0.014395f,  0.115160f,  0.145350f,  -0.166814f, -0.121449f,
409   0.155573f,  -0.099446f, -0.161661f, 0.187251f,  0.004711f,  0.024318f,
410   -0.060871f, -0.028311f, -0.098274f, 0.322030f,  -0.069242f, -0.153173f,
411   -0.227428f, -0.293965f, 0.228491f,  0.111413f,  -1.354720f, -0.344235f,
412   0.866715f,  0.872344f,  0.078789f,  -0.384865f, 0.162388f,  0.109018f,
413   -0.191549f, -0.002638f, 0.305053f,  0.087337f,  0.066506f,  -0.055810f,
414   -0.010984f, -0.056160f, -0.114617f, -0.058478f, 0.022059f,  -0.124368f,
415   -0.130989f, 0.369432f,  -0.248898f, -0.003955f, -0.021578f, 0.115991f,
416   -0.114163f, -0.065232f, 0.339857f,  -0.225997f, 0.006282f,  -0.125395f,
417   0.235082f,  -0.347785f, 0.662321f,  -0.529182f, 0.153297f,  -0.001326f,
418   -0.026725f, -0.024677f, -0.088065f, -0.116127f, 0.080896f,  0.212542f,
419   0.208421f,  0.032047f,  -0.211395f, 0.074997f,  0.096659f,  0.096423f,
420   -0.078643f, 0.106556f,  -0.123860f, 0.075609f,  0.066008f,  -0.097275f,
421   -1.000020f, -0.780154f, -0.856922f, -0.964007f, 0.083135f,  -0.018922f,
422   -0.266214f, -0.151480f, 0.051538f,  0.017802f,  0.066774f,  -0.021341f,
423   -0.869494f, -0.935252f, -0.895836f, -0.853871f, -0.160490f, 0.085850f,
424   -0.029670f, -0.056675f, 0.159989f,  0.166872f,  0.129970f,  0.194377f,
425   0.153294f,  0.199593f,  0.037692f,  0.103391f,  0.029335f,  -0.085324f,
426   -0.079326f, -0.077216f, 0.501561f,  0.366168f,  0.330196f,  0.296432f,
427   -0.977282f, -0.844295f, -1.014870f, -1.098990f, -0.099858f, -0.129552f,
428   0.090051f,  -0.013378f, 0.081330f,  0.194911f,  0.286501f,  0.177363f,
429   -0.148250f, -0.111700f, -0.243081f, -0.102918f, 0.161069f,  -0.012655f,
430   -0.071722f, -0.020329f, -0.077828f, -0.041716f, 0.109247f,  0.062229f,
431   -0.759722f, -0.742756f, -0.563713f, -0.631187f, 0.005911f,  0.268154f,
432   -0.263769f, 0.087149f,  -0.163623f, -0.359600f, -0.464577f, -0.369352f,
433   -0.515784f, -0.475822f, -0.523485f, -0.649813f, -0.112419f, -0.029285f,
434   0.021061f,  -0.041515f, 0.149133f,  -0.254428f, 0.115776f,  -0.061892f,
435   0.103675f,  -0.283363f, 0.005005f,  0.022034f,  -0.178454f, 0.035836f,
436   -0.113702f, -0.217823f, 0.209407f,  -0.296257f, 0.187976f,  -0.157370f,
437   -0.127190f, 0.251780f,  0.055633f,  0.294111f,  -0.067773f, 0.467190f,
438   -0.192625f, -0.071084f, -0.445284f, 0.511090f,  -0.319728f, 0.267971f,
439   0.494929f,  -0.586727f, 0.454543f,  -0.520675f, -0.085900f, 0.325989f,
440   -0.131006f, -0.069501f, 0.199927f,  -0.218919f, 0.170055f,  -0.106538f,
441   0.133312f,  0.127629f,  -0.561625f, 0.595666f,  -0.090927f, 0.363348f,
442   -0.249246f, 0.063068f,  -0.016458f, -0.291045f, -0.040509f, 0.017866f,
443   0.304871f,  -0.459214f, 0.214390f,  -0.238740f, -0.456541f, 0.545848f,
444   -0.218026f, 0.202475f,  0.128490f,  -0.036417f, 0.173885f,  -0.049385f,
445   0.235514f,  -0.132587f, -0.015066f, 0.164638f,  0.196873f,  -0.125330f,
446   0.216912f,  -0.109398f, 0.121602f,  -0.209374f, 0.164400f,  -0.123049f,
447   0.195520f,  -0.212932f, -0.015180f, -0.005784f, 0.049726f,  -5.822150f,
448   0.124536f,  0.040689f,  -0.018560f, -3.155020f, 0.014690f,  0.076202f,
449   -0.154008f, 1.070630f,  -0.071606f, 0.051026f,  0.138285f,  -5.836340f,
450   0.162173f,  0.085890f,  -0.186166f, 0.093221f,  0.019240f,  -0.017053f,
451   -0.090144f, 0.236254f,  -0.125344f, 0.056235f,  -0.089813f, -0.252281f,
452   -0.127406f, -0.155088f, 0.009972f,  -0.066449f, 0.044222f,  0.025943f,
453   -0.164921f, 0.165463f,  -0.001132f, -0.038386f, 0.115194f,  -5.757100f,
454   0.163386f,  0.061226f,  0.024626f,  0.132750f,  0.107279f,  -0.001622f,
455   -0.107860f, -0.356009f, -0.138935f, -0.145173f, -0.061198f, -0.646138f,
456   0.034279f,  0.078187f,  0.108138f,  -0.490444f, 0.074719f,  0.034984f,
457   -0.109303f, 0.741785f,  -0.066939f, 0.015558f,  0.114229f,  -4.001080f,
458   0.130772f,  0.044675f,  -0.165162f, -0.274810f, -0.042987f, -0.048579f,
459   0.156603f,  -1.288370f, 0.076198f,  0.035065f,  0.032043f,  -5.002520f,
460   0.086900f,  -0.010886f, 0.030850f,  -0.782259f, 0.056211f,  -0.097759f,
461   0.118988f,  0.106638f,  0.091419f,  0.079920f,  0.062325f,  0.097116f,
462   0.126035f,  0.122530f,  -0.278299f, -0.083314f, -0.300563f, -0.197946f,
463   0.081664f,  0.089925f,  0.074754f,  0.074628f,  0.102338f,  0.088845f,
464   0.105841f,  0.102381f,  0.003087f,  0.061599f,  0.098326f,  0.040119f,
465   -0.005298f, -0.028834f, 0.059938f,  -0.013668f, -0.585882f, -0.631436f,
466   -0.742673f, -0.736666f, 0.025071f,  0.066851f,  0.075046f,  0.091360f,
467   0.099045f,  0.098261f,  0.106413f,  0.099487f,  -0.016742f, -0.097334f,
468   -0.086152f, -0.212444f, -0.028043f, -0.007362f, 0.003914f,  -0.055864f,
469   0.034756f,  0.081361f,  0.080183f,  0.061319f,  0.193396f,  0.173716f,
470   0.207765f,  0.231701f,  -0.074565f, -0.073257f, -0.086470f, -0.083114f,
471   0.081489f,  0.078477f,  0.033452f,  0.058835f,  -0.069665f, -0.031691f,
472   -0.111255f, -0.167754f, 0.184179f,  0.174673f,  0.160288f,  0.190893f,
473   0.110930f,  0.103495f,  0.098408f,  0.102918f,  0.053764f,  0.089994f,
474   0.140308f,  0.124867f,  0.074176f,  0.117460f,  -0.160775f, -0.144132f,
475   -0.099373f, -0.035913f, 0.081237f,  0.062247f,  -0.166421f, 0.062125f,
476   0.276479f,  0.060955f,  0.066627f,  0.455347f,  0.219953f,  0.109912f,
477   0.273931f,  0.233153f,  0.102236f,  0.447606f,  -0.352243f, 0.499236f,
478   -0.931206f, 0.248595f,  0.254047f,  0.061542f,  0.268804f,  0.309517f,
479   -0.084414f, -0.245828f, -0.144882f, -0.296579f, -0.091628f, -0.142202f,
480   -0.541764f, -0.407470f, 0.053481f,  0.238955f,  0.150188f,  -0.060598f,
481   0.196118f,  -0.215617f, -0.086238f, -0.263420f, 0.206877f,  0.241788f,
482   -0.122544f, -0.448790f, 0.286917f,  0.112063f,  -0.268408f, -0.041770f,
483   0.089161f,  0.355811f,  -0.078245f, -0.148490f, -0.407301f, -1.296870f,
484   -0.633421f, 0.124253f,  0.275402f,  0.223048f,  0.077016f,  0.160766f,
485   0.115374f,  0.061053f,  -0.231872f, -0.515052f, -0.278331f, -0.235912f,
486   -0.416372f, -0.284106f, -0.055942f, 0.110698f,  -0.428288f, -0.298137f,
487   -0.018101f, 0.102677f,  -0.019639f, 0.013479f,  0.038549f,  0.048682f,
488   0.128684f,  0.116416f,  0.044852f,  0.008133f,  0.061597f,  0.083582f,
489   0.014953f,  0.063716f,  -0.155318f, -0.061732f, 0.084855f,  0.129505f,
490   0.068249f,  0.193775f,  -0.088631f, -0.446398f, -0.075710f, -0.061327f,
491   0.278715f,  0.540366f,  0.618715f,  0.538374f,  -0.037843f, 0.062370f,
492   -0.033184f, 0.119901f,  -0.008641f, -0.064789f, 0.087498f,  0.043486f,
493   0.247085f,  0.419992f,  0.299935f,  0.234276f,  0.089283f,  0.070357f,
494   0.068888f,  0.134311f,  0.109823f,  0.072431f,  0.081676f,  0.091366f,
495   -1.707980f, -2.213110f, -2.149930f, -1.556870f, 0.226598f,  0.191675f,
496   0.192207f,  0.159566f,  -0.070194f, -0.136070f, -0.015172f, -0.204272f,
497   -0.162191f, -0.043313f, -0.158007f, -0.227210f, 0.040398f,  0.043014f,
498   0.039439f,  -0.035439f, 0.245558f,  0.439691f,  0.219659f,  0.138210f,
499   -0.048129f, 0.004954f,  -0.102860f, -0.185376f, 0.035548f,  0.006821f,
500   0.079199f,  0.032901f,  0.039218f,  0.068113f,  0.023075f,  -0.037582f,
501   0.225181f,  0.164562f,  0.106718f,  0.032684f,  0.013402f,  0.018797f,
502   0.076606f,  0.046512f,  -0.070024f, 0.099921f,  -0.051231f, 0.074167f,
503   0.173313f,  0.220212f,  0.142665f,  0.069809f,  -0.195130f, -0.007912f,
504   -0.006764f, -0.063687f, 0.306374f,  0.402035f,  0.273759f,  0.449469f,
505   0.114597f,  0.210745f,  0.355326f,  0.271307f,  -0.109943f, -0.171912f,
506   -0.070726f, -0.128932f, 0.138770f,  0.164971f,  0.308516f,  0.332536f,
507   0.081537f,  0.096939f,  0.054136f,  0.052226f,  0.109489f,  0.010223f,
508   0.168072f,  -0.106279f, 0.525568f,  0.704816f,  0.588942f,  0.473398f,
509   0.149497f,  0.120835f,  0.080049f,  0.151340f,  -0.182038f, -0.191091f,
510   -0.196505f, -0.198309f, -0.801819f, -1.441620f, -1.107780f, -1.025650f,
511   0.035750f,  0.018049f,  -0.029033f, -0.067255f, 0.192049f,  0.009664f,
512   -0.043741f, 0.051557f,  0.082815f,  0.069547f,  -0.073379f, 0.010584f,
513   0.192128f,  0.208586f,  0.141904f,  0.100763f,  0.046183f,  0.044776f,
514   -0.033611f, -0.005812f, 0.012966f,  0.030301f,  0.100665f,  0.103641f,
515   -0.294776f, -0.361573f, -0.420156f, -0.388743f, 0.239287f,  0.191975f,
516   0.089644f,  0.117591f,  0.069563f,  0.021480f,  0.100287f,  0.174159f,
517   -0.013571f, 0.090960f,  0.010232f,  -0.034760f, -0.077205f, 0.060632f,
518   -0.145527f, -0.391110f, -0.143052f, -0.236448f, -0.103902f, -0.188463f,
519   0.071311f,  -0.080171f, 0.021987f,  0.041767f,  -0.419487f, -0.515479f,
520   -0.205470f, -0.732132f, 0.150901f,  0.107202f,  0.156307f,  0.143672f,
521   0.474682f,  0.178137f,  0.150063f,  0.414515f,  0.559891f,  0.697019f,
522   0.541231f,  0.505310f,  -0.478101f, -0.444267f, -0.586539f, -0.445996f,
523   -0.451873f, -0.530085f, -0.447980f, -0.364955f, 0.372435f,  0.318894f,
524   0.351211f,  0.193961f,  0.212295f,  0.212842f,  0.220003f,  0.243743f,
525   -0.388628f, -0.789620f, -0.536618f, -0.430691f, 0.247004f,  0.266489f,
526   0.261033f,  0.263692f,  0.050089f,  0.048958f,  0.065207f,  0.120180f,
527   -0.526230f, -0.481969f, -0.422411f, -0.272292f, 0.155593f,  0.229614f,
528   0.139579f,  0.171805f,  -0.251924f, -0.302067f, -0.126157f, -0.346650f,
529   -1.195450f, -1.281100f, -0.987911f, -1.478440f, 0.285667f,  0.284802f,
530   0.301887f,  0.259556f,  -0.194127f, -0.090440f, -0.257959f, -0.259572f,
531   -0.012273f, -0.049993f, -0.099431f, 0.012506f,  0.081526f,  0.166279f,
532   0.042594f,  0.185121f,  0.148830f,  0.073161f,  0.201728f,  0.125747f,
533   -0.295065f, -0.187585f, -0.333066f, -0.312291f, 0.253458f,  0.321585f,
534   0.178844f,  0.219944f,  -0.763475f, -0.943374f, -0.816825f, -0.709901f,
535   -0.166132f, 0.129186f,  0.015405f,  -0.065623f, -0.246006f, -0.340385f,
536   -0.118155f, -0.384905f, -0.233883f, -0.400666f, -0.228597f, -0.228428f,
537   -0.559083f, -0.377784f, -0.541458f, -0.542870f, 0.067400f,  0.122987f,
538   0.180901f,  0.186004f,  -0.482910f, -0.424823f, -0.477831f, -0.394719f,
539   0.091558f,  0.049248f,  0.049370f,  0.160429f,  0.133641f,  0.096625f,
540   0.104429f,  0.100782f,  -0.238252f, -0.221459f, -0.196974f, -0.250393f,
541   -3.071750f, -2.418450f, -0.861410f, -1.051580f, 0.071263f,  0.118014f,
542   -0.028430f, -0.072073f, -0.074463f, 0.034168f,  0.044089f,  -0.091109f,
543   -3.153840f, -2.945850f, -1.977360f, -1.498850f, -0.083429f, 0.131835f,
544   -0.063865f, -0.065785f, -0.069346f, -0.015520f, -0.119551f, 0.044881f,
545   -0.105280f, 0.127516f,  0.005255f,  -0.142777f, 0.061055f,  -0.117250f,
546   0.020454f,  0.157879f,  -0.213812f, -0.151783f, 0.028583f,  0.137759f,
547   -3.248250f, -3.005940f, -1.510540f, -1.475390f, 0.081874f,  -0.171465f,
548   -0.135690f, -0.001989f, -0.227574f, -0.132799f, -0.359742f, -0.137197f,
549   0.066324f,  0.039194f,  -0.050857f, 0.095166f,  0.044475f,  0.011221f,
550   0.054904f,  0.061414f,  -0.039189f, 0.123751f,  -0.017171f, -0.008494f,
551   -2.598220f, -2.832670f, -1.622030f, -1.201990f, 0.154313f,  -0.021436f,
552   0.042190f,  0.143947f,  -0.090623f, 0.086853f,  0.143137f,  0.099821f,
553   -1.732820f, -1.429730f, -0.775125f, -0.648036f, 0.082176f,  0.079448f,
554   -0.040575f, 0.024511f,  -0.064105f, -0.117122f, -0.190323f, -0.182589f,
555   -0.076430f, -0.095615f, -0.112513f, -0.101581f, 0.143037f,  0.148180f,
556   0.430958f,  0.359225f,  0.001403f,  -0.080541f, -0.295001f, -0.156706f,
557   0.426623f,  0.475597f,  0.455210f,  0.454352f,  0.074365f,  0.099440f,
558   0.066348f,  -0.007078f, 0.008335f,  -0.097116f, -0.133687f, -0.110535f,
559   0.204145f,  0.281478f,  0.078886f,  0.112857f,  -0.103620f, -0.068247f,
560   0.191147f,  0.227593f,  -0.011816f, -0.058755f, -0.149477f, -0.101828f,
561   0.079878f,  0.304949f,  0.557555f,  0.305288f,  -0.150955f, -0.118610f,
562   0.052073f,  0.064707f,  -0.121728f, -0.151132f, -0.193987f, -0.175046f,
563   0.043655f,  0.105270f,  -0.120715f, -0.040976f, 0.047776f,  -0.004443f,
564   0.149606f,  0.111240f,  -0.047502f, -0.064146f, -0.151858f, -0.151872f,
565   -0.160207f, -0.113846f, -0.081585f, -0.006708f, -0.203760f, -0.068597f,
566   -0.179979f, -0.127779f, -0.062460f, -0.064513f, -0.121479f, -0.111122f,
567   -0.212384f, -0.229157f, -0.283428f, -0.184891f,
568 };
569 
// Coefficient table: 320 floats (53 rows of 6 plus a final row of 2).
// NOTE(review): per the comment at the top of this file, the tables here are
// rearranged copies of the kernels declared in partition_cnn_weights.h, laid
// out for linear memory access. Which kernel this table mirrors and which
// convolution routine consumes it is not visible from this definition -
// confirm against the caller before editing any value or the ordering.
570 static const float weights_layer_3[] = {
571   -0.039388f, 0.033048f,  -0.113003f, -0.011642f, 0.170478f,  0.145713f,
572   0.040189f,  -0.280129f, -0.049050f, -0.043788f, -0.157425f, 0.323829f,
573   -0.250725f, -0.166349f, 0.101650f,  -0.049690f, 0.205606f,  0.281131f,
574   0.623204f,  0.993452f,  -0.015115f, -0.138995f, 0.009473f,  0.157673f,
575   -0.024687f, -0.067214f, 0.125566f,  -0.317619f, 0.057002f,  0.031202f,
576   -0.018167f, 0.068542f,  0.011609f,  -0.020233f, -0.000428f, -0.035956f,
577   -0.843274f, -0.800587f, -0.214917f, -0.221250f, 0.031255f,  -0.077330f,
578   -0.074902f, -0.063979f, -0.055562f, 0.679495f,  0.146609f,  1.315330f,
579   -0.118399f, -0.034539f, -0.050377f, 0.172867f,  -0.204607f, -0.034930f,
580   0.176014f,  0.089747f,  -0.003889f, 0.044980f,  0.002386f,  -0.141723f,
581   -0.035828f, -0.204701f, 0.099813f,  0.123580f,  0.209851f,  -0.110989f,
582   -0.043655f, -0.461118f, -0.139664f, 0.026855f,  -0.081714f, 0.207623f,
583   0.089942f,  0.253082f,  0.680568f,  0.811360f,  -0.090528f, -0.116818f,
584   -0.432361f, -0.075588f, -0.269924f, -0.276810f, -0.289192f, -0.282570f,
585   0.245566f,  0.267216f,  0.238622f,  0.286528f,  -0.157605f, -0.200401f,
586   -0.138924f, -0.185006f, 0.215203f,  0.203316f,  0.209532f,  0.293135f,
587   0.928046f,  0.733323f,  -0.094120f, 0.036918f,  -0.126643f, -0.083371f,
588   -0.147530f, -0.153195f, 0.097097f,  0.101852f,  0.109160f,  0.105129f,
589   -0.051869f, -0.064359f, -0.073469f, -0.059591f, 0.102431f,  0.109444f,
590   0.113614f,  0.105617f,  0.383311f,  0.325783f,  0.393234f,  0.382508f,
591   0.194720f,  0.189672f,  0.217477f,  0.177786f,  0.326461f,  0.114789f,
592   0.317061f,  0.048291f,  -0.061143f, -0.134641f, -0.067895f, -0.108446f,
593   0.082592f,  0.029918f,  -0.006580f, 0.015533f,  -0.053583f, -0.055540f,
594   -0.063395f, -0.023157f, -0.064955f, -0.073981f, -0.115452f, -0.086626f,
595   -0.036616f, 0.008454f,  0.012029f,  -0.008039f, -0.207395f, -0.216419f,
596   -0.205363f, -0.249099f, 0.343308f,  0.413215f,  -0.009918f, -0.109978f,
597   -0.059711f, -0.045089f, -0.029130f, -0.038483f, -0.070323f, -0.099409f,
598   -0.008849f, -0.063527f, 0.175963f,  0.185335f,  0.149151f,  0.199997f,
599   -0.027516f, -0.039812f, -0.027760f, -0.047910f, -0.007337f, 0.071065f,
600   0.086225f,  0.125539f,  0.151390f,  0.215488f,  0.203450f,  0.045380f,
601   0.095761f,  0.107809f,  0.103918f,  0.122383f,  0.116287f,  0.135455f,
602   0.115446f,  0.155673f,  -0.044648f, -0.027455f, -0.015473f, -0.026657f,
603   0.089852f,  0.077459f,  0.077631f,  0.082507f,  -0.102761f, -0.054669f,
604   -0.132223f, -0.024768f, 0.111573f,  0.060467f,  0.107883f,  0.056621f,
605   0.219357f,  -0.161153f, 0.074379f,  -0.118743f, -0.169931f, -0.153995f,
606   -0.220003f, -0.200186f, 0.032318f,  -0.060687f, -0.087550f, -0.038022f,
607   0.026633f,  -0.005534f, 0.029532f,  0.027081f,  0.011926f,  0.058412f,
608   0.010631f,  0.003068f,  -0.014911f, 0.063070f,  0.065271f,  0.089550f,
609   0.012885f,  0.005320f,  -0.037494f, -0.019849f, -0.009624f, -0.059090f,
610   -0.021222f, -0.088033f, -0.055261f, -0.055113f, -0.047598f, -0.055478f,
611   -0.023648f, -0.046827f, -0.036572f, -0.057655f, 0.104194f,  0.179800f,
612   0.175751f,  0.192851f,  -0.016950f, -0.073650f, -0.028592f, -0.088219f,
613   0.011130f,  0.061825f,  0.025643f,  0.034183f,  0.095548f,  0.001457f,
614   -0.132869f, 0.032981f,  -0.140178f, -0.105343f, -0.161799f, -0.161983f,
615   0.177746f,  0.132903f,  0.135627f,  0.152489f,  -0.012532f, -0.068747f,
616   -0.085849f, -0.095434f, 0.087037f,  0.139497f,  0.111899f,  0.100189f,
617   -0.024649f, -0.092003f, 0.020783f,  -0.115807f, 0.092039f,  0.093943f,
618   0.109466f,  0.049639f,  -0.133727f, 0.128430f,  -0.050546f, 0.190632f,
619   0.123733f,  0.082305f,  0.114878f,  0.122572f,  0.201618f,  0.137588f,
620   0.065582f,  0.125161f,  -0.095179f, -0.120719f, -0.127126f, -0.101961f,
621   -0.118120f, -0.104833f, -0.179632f, -0.131764f, -0.138096f, -0.147861f,
622   -0.131512f, -0.153905f, -0.201816f, -0.206641f, -0.196707f, -0.160013f,
623   -0.212605f, -0.093998f, -0.186258f, -0.076137f, -0.065340f, -0.006969f,
624   -0.071383f, -0.075005f,
625 };
626 
// Coefficient table: 320 floats (53 rows of 6 plus a final row of 2).
// NOTE(review): per the comment at the top of this file, the tables here are
// rearranged copies of the kernels declared in partition_cnn_weights.h, laid
// out for linear memory access. Which kernel this table mirrors and which
// convolution routine consumes it is not visible from this definition -
// confirm against the caller before editing any value or the ordering.
627 static const float weights_layer_4[] = {
628   -0.016102f, -0.022836f, 0.624049f,  0.273485f,  0.222800f,  -0.290175f,
629   -0.518415f, 0.413484f,  -0.264495f, 0.498083f,  -0.450145f, -0.106419f,
630   0.095103f,  -0.187451f, 0.145933f,  -0.371542f, -0.088871f, 0.184017f,
631   -0.429625f, -0.110882f, 0.292781f,  0.289588f,  0.185127f,  0.326017f,
632   -0.432009f, -0.342663f, -0.312206f, 0.004004f,  -1.114290f, 0.028497f,
633   -0.264944f, -0.419611f, 0.046336f,  0.138232f,  -0.869528f, 0.425557f,
634   -0.954838f, -0.186830f, -0.464622f, -0.757107f, -0.432686f, -0.125978f,
635   -0.402633f, -0.172266f, -0.041749f, -0.822238f, -0.118486f, 0.238617f,
636   -0.198037f, 0.146347f,  0.405257f,  0.513303f,  -0.078876f, -0.300385f,
637   -0.010293f, -0.183962f, 0.155738f,  0.186797f,  -0.086814f, 0.000179f,
638   0.123467f,  0.362523f,  0.068805f,  0.371834f,  0.038122f,  -0.117867f,
639   -0.120445f, -0.422322f, -0.131402f, 0.285449f,  0.038957f,  0.008844f,
640   -0.020197f, 0.187723f,  0.190433f,  0.146532f,  -0.091068f, -0.270865f,
641   -0.194231f, -0.226777f, 0.013548f,  0.248351f,  0.537685f,  0.056316f,
642   -0.171540f, -0.003865f, 0.406439f,  0.126507f,  0.192780f,  0.149335f,
643   -0.149602f, 0.255202f,  -0.015426f, 0.032335f,  -1.791330f, -0.894602f,
644   -0.196641f, -0.282846f, -0.391100f, -0.040969f, 0.049934f,  0.056348f,
645   -0.041426f, -0.075159f, -0.658335f, -0.827270f, -0.175029f, -0.427235f,
646   0.311201f,  0.560413f,  0.363408f,  0.374580f,  -0.433531f, -0.180580f,
647   0.142142f,  0.194768f,  -0.054118f, -0.376541f, -0.366185f, -0.308782f,
648   -0.273143f, -0.074097f, 0.009000f,  -0.182198f, -0.015616f, -0.003882f,
649   -0.174340f, -0.354866f, 0.527972f,  0.348355f,  0.091381f,  -0.419828f,
650   -0.530529f, 0.159899f,  -0.511867f, -0.104237f, -0.286079f, -0.659039f,
651   -0.266596f, -0.256557f, -0.600437f, -0.446333f, -0.229629f, 0.024931f,
652   -0.143716f, -0.415754f, -0.003760f, -0.107195f, -0.666165f, -0.697312f,
653   -0.650255f, -0.703877f, 0.243402f,  0.426710f,  0.217210f,  0.260255f,
654   0.027416f,  0.163147f,  0.132188f,  0.142374f,  0.558627f,  0.065717f,
655   0.382781f,  -1.192240f, 0.195492f,  0.028439f,  0.278252f,  -0.491806f,
656   0.497701f,  -0.448835f, -0.245079f, -0.014336f, -0.174907f, -0.409633f,
657   0.207548f,  0.433813f,  0.459889f,  0.431728f,  0.605050f,  0.485520f,
658   0.218548f,  0.437307f,  0.027023f,  -0.204251f, 0.012100f,  0.150677f,
659   -1.097980f, 0.086866f,  -1.293130f, -0.372575f, -0.876264f, -0.021818f,
660   0.322864f,  -0.231043f, -0.271608f, 0.132782f,  -0.314895f, 0.396800f,
661   0.262788f,  -0.317212f, -0.666308f, 0.830742f,  0.319409f,  -0.564373f,
662   -0.178656f, 0.306993f,  0.265634f,  -0.332480f, -0.491514f, -0.186745f,
663   -0.063044f, -0.009321f, 0.074944f,  -0.372082f, -0.029479f, 0.081548f,
664   0.028172f,  -0.233148f, -0.337938f, -0.087695f, 0.596556f,  0.559530f,
665   0.139332f,  0.107223f,  -0.190915f, 0.137401f,  -0.150625f, -0.225484f,
666   -0.191344f, -0.232535f, 0.126510f,  0.296323f,  -0.547901f, -0.653080f,
667   0.358514f,  0.726289f,  -0.421725f, -0.243620f, 0.236206f,  0.390823f,
668   -0.076560f, -0.282329f, -0.012460f, -0.428484f, 0.349469f,  0.394629f,
669   0.421537f,  0.219632f,  -0.117550f, -0.087894f, 0.077155f,  0.016000f,
670   -0.289137f, -0.092937f, -0.014518f, -0.027111f, 0.210329f,  -0.159678f,
671   0.013288f,  -0.039268f, 0.008112f,  0.003152f,  0.030084f,  -0.039859f,
672   0.322028f,  -0.407797f, 0.447087f,  -0.381562f, 0.529297f,  -0.520298f,
673   0.562865f,  -0.616878f, 0.689389f,  0.754262f,  0.138475f,  0.750697f,
674   -0.760157f, -0.383740f, 0.074219f,  0.556257f,  0.087827f,  -0.511826f,
675   -0.305507f, -0.638214f, 0.114833f,  -0.444022f, 0.526612f,  -0.604984f,
676   -0.100415f, 0.037824f,  -0.106264f, 0.337615f,  0.070743f,  0.031129f,
677   0.281954f,  0.176144f,  -0.032833f, -0.073902f, -0.285492f, -0.803803f,
678   -0.015589f, 0.186077f,  -0.033351f, 0.517269f,  -1.878800f, -1.685210f,
679   -0.416581f, 0.158476f,  -0.071929f, -0.624353f, -0.122069f, -0.075065f,
680   0.311816f,  0.506305f,  0.383896f,  0.259450f,  -0.308232f, -0.094221f,
681   -0.421885f, -0.293573f,
682 };
683 
// Coefficient table: 500 floats (83 rows of 6 plus a final row of 2).
// NOTE(review): per the comment at the top of this file, the tables here are
// rearranged copies of the kernels declared in partition_cnn_weights.h, laid
// out for linear memory access. Which kernel this table mirrors and which
// convolution routine consumes it is not visible from this definition -
// confirm against the caller before editing any value or the ordering.
684 static const float weights_layer_5[] = {
685   0.131894f,  0.078431f,  0.323121f,  -0.230680f, -0.684740f, 0.020895f,
686   0.364983f,  0.121656f,  0.132448f,  -0.731198f, 0.071148f,  0.739642f,
687   0.318437f,  -0.033021f, -1.037080f, 0.135335f,  0.383582f,  0.287332f,
688   0.054042f,  -0.825482f, 0.418533f,  0.305606f,  0.041549f,  0.432422f,
689   -0.826878f, -0.593536f, 0.105657f,  0.125357f,  0.408567f,  -0.293338f,
690   0.233905f,  -0.039609f, 0.547727f,  -0.435806f, 0.036160f,  0.220275f,
691   -0.020337f, -0.619403f, -0.455858f, 0.681455f,  0.543846f,  -0.495084f,
692   0.251496f,  -0.085686f, 0.091395f,  -0.476696f, 0.453628f,  -0.109663f,
693   0.383493f,  -0.456563f, -0.212935f, 0.020567f,  -0.719564f, -0.377813f,
694   -0.737511f, 0.765965f,  0.624309f,  -0.063679f, -0.055681f, -0.475969f,
695   -0.069902f, 0.725690f,  0.641094f,  0.439922f,  -0.111544f, -0.309061f,
696   0.280091f,  0.381416f,  0.481168f,  0.483543f,  -0.901267f, -0.499230f,
697   0.043449f,  -0.372395f, 0.021216f,  -0.002200f, -0.524089f, -0.071485f,
698   -0.273974f, -0.462654f, 0.042369f,  -0.138679f, -0.330060f, 0.021886f,
699   -0.306075f, -0.011130f, -0.260224f, -0.288435f, -0.104039f, -0.183563f,
700   0.118990f,  -0.531160f, 0.339632f,  -0.028374f, 0.159084f,  -0.008824f,
701   -0.791388f, 0.245242f,  0.356510f,  0.469867f,  -0.396949f, -0.476146f,
702   -0.168472f, 1.068400f,  0.474629f,  -0.117554f, -0.142453f, -0.306604f,
703   0.348525f,  -0.111929f, -0.435384f, 0.019952f,  -0.260185f, 0.373376f,
704   0.109729f,  -0.639168f, 0.033392f,  -0.082573f, -0.196018f, 0.301637f,
705   -0.124210f, -0.202515f, -1.221920f, -0.253690f, -0.144864f, 0.287753f,
706   -0.161206f, -0.213246f, 0.373968f,  0.141397f,  -0.248237f, 0.283090f,
707   -0.008977f, -0.172960f, -0.234146f, -0.720014f, -0.322451f, 0.181083f,
708   0.310659f,  -0.422646f, -0.719994f, -0.354339f, 0.352739f,  0.230923f,
709   0.427013f,  -0.660316f, 0.232140f,  0.685896f,  0.660208f,  0.225748f,
710   -0.918750f, -0.650790f, -0.674525f, -0.450305f, -0.152529f, 0.498480f,
711   0.895092f,  0.688242f,  0.669057f,  0.612669f,  0.593484f,  0.318204f,
712   -0.169294f, 0.388789f,  -0.529777f, -0.219706f, -0.044916f, 0.161697f,
713   -0.145288f, 0.196153f,  -0.022212f, -0.434209f, -0.208115f, -0.117745f,
714   -0.279029f, -0.009506f, 0.137474f,  0.330148f,  0.439258f,  0.345879f,
715   -0.845131f, -0.215713f, 0.094463f,  0.638604f,  0.882254f,  -0.964082f,
716   -0.383920f, 0.292645f,  0.266341f,  0.747473f,  -0.645631f, -0.538896f,
717   -0.319764f, 0.521880f,  0.460091f,  -0.470898f, -0.778283f, -0.061622f,
718   -0.142433f, 0.210520f,  0.804197f,  0.285840f,  -0.138414f, -0.381846f,
719   -0.499991f, 0.223648f,  0.439025f,  0.321508f,  -0.099560f, -0.622893f,
720   0.750925f,  0.740994f,  0.140405f,  0.074631f,  -0.270223f, -0.829049f,
721   -0.753355f, -0.258015f, 0.006285f,  -0.730573f, -1.107390f, -0.538015f,
722   -1.005520f, -0.724115f, -0.440183f, -0.395239f, 0.508768f,  0.204620f,
723   -0.267331f, 0.001740f,  -0.838709f, 0.659333f,  0.043739f,  -0.024099f,
724   0.262431f,  0.252433f,  -0.265215f, 0.057289f,  -0.428192f, -0.114350f,
725   -0.011475f, 0.463995f,  0.668833f,  -0.604556f, -0.122780f, -0.441645f,
726   0.145769f,  0.310450f,  -1.003500f, 0.936069f,  0.516604f,  -0.643386f,
727   -0.518571f, 0.306130f,  0.337387f,  0.583400f,  -0.366025f, -0.560035f,
728   -0.262332f, 0.465242f,  0.964332f,  -0.545410f, -0.637428f, -0.202695f,
729   0.378931f,  0.834604f,  0.000970f,  -0.553303f, -0.562879f, 0.221665f,
730   0.395160f,  0.446281f,  -0.184394f, -0.591780f, 0.170595f,  1.164390f,
731   0.227068f,  -0.150910f, -0.393690f, -0.131151f, 0.309956f,  -0.413518f,
732   -0.768334f, -0.548975f, 0.245384f,  -0.256904f, -0.514790f, -0.102616f,
733   -0.347625f, 0.420456f,  0.037804f,  -0.283200f, -0.578815f, 0.319282f,
734   0.674622f,  -0.011791f, -0.339329f, 0.466705f,  0.563444f,  0.409660f,
735   0.445784f,  -0.899507f, -0.605116f, 0.622438f,  0.427385f,  -0.062509f,
736   0.666570f,  0.057105f,  0.357894f,  -0.811016f, -0.421715f, -0.458397f,
737   0.288955f,  0.005857f,  0.236331f,  0.107957f,  0.587276f,  -0.375800f,
738   0.323799f,  -0.623363f, 0.254122f,  -0.198478f, -0.098436f, -0.282531f,
739   0.452453f,  -0.163349f, -0.413382f, -0.448732f, -0.528770f, -0.457449f,
740   -0.619619f, -0.265919f, -0.042760f, 0.438730f,  0.501798f,  -0.403851f,
741   0.519564f,  0.817314f,  0.366203f,  0.492610f,  0.546929f,  0.853094f,
742   0.289000f,  0.453941f,  -0.076152f, 0.007226f,  -0.183717f, -0.506252f,
743   -0.599989f, -0.576006f, 0.746488f,  0.631466f,  -0.475599f, -0.334991f,
744   -0.879614f, 0.918957f,  0.473471f,  -0.043781f, -0.688234f, -0.925875f,
745   -0.188081f, 0.050918f,  0.116855f,  0.221413f,  -0.066680f, -0.674395f,
746   -0.481985f, 0.247368f,  0.271129f,  0.637979f,  -1.006970f, -0.855441f,
747   0.144874f,  0.507424f,  1.506960f,  -0.338910f, 0.398203f,  0.738000f,
748   0.263193f,  -0.425908f, 0.358271f,  -1.072900f, -0.816209f, -0.425519f,
749   0.264373f,  0.694014f,  0.036333f,  0.635532f,  0.518856f,  0.047585f,
750   -0.854817f, -0.138202f, 0.006811f,  -0.052020f, -0.468498f, 0.489080f,
751   -0.105778f, 0.357038f,  -0.782875f, 0.649049f,  -0.562652f, -0.544392f,
752   -0.328526f, -0.402121f, -0.263172f, -0.668459f, -0.526702f, -0.395829f,
753   0.190986f,  0.307766f,  -1.001830f, -0.293051f, 0.283334f,  0.572450f,
754   0.906095f,  -1.144300f, 0.180989f,  0.421092f,  0.684571f,  0.527276f,
755   -0.122287f, 0.575067f,  0.675221f,  0.755029f,  0.094957f,  0.481403f,
756   0.825155f,  0.755035f,  0.641420f,  0.034497f,  0.518783f,  0.283800f,
757   0.293733f,  -0.074778f, -0.268720f, 0.798921f,  0.317714f,  -0.236391f,
758   -0.375071f, -0.414600f, 0.223413f,  -0.349044f, -0.191033f, -0.391779f,
759   -0.596894f, -0.378608f, -0.185920f, -0.822171f, -0.754962f, -0.167706f,
760   0.755378f,  0.671847f,  0.969414f,  0.793048f,  1.078610f,  -0.418963f,
761   0.367648f,  0.217645f,  0.294232f,  0.113027f,  0.060312f,  -0.327488f,
762   -0.305035f, -0.243600f, -0.020588f, -0.326324f, -0.417534f, -0.425868f,
763   -0.404614f, -0.346750f, -0.339145f, -0.348094f, -0.527290f, -0.617825f,
764   -0.258342f, -0.200753f, -0.249779f, -0.321039f, -0.023117f, -0.004167f,
765   -0.206788f, -0.612420f, -0.646428f, -0.548969f, -0.158875f, 0.213814f,
766   -0.084040f, -0.217365f, -0.511895f, -0.653285f, 0.440971f,  0.455591f,
767   -0.123900f, 0.134097f,  -0.251241f, 0.682463f,  0.740614f,  0.991212f,
768   0.565984f,  0.592690f,
769 };
770 
// Lane-wise sum of four float32x4_t vectors. The additions are paired as
// (a[0] + a[1]) + (a[2] + a[3]); the pairing is part of the result because
// floating-point addition is not associative.
static inline float32x4_t add_f32x4_x4(const float32x4_t a[4]) {
  const float32x4_t first_pair = vaddq_f32(a[0], a[1]);
  const float32x4_t second_pair = vaddq_f32(a[2], a[3]);
  const float32x4_t total = vaddq_f32(first_pair, second_pair);
  return total;
}
776 
// Adds the contribution of two consecutive input planes to four horizontally
// adjacent stride-2 outputs of a 2x2 filter.
//
//   in        top-left sample of the first plane's 8-column window.
//   in_stride distance in floats between vertically adjacent samples.
//   in_size   distance in floats between the first and the second plane.
//   taps_p0   the four filter taps applied to the first plane, ordered
//             { top-left, top-right, bottom-left, bottom-right }.
//   taps_p1   same, for the second plane.
//   acc       four running sums, one per (plane, filter row) combination;
//             the caller is expected to add them together at the end.
//
// vld2q_f32 de-interleaves 8 consecutive floats: val[0] receives the even
// columns (multiplied by the left tap) and val[1] the odd columns (multiplied
// by the right tap), which is what a horizontal stride of 2 requires.
static inline void cnn_accumulate_2x2_plane_pair_neon(
    const float *in, int in_stride, int in_size, float32x4_t taps_p0,
    float32x4_t taps_p1, float32x4_t acc[4]) {
  const float32x2_t p0_top = vget_low_f32(taps_p0);
  const float32x2_t p0_bottom = vget_high_f32(taps_p0);
  const float32x2_t p1_top = vget_low_f32(taps_p1);
  const float32x2_t p1_bottom = vget_high_f32(taps_p1);

  const float32x4x2_t p0_row0 = vld2q_f32(in);
  const float32x4x2_t p0_row1 = vld2q_f32(in + in_stride);
  const float32x4x2_t p1_row0 = vld2q_f32(in + in_size);
  const float32x4x2_t p1_row1 = vld2q_f32(in + in_size + in_stride);

  acc[0] = vmlaq_lane_f32(acc[0], p0_row0.val[0], p0_top, 0);
  acc[0] = vmlaq_lane_f32(acc[0], p0_row0.val[1], p0_top, 1);

  acc[1] = vmlaq_lane_f32(acc[1], p0_row1.val[0], p0_bottom, 0);
  acc[1] = vmlaq_lane_f32(acc[1], p0_row1.val[1], p0_bottom, 1);

  acc[2] = vmlaq_lane_f32(acc[2], p1_row0.val[0], p1_top, 0);
  acc[2] = vmlaq_lane_f32(acc[2], p1_row0.val[1], p1_top, 1);

  acc[3] = vmlaq_lane_f32(acc[3], p1_row1.val[0], p1_bottom, 0);
  acc[3] = vmlaq_lane_f32(acc[3], p1_row1.val[1], p1_bottom, 1);
}

// 2x2, stride-2 convolution without padding for inputs that are at least 16
// samples wide (all three conditions are asserted).
//
// Data layout as used by the code below:
//  - Only input[0] is dereferenced; successive input planes are read at
//    multiples of in_height * in_width floats from it.
//  - Planes are consumed two at a time, with 8 weights per pair, so each
//    output channel uses in_channels * 4 consecutive weights.
//  - One output plane is written per pass of the outermost loop, through
//    consecutive entries of `output`, with one `bias` value each. The loop
//    repeats until start_idx, pre-incremented, reaches out_channels.
//  - Every innermost column step reads 16 input columns from two input rows
//    and stores 8 output samples.
//
// All loops are do-while, i.e. each level executes at least once, exactly as
// many times as the loop conditions at the bottom dictate.
static inline void av1_cnn_convolve_no_maxpool_padding_valid_2x2_large_neon(
    const float **input, int in_width, int in_height, int in_stride,
    const float *bias, const int skip_width, const int skip_height,
    const int filter_width, const int filter_height, const int in_channels,
    const int out_channels, float **output, int out_stride, int start_idx,
    const float *weights) {
  assert(filter_height == 2 && filter_width == 2);
  assert(skip_width == 2 && skip_height == 2);
  assert(in_width >= 16);

  const int plane_size = in_height * in_width;
  // Exclusive upper bounds for the top-left corner of the filter window.
  const int col_limit = in_width - filter_width + 1;
  const int row_limit = in_height - filter_height + 1;
  const int col_step = 8 * skip_width;

  do {
    // The bias seeds the first partial sum only, so it is counted once.
    const float32x4_t bias_vec = vdupq_n_f32(bias[0]);
    const float *src_row = input[0];
    float *dst_row = output[0];
    int row = 0;

    do {
      const float *src = src_row;
      float *dst = dst_row;
      int col = 0;

      do {
        const float *taps = weights;
        const float *src_plane = src;
        float32x4_t acc_left[4] = { bias_vec, vdupq_n_f32(0), vdupq_n_f32(0),
                                    vdupq_n_f32(0) };
        float32x4_t acc_right[4] = { bias_vec, vdupq_n_f32(0), vdupq_n_f32(0),
                                     vdupq_n_f32(0) };
        int plane = 0;

        do {
          const float32x4_t taps_p0 = vld1q_f32(taps);
          const float32x4_t taps_p1 = vld1q_f32(taps + 4);

          // Input columns 0..7 feed outputs 0..3, columns 8..15 feed 4..7.
          cnn_accumulate_2x2_plane_pair_neon(src_plane, in_stride, plane_size,
                                             taps_p0, taps_p1, acc_left);
          cnn_accumulate_2x2_plane_pair_neon(src_plane + 8, in_stride,
                                             plane_size, taps_p0, taps_p1,
                                             acc_right);

          taps += 8;
          src_plane += 2 * plane_size;
          plane += 2;
        } while (plane < in_channels);

        vst1q_f32(dst, add_f32x4_x4(acc_left));
        vst1q_f32(dst + 4, add_f32x4_x4(acc_right));

        dst += 8;
        src += col_step;
        col += col_step;
      } while (col < col_limit);

      dst_row += out_stride;
      src_row += skip_height * in_stride;
      row += skip_height;
    } while (row < row_limit);

    // Move on to the next output channel's bias, plane and filter.
    bias++;
    output++;
    weights += in_channels * filter_height * filter_width;
  } while (++start_idx < out_channels);
}
876 
// Neon kernel for a 2x2 filter applied with a stride of 2 in both directions
// (PADDING_VALID, no maxpool) on planes that are exactly 8 samples wide.
//
// For every output channel processed, each step of the column loop produces
// four horizontally adjacent outputs:
//  - vld2q_f32() reads 8 consecutive samples of a row and splits them into
//    the even and odd columns, i.e. the left and right filter taps of four
//    neighbouring 2x2 windows.
//  - Input channels are consumed in pairs. Channel k is read at
//    input[0] + k * in_height * in_width, and the channel loop advances by two,
//    so in_channels is expected to be even.
//  - `weights` holds 8 floats per channel pair: for each channel the taps are
//    ordered { top-left, top-right, bottom-left, bottom-right }.
//  - The four partial sums (one per channel/row combination) are combined by
//    add_f32x4_x4(); the bias is seeded into the first partial sum.
//
// start_idx only determines how many output channels are processed (the loop
// runs until ++start_idx reaches out_channels, and at least once); bias,
// output and weights are consumed starting from their first entry.
static inline void av1_cnn_convolve_no_maxpool_padding_valid_2x2_neon(
    const float **input, int in_width, int in_height, int in_stride,
    const float *bias, const int skip_width, const int skip_height,
    const int filter_width, const int filter_height, const int in_channels,
    const int out_channels, float **output, int out_stride, int start_idx,
    const float *weights) {
  assert(filter_height == 2 && filter_width == 2);
  assert(skip_width == 2 && skip_height == 2);
  assert(in_width == 8);

  // Distance between two consecutive input channel planes.
  const int plane_size = in_height * in_width;
  // Exclusive upper bounds for the top-left corner of a filter window.
  const int row_limit = in_height - filter_height + 1;
  const int col_limit = in_width - filter_width + 1;
  // Number of weights belonging to a single output channel.
  const int weights_per_filter = in_channels * filter_height * filter_width;

  int out_ch = 0;
  do {
    const float32x4_t bias_vec = vdupq_n_f32(bias[out_ch]);
    const float *const kernel = weights + out_ch * weights_per_filter;
    const float *src_row = *input;
    float *dst_row = output[out_ch];
    int y = 0;

    do {
      const float *src = src_row;
      float *dst = dst_row;
      int x = 0;

      do {
        const float *taps = kernel;
        const float *plane = src;
        float32x4_t acc[4];
        acc[0] = bias_vec;
        acc[1] = vdupq_n_f32(0);
        acc[2] = vdupq_n_f32(0);
        acc[3] = vdupq_n_f32(0);

        int ch = 0;
        do {
          // Taps of the current channel (k_ch0) and of the next one (k_ch1).
          const float32x4_t k_ch0 = vld1q_f32(taps);
          const float32x4_t k_ch1 = vld1q_f32(taps + 4);

          // val[0] holds the even columns, val[1] the odd columns.
          const float32x4x2_t ch0_top = vld2q_f32(plane);
          const float32x4x2_t ch0_bot = vld2q_f32(plane + in_stride);
          const float32x4x2_t ch1_top = vld2q_f32(plane + plane_size);
          const float32x4x2_t ch1_bot =
              vld2q_f32(plane + plane_size + in_stride);

          // First channel of the pair, top row of the window.
          acc[0] =
              vmlaq_lane_f32(acc[0], ch0_top.val[0], vget_low_f32(k_ch0), 0);
          acc[0] =
              vmlaq_lane_f32(acc[0], ch0_top.val[1], vget_low_f32(k_ch0), 1);

          // First channel of the pair, bottom row of the window.
          acc[1] =
              vmlaq_lane_f32(acc[1], ch0_bot.val[0], vget_high_f32(k_ch0), 0);
          acc[1] =
              vmlaq_lane_f32(acc[1], ch0_bot.val[1], vget_high_f32(k_ch0), 1);

          // Second channel of the pair, top row of the window.
          acc[2] =
              vmlaq_lane_f32(acc[2], ch1_top.val[0], vget_low_f32(k_ch1), 0);
          acc[2] =
              vmlaq_lane_f32(acc[2], ch1_top.val[1], vget_low_f32(k_ch1), 1);

          // Second channel of the pair, bottom row of the window.
          acc[3] =
              vmlaq_lane_f32(acc[3], ch1_bot.val[0], vget_high_f32(k_ch1), 0);
          acc[3] =
              vmlaq_lane_f32(acc[3], ch1_bot.val[1], vget_high_f32(k_ch1), 1);

          taps += 8;
          plane += 2 * plane_size;
          ch += 2;
        } while (ch < in_channels);

        vst1q_f32(dst, add_f32x4_x4(acc));

        dst += 4;
        src += 4 * skip_width;
        x += 4 * skip_width;
      } while (x < col_limit);

      dst_row += out_stride;
      src_row += skip_height * in_stride;
      y += skip_height;
    } while (y < row_limit);

    ++out_ch;
  } while (++start_idx < out_channels);
}
953 
// Neon kernel for a single-input-channel 5x5 filter applied with a stride of
// 4 in both directions (PADDING_VALID, no maxpool).
//
// Each step of the column loop produces four horizontally adjacent outputs:
//  - vld4q_f32() reads 16 consecutive samples of a row and de-interleaves
//    them so that val[c] holds columns c, c + 4, c + 8 and c + 12, i.e. filter
//    tap c (c = 0..3) of four neighbouring windows.
//  - The fifth tap of each window (columns 4, 8, 12 and 16) is obtained by
//    shifting val[0] down by one lane and appending the sample at column 16.
//  - `weights` holds 25 floats per output channel in row-major kernel order.
//  - Products are split alternately over two accumulators which are added
//    together right before the store; the bias is seeded into the first one.
//
// start_idx only determines how many output channels are processed (the loop
// runs until ++start_idx reaches out_channels, and at least once); bias,
// output and weights are consumed starting from their first entry.
static inline void av1_cnn_convolve_no_maxpool_padding_valid_5x5_neon(
    const float **input, int in_width, int in_height, int in_stride,
    const float *bias, const int skip_width, const int skip_height,
    const int filter_width, const int filter_height, const int in_channels,
    const int out_channels, float **output, int out_stride, int start_idx,
    const float *weights) {
  assert(filter_height == 5 && filter_width == 5);
  assert(skip_width == 4 && skip_height == 4);
  assert(in_width >= 16);
  assert(in_channels == 1);
  (void)in_channels;

  // Exclusive upper bounds for the top-left corner of a filter window.
  const int row_limit = in_height - filter_height + 1;
  const int col_limit = in_width - filter_width + 1;

  int out_ch = 0;
  do {
    const float *const kernel = weights + 25 * out_ch;
    const float32x4_t bias_vec = vdupq_n_f32(bias[out_ch]);

    // The 25 taps of this output channel: six vectors of four plus the last
    // tap broadcast to every lane.
    const float32x4_t k0_3 = vld1q_f32(kernel);
    const float32x4_t k4_7 = vld1q_f32(kernel + 4);
    const float32x4_t k8_11 = vld1q_f32(kernel + 8);
    const float32x4_t k12_15 = vld1q_f32(kernel + 12);
    const float32x4_t k16_19 = vld1q_f32(kernel + 16);
    const float32x4_t k20_23 = vld1q_f32(kernel + 20);
    const float32x4_t k24 = vdupq_n_f32(kernel[24]);

    const float *src_row = *input;
    float *dst_row = output[out_ch];
    int y = 0;

    do {
      const float *src = src_row;
      float *dst = dst_row;
      int x = 0;

      do {
        // rows[r].val[c]: tap c (0..3) of kernel row r for four windows.
        // tap4[r]: tap 4 of kernel row r for the same four windows.
        float32x4x4_t rows[5];
        float32x4_t tap4[5];
        for (int r = 0; r < 5; ++r) {
          const float *const line = src + r * in_stride;
          rows[r] = vld4q_f32(line);
          tap4[r] = vextq_f32(rows[r].val[0], vdupq_n_f32(line[16]), 1);
        }

        float32x4_t acc_a = bias_vec;
        float32x4_t acc_b = vdupq_n_f32(0);

        // Kernel row 0 (taps 0..4).
        acc_a = vmlaq_lane_f32(acc_a, rows[0].val[0], vget_low_f32(k0_3), 0);
        acc_b = vmlaq_lane_f32(acc_b, rows[0].val[1], vget_low_f32(k0_3), 1);
        acc_a = vmlaq_lane_f32(acc_a, rows[0].val[2], vget_high_f32(k0_3), 0);
        acc_b = vmlaq_lane_f32(acc_b, rows[0].val[3], vget_high_f32(k0_3), 1);
        acc_a = vmlaq_lane_f32(acc_a, tap4[0], vget_low_f32(k4_7), 0);

        // Kernel row 1 (taps 5..9).
        acc_b = vmlaq_lane_f32(acc_b, rows[1].val[0], vget_low_f32(k4_7), 1);
        acc_a = vmlaq_lane_f32(acc_a, rows[1].val[1], vget_high_f32(k4_7), 0);
        acc_b = vmlaq_lane_f32(acc_b, rows[1].val[2], vget_high_f32(k4_7), 1);
        acc_a = vmlaq_lane_f32(acc_a, rows[1].val[3], vget_low_f32(k8_11), 0);
        acc_b = vmlaq_lane_f32(acc_b, tap4[1], vget_low_f32(k8_11), 1);

        // Kernel row 2 (taps 10..14).
        acc_a = vmlaq_lane_f32(acc_a, rows[2].val[0], vget_high_f32(k8_11), 0);
        acc_b = vmlaq_lane_f32(acc_b, rows[2].val[1], vget_high_f32(k8_11), 1);
        acc_a = vmlaq_lane_f32(acc_a, rows[2].val[2], vget_low_f32(k12_15), 0);
        acc_b = vmlaq_lane_f32(acc_b, rows[2].val[3], vget_low_f32(k12_15), 1);
        acc_a = vmlaq_lane_f32(acc_a, tap4[2], vget_high_f32(k12_15), 0);

        // Kernel row 3 (taps 15..19).
        acc_b = vmlaq_lane_f32(acc_b, rows[3].val[0], vget_high_f32(k12_15), 1);
        acc_a = vmlaq_lane_f32(acc_a, rows[3].val[1], vget_low_f32(k16_19), 0);
        acc_b = vmlaq_lane_f32(acc_b, rows[3].val[2], vget_low_f32(k16_19), 1);
        acc_a = vmlaq_lane_f32(acc_a, rows[3].val[3], vget_high_f32(k16_19), 0);
        acc_b = vmlaq_lane_f32(acc_b, tap4[3], vget_high_f32(k16_19), 1);

        // Kernel row 4 (taps 20..24).
        acc_a = vmlaq_lane_f32(acc_a, rows[4].val[0], vget_low_f32(k20_23), 0);
        acc_b = vmlaq_lane_f32(acc_b, rows[4].val[1], vget_low_f32(k20_23), 1);
        acc_a = vmlaq_lane_f32(acc_a, rows[4].val[2], vget_high_f32(k20_23), 0);
        acc_b = vmlaq_lane_f32(acc_b, rows[4].val[3], vget_high_f32(k20_23), 1);
        acc_a = vmlaq_f32(acc_a, k24, tap4[4]);

        vst1q_f32(dst, vaddq_f32(acc_a, acc_b));

        dst += 4;
        src += 4 * skip_width;
        x += 4 * skip_width;
      } while (x < col_limit);

      dst_row += out_stride;
      src_row += skip_height * in_stride;
      y += skip_height;
    } while (y < row_limit);

    ++out_ch;
  } while (++start_idx < out_channels);
}
1070 
1071 // Neon variant of av1_cnn_convolve_no_maxpool_padding_valid_c().
1072 // As per the current encoder, av1_cnn_convolve function gets called for
1073 // block size equal to 64x64. av1_cnn_convolve() uses layer config values
1074 // set by av1_intra_mode_cnn_partition_cnn_config. The following are a few
1075 // details related to each layer's config parameters.
1076 // Layer_Number in_size out_size filter_wd filter_ht skip_wd skip_ht
1077 //     0         64x64    16x16      5         5         4       4
1078 //     1         16x16    8x8        2         2         2       2
1079 //     2         8x8      4x4        2         2         2       2
1080 //     3         4x4      2x2        2         2         2       2
1081 //     4         2x2      1x1        2         2         2       2
1082 // Here,
1083 // filter_wd = filter_width and filter_ht = filter_height,
1084 // skip_wd = skip_width and skip_ht = skip_height.
av1_cnn_convolve_no_maxpool_padding_valid_neon(const float ** input,int in_width,int in_height,int in_stride,const CNN_LAYER_CONFIG * layer_config,float ** output,int out_stride,int start_idx,int cstep,int channel_step)1085 void av1_cnn_convolve_no_maxpool_padding_valid_neon(
1086     const float **input, int in_width, int in_height, int in_stride,
1087     const CNN_LAYER_CONFIG *layer_config, float **output, int out_stride,
1088     int start_idx, int cstep, int channel_step) {
1089   assert((layer_config->skip_height == 1 && layer_config->skip_width == 1) ||
1090          !layer_config->maxpool);
1091   assert(layer_config->filter_height > 1 || layer_config->filter_width > 1);
1092   assert(layer_config->pad == PADDING_VALID);
1093   assert(channel_step == 1);
1094   assert(cstep == layer_config->in_channels * layer_config->out_channels);
1095 
1096   if (layer_config->filter_width == 5 && layer_config->filter_height == 5 &&
1097       layer_config->skip_width == 4 && layer_config->skip_height == 4) {
1098     av1_cnn_convolve_no_maxpool_padding_valid_5x5_neon(
1099         input, in_width, in_height, in_stride, layer_config->bias,
1100         layer_config->skip_width, layer_config->skip_height,
1101         layer_config->filter_width, layer_config->filter_height,
1102         layer_config->in_channels, layer_config->out_channels, output,
1103         out_stride, start_idx, weights_layer_5);
1104   } else if (layer_config->filter_width == 2 &&
1105              layer_config->filter_height == 2 &&
1106              layer_config->skip_width == 2 && layer_config->skip_height == 2) {
1107     const float *weights = weights_layer_1;
1108     if (layer_config->output_num ==
1109         av1_intra_mode_cnn_partition_cnn_config.layer_config[2].output_num) {
1110       weights = weights_layer_2;
1111     } else if ((layer_config->output_num ==
1112                 av1_intra_mode_cnn_partition_cnn_config.layer_config[3]
1113                     .output_num)) {
1114       weights = weights_layer_3;
1115     } else if ((layer_config->output_num ==
1116                 av1_intra_mode_cnn_partition_cnn_config.layer_config[4]
1117                     .output_num)) {
1118       weights = weights_layer_4;
1119     }
1120     if (in_width >= 16) {
1121       av1_cnn_convolve_no_maxpool_padding_valid_2x2_large_neon(
1122           input, in_width, in_height, in_stride, layer_config->bias,
1123           layer_config->skip_width, layer_config->skip_height,
1124           layer_config->filter_width, layer_config->filter_height,
1125           layer_config->in_channels, layer_config->out_channels, output,
1126           out_stride, start_idx, weights);
1127     } else if (in_width == 8) {
1128       av1_cnn_convolve_no_maxpool_padding_valid_2x2_neon(
1129           input, in_width, in_height, in_stride, layer_config->bias,
1130           layer_config->skip_width, layer_config->skip_height,
1131           layer_config->filter_width, layer_config->filter_height,
1132           layer_config->in_channels, layer_config->out_channels, output,
1133           out_stride, start_idx, weights);
1134     } else {
1135       av1_cnn_convolve_no_maxpool_padding_valid_c(
1136           input, in_width, in_height, in_stride, layer_config, output,
1137           out_stride, start_idx, cstep, channel_step);
1138     }
1139   } else {
1140     av1_cnn_convolve_no_maxpool_padding_valid_c(
1141         input, in_width, in_height, in_stride, layer_config, output, out_stride,
1142         start_idx, cstep, channel_step);
1143   }
1144 }
1145