1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2018 Arm Limited. All rights reserved.
3 *
4 *
5 * Project: CMSIS NN Library
6 * Title: arm_nnexamples_nn_test.cpp
7 *
8 * Description: Example code for NN kernel testing.
9 *
10 * Target Processor: Cortex-M cores
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * - Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * - Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in
19 * the documentation and/or other materials provided with the
20 * distribution.
21 * - Neither the name of ARM LIMITED nor the names of its contributors
22 * may be used to endorse or promote products derived from this
23 * software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
28 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
29 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
30 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
31 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
32 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
35 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 * -------------------------------------------------------------------- */
38
39 #include "arm_nnexamples_nn_test.h"
40
41 //#define TEST_SIGMOID
42 //#define TEST_TANH
43 #define TEST_POOL
44 #define TEST_RELU
45 #define TEST_IP
46 #define TEST_CONV
47 #define TEST_NONSQUARE
48 #define TEST_NNMULT
49
50 int test_index = 0;
51 q7_t test_flags[50];
52 bool test_pass;
53
main()54 int main()
55 {
56 printf("start tests\n");
57
58 srand(1);
59
60 // common pointers for testing data
61 q7_t *test1;
62 q15_t *test2;
63 q7_t *test3;
64 q15_t *test4;
65
66 for (test_index = 0; test_index<50; test_index++) {
67 test_flags[test_index] = -1;
68 }
69 test_index = 0;
70
71 #ifdef TEST_NNMULT
72 #define NNMULT_DIM 128
73 test1 = new q7_t[NNMULT_DIM*2];
74 test2 = new q15_t[NNMULT_DIM*2];
75 test3 = new q7_t[NNMULT_DIM*2];
76 test4 = new q15_t[NNMULT_DIM*2];
77
78 q7_t * mult_out_q7 = test3;
79 q7_t * mult_ref_q7 = test3 + NNMULT_DIM;
80 q15_t * mult_out_q15 = test4;
81 q15_t * mult_ref_q15 = test4 + NNMULT_DIM;
82
83 for (int i=0;i<NNMULT_DIM*2;i++) {
84 test1[i] = (rand() % 256 - 128);
85 test2[i] = (rand() % 65536 - 32768);
86 }
87
88 // Test q7
89 arm_nn_mult_q7(test1, test1+NNMULT_DIM, mult_out_q7, 5, NNMULT_DIM);
90
91 arm_nn_mult_q7_ref(test1, test1+NNMULT_DIM, mult_ref_q7, 5, NNMULT_DIM);
92
93 verify_results_q7(mult_out_q7, mult_ref_q7, NNMULT_DIM);
94
95 arm_nn_mult_q7(test1, test1+NNMULT_DIM, mult_out_q7, 9, NNMULT_DIM);
96
97 arm_nn_mult_q7_ref(test1, test1+NNMULT_DIM, mult_ref_q7, 9, NNMULT_DIM);
98
99 verify_results_q7(mult_out_q7, mult_ref_q7, NNMULT_DIM);
100
101 // Test q15
102 arm_nn_mult_q15(test2, test2+NNMULT_DIM, mult_out_q15, 13, NNMULT_DIM);
103
104 arm_nn_mult_q15_ref(test2, test2+NNMULT_DIM, mult_ref_q15, 13, NNMULT_DIM);
105
106 verify_results_q15(mult_out_q15, mult_ref_q15, NNMULT_DIM);
107
108 arm_nn_mult_q15(test2, test2+NNMULT_DIM, mult_out_q15, 18, NNMULT_DIM);
109
110 arm_nn_mult_q15_ref(test2, test2+NNMULT_DIM, mult_ref_q15, 18, NNMULT_DIM);
111
112 verify_results_q15(mult_out_q15, mult_ref_q15, NNMULT_DIM);
113
114 #endif
115
116 #ifdef TEST_SIGMOID
117
118 #define SIGMOID_DIM 128
119
120 /* This part tests the running of sigmoid functions */
121
122 test1 = new q7_t[SIGMOID_DIM];
123 test2 = new q15_t[SIGMOID_DIM];
124 test3 = new q7_t[SIGMOID_DIM];
125 test4 = new q15_t[SIGMOID_DIM];
126
127 srand(1);
128
129 for (int i = 0; i < SIGMOID_DIM; i++)
130 {
131 test1[i] = (rand() % 256 - 128);
132 test2[i] = (rand() % 65536 - 32768);
133 test3[i] = test1[i];
134 test4[i] = test2[i];
135 }
136
137 arm_nn_activations_direct_q7(test3, SIGMOID_DIM, 3, ARM_SIGMOID);
138
139 for (int i = 0; i < SIGMOID_DIM; i++)
140 {
141 printf("in: %d out: %d\n", test1[i], test3[i]);
142 }
143
144 printf("start testing q15_t sigmoid\n\n");
145
146 arm_nn_activations_direct_q15(test4, SIGMOID_DIM, 3, ARM_SIGMOID);
147
148 for (int i = 0; i < SIGMOID_DIM; i++)
149 {
150 printf("in: %d out: %d\n", test2[i], test4[i]);
151 }
152
153 delete[]test1;
154 delete[]test2;
155 delete[]test3;
156 delete[]test4;
157
158 #endif
159
160 #ifdef TEST_TANH
161
162 #define TANH_DIM 128
163
164 /* This part tests the running of sigmoid functions */
165
166 test1 = new q7_t[TANH_DIM];
167 test2 = new q15_t[TANH_DIM];
168 test3 = new q7_t[TANH_DIM];
169 test4 = new q15_t[TANH_DIM];
170
171 srand(1);
172
173 for (int i = 0; i < TANH_DIM; i++)
174 {
175 test1[i] = (rand() % 256 - 128);
176 test2[i] = (rand() % 65536 - 32768);
177 test3[i] = test1[i];
178 test4[i] = test2[i];
179 }
180
181 arm_nn_activations_direct_q7(test3, TANH_DIM, 3, ARM_TANH);
182
183 printf("start testing q7_t tanh\n\n");
184
185 for (int i = 0; i < TANH_DIM; i++)
186 {
187 printf("in: %d out: %d\n", test1[i], test3[i]);
188 }
189
190 printf("start testing q15_t tanh\n\n");
191
192 arm_nn_activations_direct_q15(test4, TANH_DIM, 3, ARM_TANH);
193
194 for (int i = 0; i < TANH_DIM; i++)
195 {
196 printf("in: %d out: %d\n", test2[i], test4[i]);
197 }
198
199 delete[]test1;
200 delete[]test2;
201 delete[]test3;
202 delete[]test4;
203
204 #endif
205
206 #ifdef TEST_POOL
207
208 #define POOL_IM_DIM 32
209 #define POOL_IM_CH 8
210
211 test1 = new q7_t[POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH * 2];
212 test2 = new q15_t[POOL_IM_DIM * POOL_IM_CH];
213 test3 = new q7_t[POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH];
214
215 for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
216 {
217 test1[i] = (rand() % 256 - 128);
218 }
219
220 q7_t *img_in = test1 + POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH;
221 q7_t *pool_out_ref = test3;
222 q7_t *pool_out_opt = test3 + POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH / 2;
223
224 for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
225 {
226 test3[i] = 0;
227 }
228
229 // copy over the img input
230 for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
231 {
232 img_in[i] = test1[i];
233 }
234
235 initialize_results_q7(pool_out_ref, pool_out_opt, POOL_IM_DIM / 2 * POOL_IM_DIM / 2 * POOL_IM_CH);
236
237 printf("Start maxpool reference implementation\n");
238
239 arm_maxpool_q7_HWC_ref(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_ref);
240
241 // copy over the img input
242 for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
243 {
244 img_in[i] = test1[i];
245 }
246
247 printf("Start maxpool opt implementation\n");
248
249 arm_maxpool_q7_HWC(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_opt);
250
251 verify_results_q7(pool_out_ref, pool_out_opt, POOL_IM_DIM / 2 * POOL_IM_DIM / 2 * POOL_IM_CH);
252
253 // copy over the img input
254 for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
255 {
256 img_in[i] = test1[i];
257 }
258
259 // copy over the img input
260 for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
261 {
262 img_in[i] = test1[i];
263 }
264
265 printf("Start avepool ref implementation\n");
266
267 arm_avepool_q7_HWC_ref(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_ref);
268
269 // copy over the img input
270 for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
271 {
272 img_in[i] = test1[i];
273 }
274
275 printf("Start avepool opt implementation\n");
276
277 arm_avepool_q7_HWC(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_opt);
278
279 // special check here
280 bool if_ave_pool_match = true;
281 for (int i = 0; i < POOL_IM_DIM / 2 * POOL_IM_DIM / 2 * POOL_IM_CH; i++)
282 {
283 // we tolerate at most difference of 1 here because of rounding errors
284 if (pool_out_ref[i] - pool_out_opt[i] >= 2 || pool_out_opt[i] - pool_out_ref[i] >= 2)
285 {
286 printf("Output mismatch at %d, expected %d, actual %d\n", i, pool_out_ref[i], pool_out_opt[i]);
287 if_ave_pool_match = false;
288 }
289 }
290 if (if_ave_pool_match == true)
291 {
292 printf("Outputs match.\n");
293 }
294
295 delete[]test1;
296 delete[]test2;
297 delete[]test3;
298
299 #endif
300
301 #ifdef TEST_RELU
302
303 #define RELU_DIM 127
304
305 test1 = new q7_t[RELU_DIM];
306 test2 = new q15_t[RELU_DIM];
307 test3 = new q7_t[RELU_DIM];
308 test4 = new q15_t[RELU_DIM];
309
310 for (int i = 0; i < RELU_DIM; i++)
311 {
312 test1[i] = (rand() % 256 - 128);
313 test2[i] = (rand() % 65536 - 32768);
314 test3[i] = test1[i];
315 test4[i] = test2[i];
316 }
317
318 q7_t *relu_ref_data_q7 = test1;
319 q7_t *relu_opt_data_q7 = test3;
320 q15_t *relu_ref_data_q15 = test2;
321 q15_t *relu_opt_data_q15 = test4;
322
323 printf("Start ref relu q7 implementation\n");
324
325 arm_relu_q7_ref(relu_ref_data_q7, RELU_DIM);
326
327 printf("Start opt relu q7 implementation\n");
328
329 arm_relu_q7(relu_opt_data_q7, RELU_DIM);
330
331 verify_results_q7(relu_ref_data_q7, relu_opt_data_q7, RELU_DIM);
332
333 printf("Start ref relu q15 implementation\n");
334
335 arm_relu_q15_ref(relu_ref_data_q15, RELU_DIM);
336
337 printf("Start opt relu q15 implementation\n");
338
339 arm_relu_q15(relu_opt_data_q15, RELU_DIM);
340
341 verify_results_q15(relu_ref_data_q15, relu_opt_data_q15, RELU_DIM);
342
343 delete[]test1;
344 delete[]test2;
345 delete[]test3;
346 delete[]test4;
347
348 #endif
349
350 #ifdef TEST_IP
351
352 #define IP_ROW_DIM 127
353 #define IP_COL_DIM 127
354
355 q7_t ip_weights[IP_ROW_DIM * IP_COL_DIM] = IP2_WEIGHT;
356 q7_t ip_q7_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_WEIGHT;
357 q7_t ip_q7_q15_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_q7_q15_WEIGHT;
358 q15_t ip_q15_weights[IP_ROW_DIM * IP_COL_DIM] = IP2_WEIGHT;
359 q15_t ip_q15_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_WEIGHT_Q15;
360
361 test1 = new q7_t[IP_COL_DIM + IP_ROW_DIM];
362 test2 = new q15_t[IP_COL_DIM];
363 test3 = new q7_t[IP_ROW_DIM * 3];
364 test4 = new q15_t[IP_COL_DIM + IP_ROW_DIM * 2];
365
366 for (int i = 0; i < IP_ROW_DIM + IP_COL_DIM; i++)
367 {
368 test1[i] = rand() % 256 - 100;
369 }
370 for (int i = 0; i < IP_ROW_DIM * 3; i++)
371 {
372 test3[i] = 0;
373 }
374
375 q7_t *ip_bias_q7 = test1 + IP_COL_DIM;
376
377 q7_t *ip_out_q7_ref = test3;
378 q7_t *ip_out_q7_opt = test3 + IP_ROW_DIM;
379 q7_t *ip_out_q7_opt_fast = test3 + 2 * IP_ROW_DIM;
380 q15_t *ip_out_q15_ref = test4 + IP_COL_DIM;
381 q15_t *ip_out_q15_opt = test4 + IP_COL_DIM + IP_ROW_DIM;
382
383 initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt, IP_ROW_DIM);
384 initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM);
385 initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM);
386
387 printf("Start ref q7 implementation\n");
388
389 arm_fully_connected_q7_ref(test1, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_ref, test2);
390
391 printf("Start q7 implementation\n");
392
393 arm_fully_connected_q7(test1, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_opt, test2);
394
395 verify_results_q7(ip_out_q7_ref, ip_out_q7_opt, IP_ROW_DIM);
396
397 printf("Start q7 ref opt implementation\n");
398
399 arm_fully_connected_q7_opt_ref(test1, ip_q7_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7,
400 ip_out_q7_opt_fast, test2);
401
402 verify_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM);
403
404 printf("Start q7 opt implementation\n");
405
406 arm_fully_connected_q7_opt(test1, ip_q7_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_opt_fast,
407 test2);
408
409 verify_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM);
410
411 for (int i = 0; i < IP_ROW_DIM + IP_COL_DIM; i++)
412 {
413 test4[i] = (rand() % 65536 - 32768);
414 }
415
416 initialize_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
417
418 printf("Start ref q15 implementation\n");
419
420 arm_fully_connected_q15_ref(test4, ip_q15_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_ref, NULL);
421
422 printf("Start q15 implementation\n");
423
424 arm_fully_connected_q15(test4, ip_q15_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt, NULL);
425
426 verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
427
428 printf("Start ref opt q15 implementation\n");
429
430 arm_fully_connected_q15_opt_ref(test4, ip_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt,
431 NULL);
432
433 verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
434
435 printf("Start opt q15 implementation\n");
436
437 arm_fully_connected_q15_opt(test4, ip_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt, NULL);
438
439 verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
440
441 initialize_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
442
443 printf("Start ref q7_q15 implementation\n");
444
445 arm_fully_connected_mat_q7_vec_q15_ref(test4, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q15_ref,
446 test2);
447
448 printf("Start q7_q15 implementation\n");
449
450 arm_fully_connected_mat_q7_vec_q15(test4, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q15_opt,
451 test2);
452
453 verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
454
455 printf("Start ref opt q7_q15 implementation\n");
456
457 arm_fully_connected_mat_q7_vec_q15_opt_ref(test4, ip_q7_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7,
458 ip_out_q15_opt, test2);
459
460 verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
461
462 printf("Start opt q7_q15 implementation\n");
463
464 arm_fully_connected_mat_q7_vec_q15_opt(test4, ip_q7_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7,
465 ip_out_q15_opt, test2);
466
467 verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
468
469 delete[]test1;
470 delete[]test2;
471 delete[]test3;
472 delete[]test4;
473
474 #endif
475
476 #ifdef TEST_NONSQUARE
477
478 /* Use RCONV to differential with square CONV */
479
480 #define RCONV_IM_DIM_X 10
481 #define RCONV_IM_DIM_Y 8
482 #define RCONV_IM_CH 4
483 #define RCONV_KER_DIM_X 5
484 #define RCONV_KER_DIM_Y 3
485 #define RCONV_STRIDE_X 1
486 #define RCONV_STRIDE_Y 1
487 #define RCONV_PADDING_X 2
488 #define RCONV_PADDING_Y 1
489 #define RCONV_OUT_CH 4
490 #define RCONV_OUT_DIM_X 10
491 #define RCONV_OUT_DIM_Y 8
492
493 test1 = new q7_t[RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH];
494 test2 = new q15_t[2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH];
495 test3 =
496 new q7_t[RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH];
497
498 for (int i = 0; i < RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH; i++)
499 {
500 test1[i] = rand() % 256 - 100;
501 }
502
503 for (int i = 0;
504 i < RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH; i++)
505 {
506 test3[i] = rand() % 256 - 100;
507 }
508
509 q7_t *rconv_weight_q7 = test1;
510 q7_t *rconv_bias_q7 = test1 + RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH;
511
512 q15_t *rconv_buf = test2;
513
514 q7_t *rconv_im_in_q7 = test3;
515 q7_t *rconv_im_out_ref_q7 = test3 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH;
516 q7_t *rconv_im_out_opt_q7 =
517 test3 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH;
518
519 initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
520
521 printf("start conv q7 nonsquare ref implementation\n");
522 arm_convolve_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
523 RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
524 RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7,
525 RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
526
527 printf("start conv q7 nonsquare opt implementation\n");
528 arm_convolve_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
529 RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
530 RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7,
531 RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
532
533 verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
534
535 initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
536
537 printf("start conv q7 nonsquare ref implementation\n");
538 arm_convolve_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
539 RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
540 RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7,
541 RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
542
543 printf("start conv q7 nonsquare basic implementation\n");
544 arm_convolve_HWC_q7_basic_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
545 RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
546 RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7,
547 RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
548
549 verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
550
551 initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
552
553 printf("start 1x1 conv q7 nonsquare fast implementation\n");
554 arm_convolve_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
555 RCONV_OUT_CH, 1, 1, 0, 0, RCONV_STRIDE_X,
556 RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7, RCONV_OUT_DIM_X,
557 RCONV_OUT_DIM_Y, rconv_buf, NULL);
558
559 printf("start 1x1 conv q7 nonsquare dedicated function implementation\n");
560 arm_convolve_1x1_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
561 RCONV_OUT_CH, 1, 1, 0, 0, RCONV_STRIDE_X,
562 RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7, RCONV_OUT_DIM_X,
563 RCONV_OUT_DIM_Y, rconv_buf, NULL);
564
565 verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
566
567 printf("start depthwise separable conv q7 nonsquare ref implementation\n");
568 arm_depthwise_separable_conv_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH,
569 rconv_weight_q7, RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y,
570 RCONV_PADDING_X, RCONV_PADDING_Y, RCONV_STRIDE_X, RCONV_STRIDE_Y,
571 rconv_bias_q7, 1, 7, rconv_im_out_ref_q7, RCONV_OUT_DIM_X,
572 RCONV_OUT_DIM_Y, rconv_buf, NULL);
573
574 printf("start depthwise separable conv q7 nonsquare opt implementation\n");
575 arm_depthwise_separable_conv_HWC_q7_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH,
576 rconv_weight_q7, RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y,
577 RCONV_PADDING_X, RCONV_PADDING_Y, RCONV_STRIDE_X, RCONV_STRIDE_Y,
578 rconv_bias_q7, 1, 7, rconv_im_out_opt_q7, RCONV_OUT_DIM_X,
579 RCONV_OUT_DIM_Y, rconv_buf, NULL);
580
581 verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
582
583 delete[]test1;
584 delete[]test2;
585 delete[]test3;
586
587 test2 = new q15_t[RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH]; // weights + bias
588 test4 = new q15_t[2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH //buffer
589 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH]; // i/o
590
591 for (int i = 0; i < RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH; i++)
592 {
593 test2[i] = rand() % 256 - 100;
594 }
595
596 for (int i = 0;
597 i < 2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH
598 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH;
599 i++)
600 {
601 test4[i] = rand() % 256 - 100;
602 }
603
604 q15_t *rconv_weight_q15 = test2;
605 q15_t *rconv_bias_q15 = test2 + RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH;
606
607 rconv_buf = test4;
608
609 q15_t *rconv_im_in_q15 = test4 + 2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH;
610 q15_t *rconv_im_out_ref_q15 = rconv_im_in_q15 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH;
611 q15_t *rconv_im_out_opt_q15 = rconv_im_out_ref_q15 + RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH;
612
613 initialize_results_q15(rconv_im_out_ref_q15, rconv_im_out_opt_q15, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
614
615 printf("start conv q15 nonsquare ref implementation\n");
616 arm_convolve_HWC_q15_nonsquare_ref(rconv_im_in_q15, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q15,
617 RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
618 RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q15, 1, 7, rconv_im_out_ref_q15,
619 RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
620
621 printf("start conv q5 nonsquare opt implementation\n");
622 arm_convolve_HWC_q15_fast_nonsquare(rconv_im_in_q15, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q15,
623 RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
624 RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q15, 1, 7, rconv_im_out_opt_q15,
625 RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
626
627 verify_results_q15(rconv_im_out_ref_q15, rconv_im_out_opt_q15, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
628
629 delete [] test2;
630 delete [] test4;
631 #endif
632
633 #ifdef TEST_CONV
634
635 #define CONV_IM_DIM 16
636 #define CONV_IM_CH 16
637 #define CONV_KER_DIM 5
638 #define CONV_OUT_CH 16
639 #define CONV_OUT_DIM 16
640
641 test1 = new q7_t[CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH];
642 test2 =
643 new q15_t[CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH +
644 2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH];
645 test3 = new q7_t[CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH];
646 test4 = new q15_t[CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH];
647
648 for (int i = 0; i < CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH; i++)
649 {
650 test1[i] = rand() % 256 - 100;
651 }
652
653 for (int i = 0;
654 i <
655 CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH +
656 2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH; i++)
657 {
658 test2[i] = (rand() % 65536 - 32768);
659 }
660
661 for (int i = 0; i < CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH; i++)
662 {
663 test3[i] = rand() % 256 - 100;
664 }
665
666 for (int i = 0; i < CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH; i++)
667 {
668 test4[i] = (rand() % 65536 - 32768);
669 }
670
671 q7_t *conv_weight_q7 = test1;
672 q7_t *conv_bias_q7 = test1 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH;
673
674 q15_t *conv_weight_q15 = test2;
675 q15_t *conv_buf = test2 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH;
676 q15_t *conv_bias_q15 =
677 test2 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH +
678 2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH;
679
680 q7_t *conv_im_in_q7 = test3;
681 q7_t *conv_im_out_ref_q7 = test3 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH;
682 q7_t *conv_im_out_opt_q7 =
683 test3 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH;
684
685 q15_t *conv_im_in_q15 = test4;
686 q15_t *conv_im_out_ref_q15 = test4 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH;
687 q15_t *conv_im_out_opt_q15 =
688 test4 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH;
689
690 initialize_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
691
692 printf("start q7 ref implementation\n");
693
694 arm_convolve_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
695 CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7,
696 CONV_OUT_DIM, conv_buf, NULL);
697
698 printf("start q7 basic implementation\n");
699
700 arm_convolve_HWC_q7_basic(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
701 CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
702 CONV_OUT_DIM, conv_buf, NULL);
703
704 verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
705
706 printf("start q7 fast implementation\n");
707
708 arm_convolve_HWC_q7_fast(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
709 CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
710 CONV_OUT_DIM, conv_buf, NULL);
711
712 verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
713
714 // testing with RGB
715 printf("start q7 ref implementation for RGB\n");
716
717 arm_convolve_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7,
718 CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7,
719 CONV_OUT_DIM, conv_buf, NULL);
720
721 printf("start q7 basic implementation for RGB\n");
722
723 arm_convolve_HWC_q7_basic(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7,
724 CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
725 CONV_OUT_DIM, conv_buf, NULL);
726
727 verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
728
729 printf("start q7 RGB implementation for RGB\n");
730
731 arm_convolve_HWC_q7_RGB(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7,
732 CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
733 CONV_OUT_DIM, conv_buf, NULL);
734
735 verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
736
737 // testing q15
738 initialize_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
739
740 printf("start q15 ref implementation\n");
741
742 arm_convolve_HWC_q15_ref(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15,
743 CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_ref_q15,
744 CONV_OUT_DIM, conv_buf, NULL);
745
746 printf("start q15 basic implementation\n");
747
748 arm_convolve_HWC_q15_basic(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15,
749 CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_opt_q15,
750 CONV_OUT_DIM, conv_buf, NULL);
751
752 verify_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
753
754 printf("start q15 fast implementation\n");
755
756 arm_convolve_HWC_q15_fast(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15,
757 CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_opt_q15,
758 CONV_OUT_DIM, conv_buf, NULL);
759
760 verify_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
761
762 // depthwise separable conv
763 initialize_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
764
765 printf("start q7 depthwise_separable_conv ref implementation\n");
766
767 arm_depthwise_separable_conv_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
768 CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7,
769 CONV_OUT_DIM, conv_buf, NULL);
770
771 printf("start q7 depthwise_separable_conv implementation\n");
772
773 arm_depthwise_separable_conv_HWC_q7(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
774 CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
775 CONV_OUT_DIM, conv_buf, NULL);
776
777 verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
778
779 delete[]test1;
780 delete[]test2;
781 delete[]test3;
782 delete[]test4;
783
784 #endif
785
786 test_pass = true;
787 test_index = 0;
788 while (test_flags[test_index] != -1) {
789 if (test_flags[test_index]) {
790 test_pass = false;
791 }
792 test_index ++;
793 }
794 if (test_pass) {
795 printf("All tests passed\n");
796 } else {
797 printf("Test failed passed\n");
798 }
799
800 return 0;
801 }
802