1 /*
2 * Copyright (c) 2008-2012 Travis Geiselbrecht
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files
6 * (the "Software"), to deal in the Software without restriction,
7 * including without limitation the rights to use, copy, modify, merge,
8 * publish, distribute, sublicense, and/or sell copies of the Software,
9 * and to permit persons to whom the Software is furnished to do so,
10 * subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include <sys/types.h>
24 #include <stdio.h>
25 #include <rand.h>
26 #include <err.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <app/tests.h>
30 #include <kernel/thread.h>
31 #include <kernel/mutex.h>
32 #include <kernel/semaphore.h>
33 #include <kernel/event.h>
34 #include <platform.h>
35
36 const size_t BUFSIZE = (1024*1024);
37 const uint ITER = 1024;
38
bench_set_overhead(void)39 __NO_INLINE static void bench_set_overhead(void)
40 {
41 uint32_t *buf = malloc(BUFSIZE);
42 if (!buf) {
43 printf("failed to allocate buffer\n");
44 return;
45 }
46
47 uint count = arch_cycle_count();
48 for (uint i = 0; i < ITER; i++) {
49 __asm__ volatile("");
50 }
51 count = arch_cycle_count() - count;
52
53 printf("took %u cycles overhead to loop %u times\n",
54 count, ITER);
55
56 free(buf);
57 }
58
bench_memset(void)59 __NO_INLINE static void bench_memset(void)
60 {
61 void *buf = malloc(BUFSIZE);
62 if (!buf) {
63 printf("failed to allocate buffer\n");
64 return;
65 }
66
67 uint count = arch_cycle_count();
68 for (uint i = 0; i < ITER; i++) {
69 memset(buf, 0, BUFSIZE);
70 }
71 count = arch_cycle_count() - count;
72
73 printf("took %u cycles to memset a buffer of size %u %d times (%u bytes), %f bytes/cycle\n",
74 count, BUFSIZE, ITER, BUFSIZE * ITER, (BUFSIZE * ITER) / (float)count);
75
76 free(buf);
77 }
78
79 #define bench_cset(type) \
80 __NO_INLINE static void bench_cset_##type(void) \
81 { \
82 type *buf = malloc(BUFSIZE); \
83 if (!buf) { \
84 printf("failed to allocate buffer\n"); \
85 return; \
86 } \
87 \
88 uint count = arch_cycle_count(); \
89 for (uint i = 0; i < ITER; i++) { \
90 for (uint j = 0; j < BUFSIZE / sizeof(*buf); j++) { \
91 buf[j] = 0; \
92 } \
93 } \
94 count = arch_cycle_count() - count; \
95 \
96 printf("took %u cycles to manually clear a buffer using wordsize %d of size %u %d times (%u bytes), %f bytes/cycle\n", \
97 count, sizeof(*buf), BUFSIZE, ITER, BUFSIZE * ITER, (BUFSIZE * ITER) / (float)count); \
98 \
99 free(buf); \
100 }
101
102 bench_cset(uint8_t)
bench_cset(uint16_t)103 bench_cset(uint16_t)
104 bench_cset(uint32_t)
105 bench_cset(uint64_t)
106
107 __NO_INLINE static void bench_cset_wide(void)
108 {
109 uint32_t *buf = malloc(BUFSIZE);
110 if (!buf) {
111 printf("failed to allocate buffer\n");
112 return;
113 }
114
115 uint count = arch_cycle_count();
116 for (uint i = 0; i < ITER; i++) {
117 for (uint j = 0; j < BUFSIZE / sizeof(*buf) / 8; j++) {
118 buf[j*8] = 0;
119 buf[j*8+1] = 0;
120 buf[j*8+2] = 0;
121 buf[j*8+3] = 0;
122 buf[j*8+4] = 0;
123 buf[j*8+5] = 0;
124 buf[j*8+6] = 0;
125 buf[j*8+7] = 0;
126 }
127 }
128 count = arch_cycle_count() - count;
129
130 printf("took %u cycles to manually clear a buffer of size %u %d times 8 words at a time (%u bytes), %f bytes/cycle\n",
131 count, BUFSIZE, ITER, BUFSIZE * ITER, (BUFSIZE * ITER) / (float)count);
132
133 free(buf);
134 }
135
bench_memcpy(void)136 __NO_INLINE static void bench_memcpy(void)
137 {
138 uint8_t *buf = malloc(BUFSIZE);
139 if (!buf) {
140 printf("failed to allocate buffer\n");
141 return;
142 }
143
144 uint count = arch_cycle_count();
145 for (uint i = 0; i < ITER; i++) {
146 memcpy(buf, buf + BUFSIZE / 2, BUFSIZE / 2);
147 }
148 count = arch_cycle_count() - count;
149
150 printf("took %u cycles to memcpy a buffer of size %u %d times (%u source bytes), %f source bytes/cycle\n",
151 count, BUFSIZE / 2, ITER, BUFSIZE / 2 * ITER, (BUFSIZE / 2 * ITER) / (float)count);
152
153 free(buf);
154 }
155
156 #if ARCH_ARM
arm_bench_cset_stm(void)157 __NO_INLINE static void arm_bench_cset_stm(void)
158 {
159 uint32_t *buf = malloc(BUFSIZE);
160 if (!buf) {
161 printf("failed to allocate buffer\n");
162 return;
163 }
164
165 uint count = arch_cycle_count();
166 for (uint i = 0; i < ITER; i++) {
167 for (uint j = 0; j < BUFSIZE / sizeof(*buf) / 8; j++) {
168 __asm__ volatile(
169 "stm %0, {r0-r7};"
170 :: "r" (&buf[j*8])
171 );
172 }
173 }
174 count = arch_cycle_count() - count;
175
176 printf("took %u cycles to manually clear a buffer of size %u %d times 8 words at a time using stm (%u bytes), %f bytes/cycle\n",
177 count, BUFSIZE, ITER, BUFSIZE * ITER, (BUFSIZE * ITER) / (float)count);
178
179 free(buf);
180 }
181
182 #if (__CORTEX_M >= 0x03)
arm_bench_multi_issue(void)183 __NO_INLINE static void arm_bench_multi_issue(void)
184 {
185 uint32_t cycles;
186 uint32_t a = 0, b = 0, c = 0, d = 0, e = 0, f = 0, g = 0, h = 0;
187 #define ITER 1000000
188 uint count = ITER;
189 cycles = arch_cycle_count();
190 while (count--) {
191 asm volatile ("");
192 asm volatile ("add %0, %0, %0" : "=r" (a) : "r" (a));
193 asm volatile ("add %0, %0, %0" : "=r" (b) : "r" (b));
194 asm volatile ("and %0, %0, %0" : "=r" (c) : "r" (c));
195 asm volatile ("mov %0, %0" : "=r" (d) : "r" (d));
196 asm volatile ("orr %0, %0, %0" : "=r" (e) : "r" (e));
197 asm volatile ("add %0, %0, %0" : "=r" (f) : "r" (f));
198 asm volatile ("and %0, %0, %0" : "=r" (g) : "r" (g));
199 asm volatile ("mov %0, %0" : "=r" (h) : "r" (h));
200 }
201 cycles = arch_cycle_count() - cycles;
202
203 printf("took %u cycles to issue 8 integer ops (%f cycles/iteration)\n", cycles, (float)cycles / ITER);
204 #undef ITER
205 }
206 #endif // __CORTEX_M
207 #endif // ARCH_ARM
208
209 #if WITH_LIB_LIBM
210 #include <math.h>
211
bench_sincos(void)212 __NO_INLINE static void bench_sincos(void)
213 {
214 printf("touching the floating point unit\n");
215 __UNUSED volatile double _hole = sin(0);
216
217 uint count = arch_cycle_count();
218 __UNUSED double a = sin(2.0);
219 count = arch_cycle_count() - count;
220 printf("took %u cycles for sin()\n", count);
221
222 count = arch_cycle_count();
223 a = cos(2.0);
224 count = arch_cycle_count() - count;
225 printf("took %u cycles for cos()\n", count);
226
227 count = arch_cycle_count();
228 a = sinf(2.0);
229 count = arch_cycle_count() - count;
230 printf("took %u cycles for sinf()\n", count);
231
232 count = arch_cycle_count();
233 a = cosf(2.0);
234 count = arch_cycle_count() - count;
235 printf("took %u cycles for cosf()\n", count);
236
237 count = arch_cycle_count();
238 a = sqrt(1234567.0);
239 count = arch_cycle_count() - count;
240 printf("took %u cycles for sqrt()\n", count);
241
242 count = arch_cycle_count();
243 a = sqrtf(1234567.0f);
244 count = arch_cycle_count() - count;
245 printf("took %u cycles for sqrtf()\n", count);
246 }
247
248 #endif // WITH_LIB_LIBM
249
/*
 * Run every benchmark compiled into this file, one after another.
 *
 * The ARM-specific and libm benchmarks are called only when the matching
 * build flags (ARCH_ARM, __CORTEX_M >= 3, WITH_LIB_LIBM) are set.
 */
void benchmarks(void)
{
    /* loop overhead, then the libc memory routines */
    bench_set_overhead();
    bench_memset();
    bench_memcpy();

    /* hand-written clearing loops, narrowest store first */
    bench_cset_uint8_t();
    bench_cset_uint16_t();
    bench_cset_uint32_t();
    bench_cset_uint64_t();
    bench_cset_wide();

#if ARCH_ARM
    arm_bench_cset_stm();
#endif
#if ARCH_ARM && (__CORTEX_M >= 0x03)
    arm_bench_multi_issue();
#endif
#if WITH_LIB_LIBM
    bench_sincos();
#endif
}
273
274