xref: /aosp_15_r20/external/libopenapv/src/oapv_util.c (revision abb65b4b03b69e1d508d4d9a44dcf199df16e7c3)
1 /*
2  * Copyright (c) 2022 Samsung Electronics Co., Ltd.
3  * All Rights Reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * - Redistributions of source code must retain the above copyright notice,
9  *   this list of conditions and the following disclaimer.
10  *
11  * - Redistributions in binary form must reproduce the above copyright notice,
12  *   this list of conditions and the following disclaimer in the documentation
13  *   and/or other materials provided with the distribution.
14  *
15  * - Neither the name of the copyright owner, nor the names of its contributors
16  *   may be used to endorse or promote products derived from this software
17  *   without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "oapv_util.h"
33 #include <math.h>
34 
/*
 * MD5 step helpers.
 *
 * FF/GG/HH/II are the four bitwise mixing functions, one per round.
 * MD5FUNC performs one step: it adds f(x, y, z), the message word msg1 and
 * the additive constant msg2 to w, rotates w left by s bits (w is expected
 * to be a 32-bit unsigned lvalue), then adds x.
 *
 * Every argument is parenthesized so that expressions such as `p | q` or
 * `tbl[i + 1]` expand with the intended precedence. w and x are evaluated
 * more than once, so they must be free of side effects.
 */
#define MD5FUNC(f, w, x, y, z, msg1, s, msg2)                  \
    ((w) += f((x), (y), (z)) + (msg1) + (msg2),                \
     (w) = ((w) << (s)) | ((w) >> (32 - (s))),                 \
     (w) += (x))
#define FF(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define GG(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
#define HH(x, y, z) ((x) ^ (y) ^ (z))
#define II(x, y, z) ((y) ^ ((x) | ~(z)))
42 
/*
 * Run the 64-step MD5 compression on one 16-word message block.
 *
 * buf : four-word running state (A, B, C, D); updated in place.
 * msg : sixteen 32-bit message words; read only.
 *
 * Each round is written as four passes over a group of four steps. Within a
 * group the roles of a/b/c/d rotate and the rotation amounts are fixed per
 * position; only the message index and the additive constant change.
 */
static void md5_trans(u32 *buf, u32 *msg)
{
    /* Additive constants for steps 1..64, in step order. */
    static const u32 k[64] = {
        /* round 1 */
        0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
        0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
        0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
        0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
        /* round 2 */
        0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
        0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
        0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
        0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
        /* round 3 */
        0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
        0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
        0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
        0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
        /* round 4 */
        0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
        0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
        0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
        0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
    };
    u32 a = buf[0];
    u32 b = buf[1];
    u32 c = buf[2];
    u32 d = buf[3];
    u32 g;

    /* Round 1: message words are consumed in order. */
    for(g = 0; g < 16; g += 4) {
        MD5FUNC(FF, a, b, c, d, msg[g], 7, k[g]);
        MD5FUNC(FF, d, a, b, c, msg[g + 1], 12, k[g + 1]);
        MD5FUNC(FF, c, d, a, b, msg[g + 2], 17, k[g + 2]);
        MD5FUNC(FF, b, c, d, a, msg[g + 3], 22, k[g + 3]);
    }

    /* Round 2: message index advances by 5 (mod 16), starting at 1. */
    for(g = 0; g < 16; g += 4) {
        MD5FUNC(GG, a, b, c, d, msg[(5 * g + 1) & 15], 5, k[16 + g]);
        MD5FUNC(GG, d, a, b, c, msg[(5 * g + 6) & 15], 9, k[17 + g]);
        MD5FUNC(GG, c, d, a, b, msg[(5 * g + 11) & 15], 14, k[18 + g]);
        MD5FUNC(GG, b, c, d, a, msg[(5 * g) & 15], 20, k[19 + g]);
    }

    /* Round 3: message index advances by 3 (mod 16), starting at 5. */
    for(g = 0; g < 16; g += 4) {
        MD5FUNC(HH, a, b, c, d, msg[(3 * g + 5) & 15], 4, k[32 + g]);
        MD5FUNC(HH, d, a, b, c, msg[(3 * g + 8) & 15], 11, k[33 + g]);
        MD5FUNC(HH, c, d, a, b, msg[(3 * g + 11) & 15], 16, k[34 + g]);
        MD5FUNC(HH, b, c, d, a, msg[(3 * g + 14) & 15], 23, k[35 + g]);
    }

    /* Round 4: message index advances by 7 (mod 16), starting at 0. */
    for(g = 0; g < 16; g += 4) {
        MD5FUNC(II, a, b, c, d, msg[(7 * g) & 15], 6, k[48 + g]);
        MD5FUNC(II, d, a, b, c, msg[(7 * g + 7) & 15], 10, k[49 + g]);
        MD5FUNC(II, c, d, a, b, msg[(7 * g + 14) & 15], 15, k[50 + g]);
        MD5FUNC(II, b, c, d, a, msg[(7 * g + 5) & 15], 21, k[51 + g]);
    }

    /* Fold the working registers back into the running state. */
    buf[0] += a;
    buf[1] += b;
    buf[2] += c;
    buf[3] += d;
}
140 
/*
 * Reset an MD5 context: load the four initial state words and clear the
 * 64-bit message length counter (kept as two 32-bit words in bits[]).
 */
static void md5_init(oapv_md5_t *md5)
{
    static const u32 initial_state[4] = {
        0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476
    };
    int w;

    for(w = 0; w < 4; w++) {
        md5->h[w] = initial_state[w];
    }

    md5->bits[1] = 0;
    md5->bits[0] = 0;
}
151 
/*
 * Feed `len` bytes starting at `buf_t` into the MD5 context.
 *
 * md5   : context previously set up by md5_init().
 * buf_t : input bytes; no alignment is required.
 * len   : number of bytes to absorb.
 *
 * Complete 64-byte blocks are compressed immediately; any tail shorter than
 * a block is kept in md5->msg for the next call or for md5_finish().
 *
 * Every block is first copied into a local u32 array before it is handed to
 * md5_trans(). The input pointer may sit at any byte offset, so reading it
 * directly as u32 words would be a misaligned, type-punned access.
 * The words are taken in host byte order, as before.
 */
static void md5_update(oapv_md5_t *md5, void *buf_t, u32 len)
{
    u8 *src = (u8 *)buf_t;
    u32 block[16];
    u32 idx = (u32)((md5->bits[0] >> 3) & 0x3f); /* bytes already buffered */
    u32 room = 64 - idx;                         /* bytes to fill the buffer */
    u32 done = 0;                                /* input bytes consumed */

    /* Advance the 64-bit bit counter, carrying from the low to the high word. */
    md5->bits[0] += (len << 3);
    if(md5->bits[0] < (len << 3)) {
        (md5->bits[1])++;
    }
    md5->bits[1] += (len >> 29);

    if(len >= room) {
        /* Complete the partially filled buffer and compress it. */
        oapv_mcpy(md5->msg + idx, src, room);
        oapv_mcpy(block, md5->msg, 64);
        md5_trans(md5->h, block);
        done = room;

        /* Compress the remaining whole blocks; the subtraction cannot wrap. */
        while(len - done >= 64) {
            oapv_mcpy(block, src + done, 64);
            md5_trans(md5->h, block);
            done += 64;
        }
        idx = 0;
    }

    /* Keep the leftover bytes for later. */
    if(len != done) {
        oapv_mcpy(md5->msg + idx, src + done, len - done);
    }
}
186 
/*
 * Feed `len` 16-bit samples into the MD5 context as little-endian byte
 * pairs (low byte first, then high byte), independent of host byte order.
 *
 * md5   : context previously set up by md5_init().
 * buf_t : array of u16 samples.
 * len   : number of samples (not bytes).
 *
 * The samples are serialized through a fixed 512-byte scratch buffer in
 * chunks of at most 256 samples, so any `len` is handled without writing
 * past the buffer. Hashing chunk by chunk through md5_update() gives the
 * same state as hashing the whole byte sequence at once.
 */
static void md5_update_16(oapv_md5_t *md5, void *buf_t, u32 len)
{
    u16      *src = (u16 *)buf_t;
    u8        t[512];
    const u32 max_samples = (u32)(sizeof(t) / 2);

    while(len > 0) {
        u32 n = (len < max_samples) ? len : max_samples;
        u32 k;

        for(k = 0; k < n; k++) {
            t[2 * k] = (u8)(src[k]);
            t[2 * k + 1] = (u8)(src[k] >> 8);
        }

        md5_update(md5, t, n * 2);

        src += n;
        len -= n;
    }
}
228 
/*
 * Finalize the hash: append the 0x80 marker, zero padding and the 64-bit
 * message bit count, run the last compression(s), write the 16-byte result
 * to `digest`, and wipe the context.
 *
 * md5    : context holding the accumulated state; zeroed on return.
 * digest : receives the 16 bytes of the final state md5->h in memory order.
 *
 * The byte buffer md5->msg is copied into a local u32 array before each
 * md5_trans() call rather than being cast to u32 *, which avoids
 * type-punning the byte array.
 */
static void md5_finish(oapv_md5_t *md5, u8 digest[16])
{
    u32 block[16];
    u8 *pos;
    int cnt;

    /* Number of bytes currently buffered (0..63). */
    cnt = (md5->bits[0] >> 3) & 0x3F;
    pos = md5->msg + cnt;
    *pos++ = 0x80;
    /* Bytes left in the buffer after the marker. */
    cnt = 64 - 1 - cnt;

    if(cnt < 8) {
        /* No room for the 8-byte length: pad, compress, start a new block. */
        oapv_mset(pos, 0, cnt);
        oapv_mcpy(block, md5->msg, 64);
        md5_trans(md5->h, block);
        oapv_mset(md5->msg, 0, 56);
    }
    else {
        /* Pad up to the length field at byte offset 56. */
        oapv_mset(pos, 0, cnt - 8);
    }

    /* Bit count goes into the last two words, low word first. */
    oapv_mcpy((md5->msg + 14 * sizeof(u32)), &md5->bits[0], sizeof(u32));
    oapv_mcpy((md5->msg + 15 * sizeof(u32)), &md5->bits[1], sizeof(u32));

    oapv_mcpy(block, md5->msg, 64);
    md5_trans(md5->h, block);
    oapv_mcpy(digest, md5->h, 16);
    oapv_mset(md5, 0, sizeof(oapv_md5_t));
}
255 
/*
 * 16-byte UUID used for the frame-hash metadata: oapv_set_md5_pld() copies
 * it to the start of the payload and also passes it to oapvm_set().
 */
static unsigned char uuid_frm_hash[16] = {
    0xf8, 0x72, 0x1b, 0x3e, 0xcd, 0xee, 0x47, 0x21,
    0x98, 0x0d, 0x9b, 0x9e, 0x39, 0x20, 0x28, 0x49
};
260 
/*
 * Compute one MD5 digest per plane of `imgb` and store it in imgb->hash[].
 *
 * imgb->hash is cleared first. For each of the imgb->np planes, every one of
 * the imgb->ah[] rows contributes imgb->aw[] * 2 bytes, read from the plane
 * base imgb->a[] at a row pitch of imgb->s[] bytes.
 * NOTE(review): the factor 2 presumably means two bytes per sample - confirm
 * against the image formats this is used with.
 */
void oapv_imgb_set_md5(oapv_imgb_t *imgb)
{
    oapv_md5_t md5[N_C];
    int        plane, row;

    oapv_assert(imgb != NULL);
    memset(imgb->hash, 0, sizeof(imgb->hash));

    for(plane = 0; plane < imgb->np; plane++) {
        oapv_md5_t *ctx = &md5[plane];

        md5_init(ctx);

        row = 0;
        while(row < imgb->ah[plane]) {
            u8 *line = ((u8 *)imgb->a[plane]) + row * imgb->s[plane];

            md5_update(ctx, line, imgb->aw[plane] * 2);
            row++;
        }

        md5_finish(ctx, imgb->hash[plane]);
    }
}
279 
/*
 * Hash the planes of `rec` and register the result as user-defined metadata.
 *
 * The payload is the 16-byte uuid_frm_hash followed by one 16-byte MD5
 * digest per plane, i.e. 16 * rec->np + 16 bytes in total.
 *
 * Returns the result of oapvm_set(); an allocation failure is reported
 * through oapv_assert_rv() with OAPV_ERR_OUT_OF_MEMORY.
 * NOTE(review): whether oapvm_set() copies the payload or takes ownership
 * of the allocation is not visible here - confirm that it is not leaked.
 */
int oapv_set_md5_pld(oapvm_t mid, int group_id, oapv_imgb_t *rec)
{
    u8 *mdp_data;
    u8 *slot;
    int plane;

    oapv_imgb_set_md5(rec);

    mdp_data = oapv_malloc((16 * rec->np) + 16);
    oapv_assert_rv(mdp_data != NULL, OAPV_ERR_OUT_OF_MEMORY);

    /* UUID first, then the per-plane digests back to back. */
    memcpy(mdp_data, uuid_frm_hash, 16);
    slot = mdp_data + 16;
    for(plane = 0; plane < rec->np; plane++) {
        memcpy(slot, rec->hash[plane], 16);
        slot += 16;
    }

    return oapvm_set(mid, group_id, OAPV_METADATA_USER_DEFINED, mdp_data, 16 * rec->np + 16, uuid_frm_hash);
}
291 
/*
 * Copy a (1 << log2_copy_w) x (1 << log2_copy_h) block of s16 samples.
 *
 * src, dst               : top-left sample of the source / destination block.
 * src_stride, dst_stride : distance between rows, counted in s16 elements.
 */
void oapv_block_copy(s16 *src, int src_stride, s16 *dst, int dst_stride, int log2_copy_w, int log2_copy_h)
{
    const int row_bytes = (int)sizeof(s16) * (1 << log2_copy_w);
    int       rows_left = 1 << log2_copy_h;

    while(rows_left-- > 0) {
        oapv_mcpy(dst, src, row_bytes);
        src += src_stride;
        dst += dst_stride;
    }
}
304 
305 #if X86_SSE
/*
 * CPU feature identifiers as decoded by GET_CPU_INFO():
 * bits 6..5 select an entry of the four-int cpuid result array and
 * bits 4..0 select the bit inside that entry.
 */
#define OAPV_CPU_INFO_SSE2    ((3 << 5) | 26) /* 0x7A */
#define OAPV_CPU_INFO_SSE3    ((2 << 5) | 0)  /* 0x40 */
#define OAPV_CPU_INFO_SSSE3   ((2 << 5) | 9)  /* 0x49 */
#define OAPV_CPU_INFO_SSE41   ((2 << 5) | 19) /* 0x53 */
#define OAPV_CPU_INFO_OSXSAVE ((2 << 5) | 27) /* 0x5B */
#define OAPV_CPU_INFO_AVX     ((2 << 5) | 28) /* 0x5C */
#define OAPV_CPU_INFO_AVX2    ((1 << 5) | 5)  /* 0x25 */
313 
/*
 * CPUID / XGETBV access.
 * Non-GCC Windows compilers get both from <intrin.h>; GCC-compatible
 * compilers get the inline-assembly equivalents defined below.
 */
#if(defined(_WIN64) || defined(_WIN32)) && !defined(__GNUC__)
#include <intrin.h>
#elif defined(__GNUC__)
#ifndef _XCR_XFEATURE_ENABLED_MASK
#define _XCR_XFEATURE_ENABLED_MASK 0
#endif

/*
 * Execute CPUID for leaf `i` with ECX set to 0 and store EAX, EBX, ECX, EDX
 * in info[0..3], in that order.
 */
static void __cpuid(int *info, int i)
{
    __asm__ __volatile__(
        "cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(i), "c"(0));
}

/*
 * Execute XGETBV for extended control register `i` and return EDX:EAX as a
 * 64-bit value.
 */
static unsigned long long __xgetbv(unsigned int i)
{
    unsigned int eax, edx;
    // clang-format off
    __asm__ __volatile__(
        "xgetbv;" : "=a" (eax), "=d"(edx)
                  : "c" (i));
    return ((unsigned long long)edx << 32) | eax;
    // clang-format on
}
#endif
339 
/*
 * Extract one feature bit from a cpuid result array B using identifier A:
 * bits 6..5 of A pick the array entry, bits 4..0 pick the bit. Yields 0 or 1.
 * Both arguments are parenthesized so compound expressions expand correctly.
 */
#define GET_CPU_INFO(A, B) ((((B)[(((A) >> 5) & 0x03)]) >> ((A) & 0x1f)) & 1)
341 
oapv_check_cpu_info_x86()342 int oapv_check_cpu_info_x86()
343 {
344     int support_sse = 0;
345     int support_avx = 0;
346     int support_avx2 = 0;
347     int cpu_info[4] = { 0 };
348     __cpuid(cpu_info, 0);
349     int id_cnt = cpu_info[0];
350 
351     if(id_cnt >= 1) {
352         __cpuid(cpu_info, 1);
353         support_sse = GET_CPU_INFO(OAPV_CPU_INFO_SSE41, cpu_info);
354         int os_use_xsave = GET_CPU_INFO(OAPV_CPU_INFO_OSXSAVE, cpu_info);
355         int cpu_support_avx = GET_CPU_INFO(OAPV_CPU_INFO_AVX, cpu_info);
356 
357         if(os_use_xsave && cpu_support_avx) {
358             unsigned long long xcr_feature_mask = __xgetbv(_XCR_XFEATURE_ENABLED_MASK);
359             support_avx = ((xcr_feature_mask & 0x6) || 0) ? 1 : 0;
360             if(id_cnt >= 7) {
361                 __cpuid(cpu_info, 7);
362                 support_avx2 = (support_avx && GET_CPU_INFO(OAPV_CPU_INFO_AVX2, cpu_info)) ? 1 : 0;
363             }
364         }
365     }
366 
367     return ((support_avx2 << 2) | (support_avx << 1) | (support_sse << 0));
368 }
369 #endif
370 
371 #if ENC_DEC_DUMP
372 #include <stdarg.h>
/* Stream that receives the dump output; opened by oapv_dump_create0(). */
FILE *oapv_fp_dump;
/* Non-zero once dumping has been switched on by oapv_dump_create0(). */
int   oapv_is_dump;
376 
oapv_dump_string0(int cond,const char * fmt,...)377 void oapv_dump_string0(int cond, const char *fmt, ...)
378 {
379     if(!oapv_is_dump)
380         return;
381     switch(cond) {
382     case OAPV_DUMP_HLS:
383         if(!DUMP_ENABLE_HLS)
384             return;
385         break;
386     case OAPV_DUMP_COEF:
387         if(!DUMP_ENABLE_COEF)
388             return;
389         break;
390     default:
391         break;
392     }
393 
394     va_list args;
395     va_start(args, fmt);
396     vfprintf(oapv_fp_dump, fmt, args);
397     fflush(oapv_fp_dump);
398     va_end(args);
399 }
400 
oapv_dump_coef0(short * coef,int size,int x,int y,int c)401 void oapv_dump_coef0(short *coef, int size, int x, int y, int c)
402 {
403     if(!DUMP_ENABLE_COEF || !oapv_is_dump)
404         return;
405 
406     fprintf(oapv_fp_dump, "x pos : % d y pos : % d comp : % d\n", x, y, c);
407     fprintf(oapv_fp_dump, "coef:");
408     for(int i = 0; i < size; i++) {
409         fprintf(oapv_fp_dump, " %d", coef[i]);
410     }
411     fprintf(oapv_fp_dump, "\n");
412     fflush(oapv_fp_dump);
413 }
414 
oapv_dump_create0(int is_enc)415 void oapv_dump_create0(int is_enc)
416 {
417     if(is_enc) {
418         if(DUMP_ENABLE_HLS || DUMP_ENABLE_COEF) {
419             oapv_fp_dump = fopen("enc_dump.txt", "w+");
420         }
421     }
422     else {
423         if(DUMP_ENABLE_HLS || DUMP_ENABLE_COEF) {
424             oapv_fp_dump = fopen("dec_dump.txt", "w+");
425         }
426     }
427     oapv_is_dump = 1;
428 }
429 
oapv_dump_delete0()430 void oapv_dump_delete0()
431 {
432     if(DUMP_ENABLE_HLS || DUMP_ENABLE_COEF) {
433         fclose(oapv_fp_dump);
434     }
435 }
436 #endif