xref: /aosp_15_r20/external/coreboot/src/commonlib/bsd/ipchksum.c (revision b9411a12aaaa7e1e6a6fb7c5e057f44ee179a49c)
1 /* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later */
2 
3 #include <commonlib/bsd/ipchksum.h>
4 
5 /* See RFC 1071 for mathematical explanations of why we can first sum in a larger register and
6    then narrow down, why we don't need to worry about endianness, etc. */
/*
 * Compute the 16-bit one's-complement "internet checksum" (RFC 1071) over
 * @size bytes at @data and return its bitwise complement, ready to be stored
 * in a protocol header. Summation is done in a wide register first and only
 * folded to 16 bits at the end; per RFC 1071 this is equivalent and also
 * byte-order independent, so no endianness handling is needed here.
 */
uint16_t ipchksum(const void *data, size_t size)
{
	const uint8_t *p1 = data;	/* byte view for the scalar tail loop */
	unsigned long wide_sum = 0;	/* wide accumulator filled by the asm fast paths */
	uint32_t sum = 0;
	size_t i = 0;			/* number of bytes already consumed by a fast path */

#if defined(__aarch64__)
	/* Fast path: consume 16 bytes per iteration using the carry flag to
	   chain 64-bit one's-complement additions (ADCS). */
	size_t size16 = size / 16;
	const uint64_t *p8 = data;
	if (size16) {
		unsigned long tmp1, tmp2;
		i = size16 * 16;	/* tail loop below handles the remainder */
		asm (
			"adds	xzr, xzr, xzr\n\t"	/* init carry flag for addition */
			"1:\n\t"
			"ldp	%[v1], %[v2], [%[p8]], #16\n\t"
			"adcs	%[wsum], %[wsum], %[v1]\n\t"
			"adcs	%[wsum], %[wsum], %[v2]\n\t"
			"sub	%[size16], %[size16], #1\n\t"	/* SUB (not SUBS): must not clobber the carry chain */
			"cbnz	%[size16], 1b\n\t"
			"adcs	%[wsum], %[wsum], xzr\n\t"	/* use up last carry */
		: [v1] "=r" (tmp1),
		  [v2] "=r" (tmp2),
		  [wsum] "+r" (wide_sum),
		  [p8] "+r" (p8),
		  [size16] "+r" (size16)
		:: "cc"
		/* NOTE(review): the asm reads *p8 but declares no memory input or
		   "memory" clobber; appears to rely on the data being caller-
		   provided and not written in this function — TODO confirm. */
		);
	}
#elif defined(__i386__) || defined(__x86_64__)
	/* Fast path: consume 8 bytes per iteration via ADC; on i386 that is
	   two 32-bit ADCs, on x86_64 a single 64-bit one. */
	size_t size8 = size / 8;
	const uint64_t *p8 = data;
	i = size8 * 8;	/* tail loop below handles the remainder */
	asm (
		"clc\n\t"
		"1:\n\t"
		"jecxz	2f\n\t"		/* technically RCX on 64, but not gonna be that big */
		"adc	(%[p8]), %[wsum]\n\t"
#if defined(__i386__)
		"adc	4(%[p8]), %[wsum]\n\t"
#endif	/* __i386__ */
		"lea	-1(%[size8]), %[size8]\n\t"	/* Use LEA as a makeshift ADD that */
		"lea	8(%[p8]), %[p8]\n\t"		/* doesn't modify the carry flag. */
		"jmp	1b\n\t"
		"2:\n\t"
		"setc	%b[size8]\n\t"	/* reuse size register to save last carry */
		"add	%[size8], %[wsum]\n\t"	/* upper bytes of %[size8] are 0 here: loop exits on ECX==0 */
	: [wsum] "+r" (wide_sum),
	  [p8] "+r" (p8),
	  [size8] "+c" (size8)		/* put size in ECX so we can JECXZ */
	:: "cc"
	);
#endif	/* __i386__ || __x86_64__ */

	/* Fold the wide accumulator into a 16-bit one's-complement sum. */
	while (wide_sum) {
		sum += wide_sum & 0xFFFF;
		wide_sum >>= 16;
	}
	sum = (sum & 0xFFFF) + (sum >> 16);

	/* Scalar tail: any bytes not covered by a fast path (or all bytes on
	   other architectures). Odd-offset bytes form the high half of their
	   16-bit chunk, hence the shift. */
	for (; i < size; i++) {
		uint32_t v = p1[i];
		if (i % 2)
			v <<= 8;
		sum += v;

		/* Doing this unconditionally seems to be faster. */
		sum = (sum & 0xFFFF) + (sum >> 16);
	}

	return (uint16_t)~sum;
}
80 
/*
 * Combine two RFC 1071 checksums as if their underlying data streams had
 * been checksummed back-to-back. @offset is the byte offset at which the
 * second stream would begin; @first and @second are the (already inverted)
 * checksums of the two streams. Returns the inverted combined checksum.
 */
uint16_t ipchksum_add(size_t offset, uint16_t first, uint16_t second)
{
	/* Strip the final inversion so we can work with the raw sums. */
	uint32_t raw_first = (uint16_t)~first;
	uint16_t raw_second = (uint16_t)~second;

	/*
	 * Since the checksum is calculated in 16-bit chunks, if the offset at which
	 * the data covered by the second checksum would start (if both data streams
	 * came one after the other) is odd, that means the second stream starts in
	 * the middle of a 16-bit chunk. This means the second checksum is byte
	 * swapped compared to what we need it to be, and we must swap it back.
	 */
	if (offset & 1)
		raw_second = (uint16_t)((raw_second << 8) | (raw_second >> 8));

	/* One fold is enough: the sum of two 16-bit values fits in 17 bits. */
	uint32_t combined = raw_first + raw_second;
	combined = (combined >> 16) + (combined & 0xFFFF);

	return (uint16_t)~combined;
}
101