Lines Matching +full:3 +full:- +full:n

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
6 * Optimized RAID-5 checksumming functions for SSE.
16 * High-speed RAID5 checksumming functions utilizing SSE instructions.
21 * x86-64 changes / gcc fixes from Andi Kleen.
25 * no advantages to be gotten from x86-64 here anyways.
39 #define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
40 #define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
41 #define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
42 #define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
43 #define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
44 #define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
45 #define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
46 #define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
47 #define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
48 #define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
49 #define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
57 op(i + 3, 3)
75 LD(i + 3, 3) \ in xor_sse_2()
81 XO1(i + 3, 3) \ in xor_sse_2()
85 ST(i + 3, 3) \ in xor_sse_2()
91 " .align 32 ;\n" in xor_sse_2()
92 " 1: ;\n" in xor_sse_2()
99 " add %[inc], %[p1] ;\n" in xor_sse_2()
100 " add %[inc], %[p2] ;\n" in xor_sse_2()
101 " dec %[cnt] ;\n" in xor_sse_2()
102 " jnz 1b ;\n" in xor_sse_2()
126 " .align 32 ;\n" in xor_sse_2_pf64()
127 " 1: ;\n" in xor_sse_2_pf64()
134 " add %[inc], %[p1] ;\n" in xor_sse_2_pf64()
135 " add %[inc], %[p2] ;\n" in xor_sse_2_pf64()
136 " dec %[cnt] ;\n" in xor_sse_2_pf64()
137 " jnz 1b ;\n" in xor_sse_2_pf64()
163 LD(i + 3, 3) \ in xor_sse_3()
171 XO1(i + 3, 3) \ in xor_sse_3()
175 XO2(i + 3, 3) \ in xor_sse_3()
179 ST(i + 3, 3) \ in xor_sse_3()
185 " .align 32 ;\n" in xor_sse_3()
186 " 1: ;\n" in xor_sse_3()
193 " add %[inc], %[p1] ;\n" in xor_sse_3()
194 " add %[inc], %[p2] ;\n" in xor_sse_3()
195 " add %[inc], %[p3] ;\n" in xor_sse_3()
196 " dec %[cnt] ;\n" in xor_sse_3()
197 " jnz 1b ;\n" in xor_sse_3()
223 " .align 32 ;\n" in xor_sse_3_pf64()
224 " 1: ;\n" in xor_sse_3_pf64()
231 " add %[inc], %[p1] ;\n" in xor_sse_3_pf64()
232 " add %[inc], %[p2] ;\n" in xor_sse_3_pf64()
233 " add %[inc], %[p3] ;\n" in xor_sse_3_pf64()
234 " dec %[cnt] ;\n" in xor_sse_3_pf64()
235 " jnz 1b ;\n" in xor_sse_3_pf64()
262 LD(i + 3, 3) \ in xor_sse_4()
268 XO1(i + 3, 3) \ in xor_sse_4()
276 XO2(i + 3, 3) \ in xor_sse_4()
280 XO3(i + 3, 3) \ in xor_sse_4()
284 ST(i + 3, 3) \ in xor_sse_4()
290 " .align 32 ;\n" in xor_sse_4()
291 " 1: ;\n" in xor_sse_4()
298 " add %[inc], %[p1] ;\n" in xor_sse_4()
299 " add %[inc], %[p2] ;\n" in xor_sse_4()
300 " add %[inc], %[p3] ;\n" in xor_sse_4()
301 " add %[inc], %[p4] ;\n" in xor_sse_4()
302 " dec %[cnt] ;\n" in xor_sse_4()
303 " jnz 1b ;\n" in xor_sse_4()
331 " .align 32 ;\n" in xor_sse_4_pf64()
332 " 1: ;\n" in xor_sse_4_pf64()
339 " add %[inc], %[p1] ;\n" in xor_sse_4_pf64()
340 " add %[inc], %[p2] ;\n" in xor_sse_4_pf64()
341 " add %[inc], %[p3] ;\n" in xor_sse_4_pf64()
342 " add %[inc], %[p4] ;\n" in xor_sse_4_pf64()
343 " dec %[cnt] ;\n" in xor_sse_4_pf64()
344 " jnz 1b ;\n" in xor_sse_4_pf64()
372 LD(i + 3, 3) \ in xor_sse_5()
378 XO1(i + 3, 3) \ in xor_sse_5()
384 XO2(i + 3, 3) \ in xor_sse_5()
392 XO3(i + 3, 3) \ in xor_sse_5()
396 XO4(i + 3, 3) \ in xor_sse_5()
400 ST(i + 3, 3) \ in xor_sse_5()
406 " .align 32 ;\n" in xor_sse_5()
407 " 1: ;\n" in xor_sse_5()
414 " add %[inc], %[p1] ;\n" in xor_sse_5()
415 " add %[inc], %[p2] ;\n" in xor_sse_5()
416 " add %[inc], %[p3] ;\n" in xor_sse_5()
417 " add %[inc], %[p4] ;\n" in xor_sse_5()
418 " add %[inc], %[p5] ;\n" in xor_sse_5()
419 " dec %[cnt] ;\n" in xor_sse_5()
420 " jnz 1b ;\n" in xor_sse_5()
450 " .align 32 ;\n" in xor_sse_5_pf64()
451 " 1: ;\n" in xor_sse_5_pf64()
458 " add %[inc], %[p1] ;\n" in xor_sse_5_pf64()
459 " add %[inc], %[p2] ;\n" in xor_sse_5_pf64()
460 " add %[inc], %[p3] ;\n" in xor_sse_5_pf64()
461 " add %[inc], %[p4] ;\n" in xor_sse_5_pf64()
462 " add %[inc], %[p5] ;\n" in xor_sse_5_pf64()
463 " dec %[cnt] ;\n" in xor_sse_5_pf64()
464 " jnz 1b ;\n" in xor_sse_5_pf64()
474 .name = "prefetch64-sse",