// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build gc && !purego

#include "textflag.h"

// This was ported from the amd64 implementation.

// POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2)
//
// Adds one full 16-byte message block to the accumulator:
//
//   h2:h1:h0 += (1 << 128) + (two 64-bit words loaded from msg)
//
//   msg         register holding the block address; advanced by 16
//   h0, h1, h2  accumulator limbs (in/out)
//   t0, t1, t2  scratch registers, clobbered (t2 ends up holding 1)
//
// The constant 1 added into h2 (together with the incoming carry) is the
// bit just above the 128-bit block. ADDC starts the carry chain and the
// two ADDE instructions consume and propagate it, so their order must be
// kept. No comments are placed inside the body because every line ends
// in a continuation backslash.
#define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
	MOVD (msg), t0;  \
	MOVD 8(msg), t1; \
	MOVD $1, t2;     \
	ADDC t0, h0, h0; \
	ADDE t1, h1, h1; \
	ADDE t2, h2;     \
	ADD  $16, msg

// POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5)
//
// Multiplies the accumulator by r and folds the result back into three
// limbs: h = (h * r) reduced modulo 2^130 - 5, without a final
// conditional subtraction.
//
//   h0, h1, h2  accumulator limbs (in/out)
//   r0, r1      multiplier limbs (read only)
//   t0..t5      scratch registers, clobbered
//
// Steps, in the order the instructions perform them:
//   1. t3:t2:t1:t0 = h * r, schoolbook style. For the h2 terms only the
//      low 64 bits of h2*r0 and h2*r1 are computed (MULLD, no MULHDU).
//      NOTE(review): presumably sufficient because h2 stays small and r
//      is clamped by the caller -- confirm.
//   2. Split the product at bit 130:
//        h2    = t2 & 3              (RLDICL $0, t2, $62)
//        h1:h0 = t1:t0
//        c     = t3:(t2 & ~3)        (mask is the constant -4)
//   3. Add c and then c >> 2 into h2:h1:h0. Together that is
//      5 * (product >> 130), which is how bits at or above 2^130 wrap
//      around for this modulus.
//
// The ADDC / ADDE / ADDZE sequences pass information through the carry
// bit, so carry-setting and carry-consuming instructions must stay in
// this order. No comments are placed inside the body because every line
// ends in a continuation backslash.
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
	MULLD  r0, h0, t0;  \
	MULHDU r0, h0, t1;  \
	MULLD  r0, h1, t4;  \
	MULHDU r0, h1, t5;  \
	ADDC   t4, t1, t1;  \
	MULLD  r0, h2, t2;  \
	MULHDU r1, h0, t4;  \
	MULLD  r1, h0, h0;  \
	ADDE   t5, t2, t2;  \
	ADDC   h0, t1, t1;  \
	MULLD  h2, r1, t3;  \
	ADDZE  t4, h0;      \
	MULHDU r1, h1, t5;  \
	MULLD  r1, h1, t4;  \
	ADDC   t4, t2, t2;  \
	ADDE   t5, t3, t3;  \
	ADDC   h0, t2, t2;  \
	MOVD   $-4, t4;     \
	ADDZE  t3;          \
	RLDICL $0, t2, $62, h2; \
	AND    t2, t4, h0;  \
	ADDC   t0, h0, h0;  \
	ADDE   t3, t1, h1;  \
	SLD    $62, t3, t4; \
	SRD    $2, t2;      \
	ADDZE  h2;          \
	OR     t4, t2, t2;  \
	SRD    $2, t3;      \
	ADDC   t2, h0, h0;  \
	ADDE   t3, h1, h1;  \
	ADDZE  h2

// poly1305Mask is a 16-byte, file-local (<>) read-only constant made of
// two 64-bit words.
// NOTE(review): nothing in the code visible here references this symbol.
// The values look like the Poly1305 clamp mask for r -- confirm whether
// it is still needed before removing it.
DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
GLOBL ·poly1305Mask<>(SB), RODATA, $16

// func update(state *[7]uint64, msg []byte)
//
// Absorbs msg into the accumulator stored in state. Every full 16-byte
// block is added with POLY1305_ADD and multiplied with POLY1305_MUL. A
// trailing partial block of 1..15 bytes is padded in registers with a
// single 1 bit placed directly after its last byte, added to the
// accumulator, and multiplied once more.
//
// State words used here:
//   state[0..2]  h0, h1, h2  accumulator; loaded on entry, stored at done
//   state[3..4]  r0, r1      multiplier; read only
// The remaining words of state are not accessed by this routine.
//
// Frame $0-32: no local frame; 32 bytes of arguments (8-byte pointer and
// 24-byte slice header, of which only base and len are read).
//
// Register use:
//   R3        state pointer
//   R4        current message pointer
//   R5        bytes remaining
//   R8-R10    h0, h1, h2
//   R11, R12  r0, r1
//   R16, R17  low / high 64 bits of the padded tail block; also used as
//             POLY1305_MUL temporaries
//   R14, R18, R20-R23  temporaries
//
// NOTE(review): message bytes are read with plain MOVD/MOVWZ/MOVHZ/MOVBZ
// and later bytes are shifted into higher bit positions, so this
// presumably relies on a little-endian target -- confirm against the
// build setup for this file.
TEXT ·update(SB), $0-32
	MOVD state+0(FP), R3
	MOVD msg_base+8(FP), R4
	MOVD msg_len+16(FP), R5

	MOVD 0(R3), R8   // h0
	MOVD 8(R3), R9   // h1
	MOVD 16(R3), R10 // h2
	MOVD 24(R3), R11 // r0
	MOVD 32(R3), R12 // r1

	// Fewer than 16 bytes: skip the full-block loop entirely.
	CMP R5, $16
	BLT bytes_between_0_and_15

loop:
	// h += 2^128 + next 16 message bytes; R4 advances by 16.
	POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)

	// multiply is entered both by falling through from loop and by the
	// branch at the end of carry; its entry is aligned to 16 bytes.
	PCALIGN $16
multiply:
	POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
	ADD $-16, R5
	CMP R5, $16
	BGE loop

bytes_between_0_and_15:
	// Nothing left: store the accumulator and return.
	CMP  R5, $0
	BEQ  done
	MOVD $0, R16 // low 64 bits of the padded tail block
	MOVD $0, R17 // high 64 bits of the padded tail block

	// flush_buffer is not the target of any branch in this routine; it
	// is reached only by falling through.
flush_buffer:
	CMP R5, $8
	BLE just1

	// R21 = R5 - 8 = number of tail bytes that belong to the high word.
	MOVD $8, R21
	SUB  R21, R5, R21

	// Greater than 8 -- load the 8 bytes that end at the last message
	// byte (offset R5-8 from R4) into R17, the high word. The load
	// stays inside the remaining R5 bytes.
	MOVD (R4)(R21), R17
	MOVD $16, R22

	// R22 = (16 - R5) * 8: the number of bits in that load that were
	// already covered by the low word.
	SUB R5, R22, R22
	SLD $3, R22

	// Shift to get only the bytes in msg
	SRD R22, R17, R17

	// Put 1 at high end: set bit (R5 - 8) * 8, directly above the last
	// message byte.
	MOVD $1, R23
	SLD  $3, R21
	SLD  R21, R23, R23
	OR   R23, R17, R17

	// Remainder is 8
	MOVD $8, R5

just1:
	CMP R5, $8
	BLT less8

	// Exactly 8
	MOVD (R4), R16

	CMP R17, $0

	// Check if we've already set R17; if not
	// set 1 to indicate end of msg.
	BNE  carry
	MOVD $1, R17
	BR   carry

less8:
	// 1..7 bytes remain. Assemble them into R16 from a 4-byte, a 2-byte
	// and a 1-byte load as needed; R22 tracks the bit position at which
	// the next piece (and finally the terminating 1) is inserted.
	MOVD  $0, R16   // low word of the padded tail block
	MOVD  $0, R22   // shift count
	CMP   R5, $4
	BLT   less4
	MOVWZ (R4), R16
	ADD   $4, R4
	ADD   $-4, R5
	MOVD  $32, R22

less4:
	CMP   R5, $2
	BLT   less2
	MOVHZ (R4), R21
	SLD   R22, R21, R21
	OR    R16, R21, R16
	ADD   $16, R22
	ADD   $-2, R5
	ADD   $2, R4

less2:
	CMP   R5, $0
	BEQ   insert1
	MOVBZ (R4), R21
	SLD   R22, R21, R21
	OR    R16, R21, R16
	ADD   $8, R22

insert1:
	// Insert 1 at end of msg
	MOVD $1, R21
	SLD  R22, R21, R21
	OR   R16, R21, R16

carry:
	// Add new values to h0, h1, h2. The terminating 1 is already part
	// of R16/R17, so h2 only receives the carry out of h1.
	ADDC  R16, R8
	ADDE  R17, R9
	ADDZE R10, R10
	// Set R5 to 16 so that the ADD $-16 after POLY1305_MUL leaves zero
	// and control reaches done. R4 is not dereferenced after this point.
	MOVD  $16, R5
	ADD   R5, R4
	BR    multiply

done:
	// Save h0, h1, h2 in state
	MOVD R8, 0(R3)
	MOVD R9, 8(R3)
	MOVD R10, 16(R3)
	RET
