/* Copyright 2024 The ChromiumOS Authors
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
5*8617a60dSAndroid Build Coastguard Worker
6*8617a60dSAndroid Build Coastguard Worker #include "2common.h"
7*8617a60dSAndroid Build Coastguard Worker #include "2rsa.h"
8*8617a60dSAndroid Build Coastguard Worker
9*8617a60dSAndroid Build Coastguard Worker /**
10*8617a60dSAndroid Build Coastguard Worker * Montgomery c[] = d[] - e[] if d[] > e[], c[] = d[] - e[] + n[] otherwise.
11*8617a60dSAndroid Build Coastguard Worker * Uses "Subtract with Carry" and "Add with Carry" instructions to optimize BigNum
12*8617a60dSAndroid Build Coastguard Worker * arithmetic. e[] will be overwritten with intermediate results.
13*8617a60dSAndroid Build Coastguard Worker */
sub_mod(uint32_t * c,uint32_t * ed,const uint32_t * n,const uint32_t arrsize)14*8617a60dSAndroid Build Coastguard Worker static void sub_mod(uint32_t *c, uint32_t *ed, const uint32_t *n, const uint32_t arrsize)
15*8617a60dSAndroid Build Coastguard Worker {
16*8617a60dSAndroid Build Coastguard Worker uint32_t borrow, tmp1, tmp2, i;
17*8617a60dSAndroid Build Coastguard Worker
18*8617a60dSAndroid Build Coastguard Worker /* e[] = d[] - e[] */
19*8617a60dSAndroid Build Coastguard Worker uint32_t size_clobber = arrsize;
20*8617a60dSAndroid Build Coastguard Worker uint32_t *ed_clobber = ed;
21*8617a60dSAndroid Build Coastguard Worker asm (
22*8617a60dSAndroid Build Coastguard Worker "subs wzr, wzr, wzr\n\t" /* init carry flag for subtraction */
23*8617a60dSAndroid Build Coastguard Worker "1:\n\t"
24*8617a60dSAndroid Build Coastguard Worker "ldp %w[e], %w[d], [%[ed_ptr]]\n\t"
25*8617a60dSAndroid Build Coastguard Worker "sbcs %w[e], %w[d], %w[e]\n\t"
26*8617a60dSAndroid Build Coastguard Worker "str %w[e], [%[ed_ptr]], #8\n\t"
27*8617a60dSAndroid Build Coastguard Worker "sub %w[size], %w[size], #1\n\t"
28*8617a60dSAndroid Build Coastguard Worker "cbnz %w[size], 1b\n\t"
29*8617a60dSAndroid Build Coastguard Worker "cset %w[e], cc\n\t" /* "borrow" = carry flag is 0 (cleared) */
30*8617a60dSAndroid Build Coastguard Worker : [e] "=r" (borrow),
31*8617a60dSAndroid Build Coastguard Worker [d] "=r" (tmp1),
32*8617a60dSAndroid Build Coastguard Worker [size] "+r" (size_clobber),
33*8617a60dSAndroid Build Coastguard Worker [ed_ptr] "+r" (ed_clobber)
34*8617a60dSAndroid Build Coastguard Worker :: "cc", "memory"
35*8617a60dSAndroid Build Coastguard Worker );
36*8617a60dSAndroid Build Coastguard Worker
37*8617a60dSAndroid Build Coastguard Worker if (borrow) {
38*8617a60dSAndroid Build Coastguard Worker /* e[] = e[] + n[] */
39*8617a60dSAndroid Build Coastguard Worker size_clobber = arrsize;
40*8617a60dSAndroid Build Coastguard Worker ed_clobber = ed;
41*8617a60dSAndroid Build Coastguard Worker asm volatile (
42*8617a60dSAndroid Build Coastguard Worker "adds wzr, wzr, wzr\n\t" /* init carry flag for addition */
43*8617a60dSAndroid Build Coastguard Worker "1:\n\t"
44*8617a60dSAndroid Build Coastguard Worker "ldr %w[e], [%[ed_ptr]]\n\t"
45*8617a60dSAndroid Build Coastguard Worker "ldr %w[n], [%[n_ptr]], #4\n\t"
46*8617a60dSAndroid Build Coastguard Worker "adcs %w[e], %w[e], %w[n]\n\t"
47*8617a60dSAndroid Build Coastguard Worker "str %w[e], [%[ed_ptr]], #8\n\t"
48*8617a60dSAndroid Build Coastguard Worker "sub %w[size], %w[size], #1\n\t"
49*8617a60dSAndroid Build Coastguard Worker "cbnz %w[size], 1b\n\t"
50*8617a60dSAndroid Build Coastguard Worker : [e] "=r" (tmp1),
51*8617a60dSAndroid Build Coastguard Worker [n] "=r" (tmp2),
52*8617a60dSAndroid Build Coastguard Worker [size] "+r" (size_clobber),
53*8617a60dSAndroid Build Coastguard Worker [ed_ptr] "+r" (ed_clobber),
54*8617a60dSAndroid Build Coastguard Worker [n_ptr] "+r" (n)
55*8617a60dSAndroid Build Coastguard Worker :: "cc", "memory"
56*8617a60dSAndroid Build Coastguard Worker );
57*8617a60dSAndroid Build Coastguard Worker }
58*8617a60dSAndroid Build Coastguard Worker
59*8617a60dSAndroid Build Coastguard Worker /* c[] = e[] */
60*8617a60dSAndroid Build Coastguard Worker for (i = 0; i < arrsize; i++)
61*8617a60dSAndroid Build Coastguard Worker c[i] = ed[i * 2];
62*8617a60dSAndroid Build Coastguard Worker }
63*8617a60dSAndroid Build Coastguard Worker
64*8617a60dSAndroid Build Coastguard Worker /**
65*8617a60dSAndroid Build Coastguard Worker * Montgomery c[] = a[] * b[] / R % mod (`ed` is a local scratch buffer)
66*8617a60dSAndroid Build Coastguard Worker *
67*8617a60dSAndroid Build Coastguard Worker * Algorithm according to https://eprint.iacr.org/2013/519.pdf and
68*8617a60dSAndroid Build Coastguard Worker * https://chromium-review.googlesource.com/5055251.
69*8617a60dSAndroid Build Coastguard Worker */
mont_mult(uint32_t * c,const uint32_t * a,const uint32_t * b,const uint32_t * n,uint32_t * ed,const uint32_t mu,const uint32_t arrsize)70*8617a60dSAndroid Build Coastguard Worker static void mont_mult(uint32_t *c,
71*8617a60dSAndroid Build Coastguard Worker const uint32_t *a,
72*8617a60dSAndroid Build Coastguard Worker const uint32_t *b,
73*8617a60dSAndroid Build Coastguard Worker const uint32_t *n,
74*8617a60dSAndroid Build Coastguard Worker uint32_t *ed,
75*8617a60dSAndroid Build Coastguard Worker const uint32_t mu,
76*8617a60dSAndroid Build Coastguard Worker const uint32_t arrsize)
77*8617a60dSAndroid Build Coastguard Worker {
78*8617a60dSAndroid Build Coastguard Worker const uint32_t mub0 = mu * b[0];
79*8617a60dSAndroid Build Coastguard Worker uint32_t i;
80*8617a60dSAndroid Build Coastguard Worker
81*8617a60dSAndroid Build Coastguard Worker memset(ed, 0, arrsize * sizeof(uint32_t) * 2);
82*8617a60dSAndroid Build Coastguard Worker
83*8617a60dSAndroid Build Coastguard Worker for (i = 0; i < arrsize; i++) {
84*8617a60dSAndroid Build Coastguard Worker const uint32_t c0 = ed[1] - ed[0];
85*8617a60dSAndroid Build Coastguard Worker const uint32_t muc0 = mu * c0;
86*8617a60dSAndroid Build Coastguard Worker const uint32_t a_i = a[i];
87*8617a60dSAndroid Build Coastguard Worker const uint32_t q = muc0 + mub0 * a_i;
88*8617a60dSAndroid Build Coastguard Worker const uint32_t *n_clobber = n;
89*8617a60dSAndroid Build Coastguard Worker const uint32_t *b_clobber = b;
90*8617a60dSAndroid Build Coastguard Worker void *ed_clobber = ed;
91*8617a60dSAndroid Build Coastguard Worker uint32_t size_clobber = arrsize - 1;
92*8617a60dSAndroid Build Coastguard Worker asm volatile (
93*8617a60dSAndroid Build Coastguard Worker /* v4.2d = always contains [0, 0] (for idempotent Add High Narrow) */
94*8617a60dSAndroid Build Coastguard Worker "movi v4.2d, #0\n\t"
95*8617a60dSAndroid Build Coastguard Worker /* v3.2s = "mul" = [q, a[i]] */
96*8617a60dSAndroid Build Coastguard Worker "fmov s3, %w[q]\n\t"
97*8617a60dSAndroid Build Coastguard Worker "mov v3.s[1], %w[a_i]\n\t"
98*8617a60dSAndroid Build Coastguard Worker /* v1.2s = "bmod" = [n[0], b[0]] */
99*8617a60dSAndroid Build Coastguard Worker "ldr s1, [%[n]], #4\n\t"
100*8617a60dSAndroid Build Coastguard Worker "ld1 {v1.s}[1], [%[b]], #4\n\t"
101*8617a60dSAndroid Build Coastguard Worker /* v2.2s = [e, d] */
102*8617a60dSAndroid Build Coastguard Worker "ldr d2, [%[ed]]\n\t"
103*8617a60dSAndroid Build Coastguard Worker "uxtl v2.2d, v2.2s\n\t"
104*8617a60dSAndroid Build Coastguard Worker /* v2.2d = "p01" = ed + bmod * mul */
105*8617a60dSAndroid Build Coastguard Worker "umlal v2.2d, v1.2s, v3.2s\n\t"
106*8617a60dSAndroid Build Coastguard Worker /* v2.2d = "t01" = MSB-half(p01) */
107*8617a60dSAndroid Build Coastguard Worker "addhn v2.2s, v2.2d, v4.2d\n\t"
108*8617a60dSAndroid Build Coastguard Worker /* for (j = 1; j < arrsize - 1; j++) */
109*8617a60dSAndroid Build Coastguard Worker "1:"
110*8617a60dSAndroid Build Coastguard Worker /* v0.2d = zero-extend(ed + t01) */
111*8617a60dSAndroid Build Coastguard Worker "ldr d0, [%[ed], #8]\n\t"
112*8617a60dSAndroid Build Coastguard Worker "uaddl v0.2d, v0.2s, v2.2s\n\t"
113*8617a60dSAndroid Build Coastguard Worker /* v1.2s = "bmod" = [n[j], b[j]] */
114*8617a60dSAndroid Build Coastguard Worker "ldr s1, [%[n]], #4\n\t"
115*8617a60dSAndroid Build Coastguard Worker "ld1 {v1.s}[1], [%[b]], #4\n\t"
116*8617a60dSAndroid Build Coastguard Worker /* v0.2d = "p01" = ed[j] + t01 + bmod * mul */
117*8617a60dSAndroid Build Coastguard Worker "umlal v0.2d, v1.2s, v3.2s\n\t"
118*8617a60dSAndroid Build Coastguard Worker /* v2.2s = "t01" = MSB-half(p01) */
119*8617a60dSAndroid Build Coastguard Worker "addhn v2.2s, v0.2d, v4.2d\n\t"
120*8617a60dSAndroid Build Coastguard Worker /* store ed[j - 1] = LSB-half(p01) */
121*8617a60dSAndroid Build Coastguard Worker "xtn v0.2s, v0.2d\n\t"
122*8617a60dSAndroid Build Coastguard Worker "str d0, [%[ed]], #8\n\t"
123*8617a60dSAndroid Build Coastguard Worker "subs %w[size], %w[size], #1\n\t"
124*8617a60dSAndroid Build Coastguard Worker "b.hi 1b\n\t"
125*8617a60dSAndroid Build Coastguard Worker /* store ed[arrsize - 1] = final t01 */
126*8617a60dSAndroid Build Coastguard Worker "str d2, [%[ed]]\n\t"
127*8617a60dSAndroid Build Coastguard Worker : [ed] "+r" (ed_clobber),
128*8617a60dSAndroid Build Coastguard Worker [n] "+r" (n_clobber),
129*8617a60dSAndroid Build Coastguard Worker [b] "+r" (b_clobber),
130*8617a60dSAndroid Build Coastguard Worker [size] "+r" (size_clobber)
131*8617a60dSAndroid Build Coastguard Worker : [q] "r" (q),
132*8617a60dSAndroid Build Coastguard Worker [a_i] "r" (a_i)
133*8617a60dSAndroid Build Coastguard Worker : "v0", "v1","v2", "v3", "v4", "cc", "memory"
134*8617a60dSAndroid Build Coastguard Worker );
135*8617a60dSAndroid Build Coastguard Worker }
136*8617a60dSAndroid Build Coastguard Worker
137*8617a60dSAndroid Build Coastguard Worker sub_mod(c, ed, n, arrsize);
138*8617a60dSAndroid Build Coastguard Worker }
139*8617a60dSAndroid Build Coastguard Worker
swap_bignumber_endianness(const void * in,void * out,size_t size_bytes)140*8617a60dSAndroid Build Coastguard Worker static void swap_bignumber_endianness(const void *in, void *out, size_t size_bytes)
141*8617a60dSAndroid Build Coastguard Worker {
142*8617a60dSAndroid Build Coastguard Worker const void *in_end = in + size_bytes;
143*8617a60dSAndroid Build Coastguard Worker
144*8617a60dSAndroid Build Coastguard Worker /* REV64 can only swap within each 8-byte half of the 16-byte register, so use a
145*8617a60dSAndroid Build Coastguard Worker transposed STP to do the final swap of the two halves afterwards. */
146*8617a60dSAndroid Build Coastguard Worker asm volatile (
147*8617a60dSAndroid Build Coastguard Worker "1:\n\t"
148*8617a60dSAndroid Build Coastguard Worker "ldr q0, [%[in], #-16]!\n\t"
149*8617a60dSAndroid Build Coastguard Worker "rev64 v0.16b, v0.16b\n\t"
150*8617a60dSAndroid Build Coastguard Worker "mov d1, v0.d[1]\n\t"
151*8617a60dSAndroid Build Coastguard Worker "stp d1, d0, [%[out]], #16\n\t"
152*8617a60dSAndroid Build Coastguard Worker "subs %[size], %[size], #16\n\t"
153*8617a60dSAndroid Build Coastguard Worker "b.hi 1b\n\t"
154*8617a60dSAndroid Build Coastguard Worker : [in] "+r" (in_end),
155*8617a60dSAndroid Build Coastguard Worker [out] "+r" (out),
156*8617a60dSAndroid Build Coastguard Worker [size] "+r" (size_bytes)
157*8617a60dSAndroid Build Coastguard Worker :: "v0", "v1", "cc", "memory"
158*8617a60dSAndroid Build Coastguard Worker );
159*8617a60dSAndroid Build Coastguard Worker }
160*8617a60dSAndroid Build Coastguard Worker
vb2ex_hwcrypto_modexp(const struct vb2_public_key * key,uint8_t * inout,void * workbuf,size_t workbuf_size,int exp)161*8617a60dSAndroid Build Coastguard Worker vb2_error_t vb2ex_hwcrypto_modexp(const struct vb2_public_key *key,
162*8617a60dSAndroid Build Coastguard Worker uint8_t *inout, void *workbuf,
163*8617a60dSAndroid Build Coastguard Worker size_t workbuf_size, int exp)
164*8617a60dSAndroid Build Coastguard Worker {
165*8617a60dSAndroid Build Coastguard Worker const uint32_t mu = -key->n0inv;
166*8617a60dSAndroid Build Coastguard Worker const uint32_t *n = key->n;
167*8617a60dSAndroid Build Coastguard Worker const uint32_t arrsize = key->arrsize;
168*8617a60dSAndroid Build Coastguard Worker uint32_t *a = workbuf;
169*8617a60dSAndroid Build Coastguard Worker uint32_t *aR = (void *)inout; /* Re-use location. */
170*8617a60dSAndroid Build Coastguard Worker uint32_t *aaR = a + arrsize;
171*8617a60dSAndroid Build Coastguard Worker uint32_t *aaa = aaR; /* Re-use location. */
172*8617a60dSAndroid Build Coastguard Worker uint32_t *ed = aaR + arrsize; /* 8-byte align guaranteed by VB2_WORKBUF_ALIGN */
173*8617a60dSAndroid Build Coastguard Worker uint32_t i;
174*8617a60dSAndroid Build Coastguard Worker
175*8617a60dSAndroid Build Coastguard Worker if (exp != 65537 || arrsize % 16 != 0 ||
176*8617a60dSAndroid Build Coastguard Worker (void *)&ed[arrsize * 2] - workbuf > workbuf_size)
177*8617a60dSAndroid Build Coastguard Worker return VB2_ERROR_EX_HWCRYPTO_UNSUPPORTED;
178*8617a60dSAndroid Build Coastguard Worker
179*8617a60dSAndroid Build Coastguard Worker /* Convert from big endian byte array to little endian word array. */
180*8617a60dSAndroid Build Coastguard Worker swap_bignumber_endianness(inout, a, arrsize * sizeof(uint32_t));
181*8617a60dSAndroid Build Coastguard Worker
182*8617a60dSAndroid Build Coastguard Worker mont_mult(aR, a, key->rr, n, ed, mu, arrsize); /* aR = a * RR / R mod M */
183*8617a60dSAndroid Build Coastguard Worker for (i = 0; i < 16; i += 2) {
184*8617a60dSAndroid Build Coastguard Worker mont_mult(aaR, aR, aR, n, ed, mu, arrsize); /* aaR = aR * aR / R mod M */
185*8617a60dSAndroid Build Coastguard Worker mont_mult(aR, aaR, aaR, n, ed, mu, arrsize); /* aR = aaR * aaR / R mod M */
186*8617a60dSAndroid Build Coastguard Worker }
187*8617a60dSAndroid Build Coastguard Worker mont_mult(aaa, aR, a, n, ed, mu, arrsize); /* aaa = aR * a / R mod M */
188*8617a60dSAndroid Build Coastguard Worker
189*8617a60dSAndroid Build Coastguard Worker /* Convert back to bigendian byte array */
190*8617a60dSAndroid Build Coastguard Worker swap_bignumber_endianness(aaa, inout, arrsize * sizeof(uint32_t));
191*8617a60dSAndroid Build Coastguard Worker
192*8617a60dSAndroid Build Coastguard Worker return VB2_SUCCESS;
193*8617a60dSAndroid Build Coastguard Worker }
194