1 /* Copyright (c) 2015, Google Inc.
2  *
3  * Permission to use, copy, modify, and/or distribute this software for any
4  * purpose with or without fee is hereby granted, provided that the above
5  * copyright notice and this permission notice appear in all copies.
6  *
7  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10  * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14 
15 #include <ring-core/base.h>
16 
17 #include "../../internal.h"
18 
19 #if defined(__GNUC__)
20 #pragma GCC diagnostic ignored "-Wconversion"
21 #pragma GCC diagnostic ignored "-Wsign-conversion"
22 #endif
23 
24 
25 // This function looks at 5+1 scalar bits (5 current, 1 adjacent less
26 // significant bit), and recodes them into a signed digit for use in fast point
27 // multiplication: the use of signed rather than unsigned digits means that
28 // fewer points need to be precomputed, given that point inversion is easy (a
29 // precomputed point dP makes -dP available as well).
30 //
31 // BACKGROUND:
32 //
33 // Signed digits for multiplication were introduced by Booth ("A signed binary
34 // multiplication technique", Quart. Journ. Mech. and Applied Math., vol. IV,
35 // pt. 2 (1951), pp. 236-240), in that case for multiplication of integers.
36 // Booth's original encoding did not generally improve the density of nonzero
37 // digits over the binary representation, and was merely meant to simplify the
38 // handling of signed factors given in two's complement; but it has since been
39 // shown to be the basis of various signed-digit representations that do have
40 // further advantages, including the wNAF, using the following general
41 // approach:
42 //
43 // (1) Given a binary representation
44 //
45 //       b_k  ...  b_2  b_1  b_0,
46 //
47 //     of a nonnegative integer (b_k in {0, 1}), rewrite it in digits 0, 1, -1
48 //     by using bit-wise subtraction as follows:
49 //
50 //        b_k     b_(k-1)  ...  b_2  b_1  b_0
51 //      -         b_k      ...  b_3  b_2  b_1  b_0
52 //       -----------------------------------------
53 //        s_(k+1) s_k      ...  s_3  s_2  s_1  s_0
54 //
55 //     A left-shift followed by subtraction of the original value yields a new
56 //     representation of the same value, using signed bits s_i = b_(i-1) - b_i.
57 //     This representation from Booth's paper has since appeared in the
58 //     literature under a variety of different names including "reversed binary
59 //     form", "alternating greedy expansion", "mutual opposite form", and
60 //     "sign-alternating {+-1}-representation".
61 //
62 //     An interesting property is that among the nonzero bits, values 1 and -1
63 //     strictly alternate.
64 //
65 // (2) Various window schemes can be applied to the Booth representation of
66 //     integers: for example, right-to-left sliding windows yield the wNAF
67 //     (a signed-digit encoding independently discovered by various researchers
68 //     in the 1990s), and left-to-right sliding windows yield a left-to-right
69 //     equivalent of the wNAF (independently discovered by various researchers
70 //     around 2004).
71 //
72 // To prevent leaking information through side channels in point multiplication,
73 // we need to recode the given integer into a regular pattern: sliding windows
74 // as in wNAFs won't do, we need their fixed-window equivalent -- which is a few
75 // decades older: we'll be using the so-called "modified Booth encoding" due to
76 // MacSorley ("High-speed arithmetic in binary computers", Proc. IRE, vol. 49
77 // (1961), pp. 67-91), in a radix-2^5 setting.  That is, we always combine five
78 // signed bits into a signed digit:
79 //
80 //       s_(5j + 4) s_(5j + 3) s_(5j + 2) s_(5j + 1) s_(5j)
81 //
82 // The sign-alternating property implies that the resulting digit values are
83 // integers from -16 to 16.
84 //
85 // Of course, we don't actually need to compute the signed digits s_i as an
86 // intermediate step (that's just a nice way to see how this scheme relates
87 // to the wNAF): a direct computation obtains the recoded digit from the
88 // six bits b_(5j + 4) ... b_(5j - 1).
89 //
90 // This function takes those six bits as an integer (0 .. 63), writing the
91 // recoded digit to *sign (0 for positive, 1 for negative) and *digit (absolute
92 // value, in the range 0 .. 16).  Note that this integer essentially provides
93 // the input bits "shifted to the left" by one position: for example, the input
94 // to compute the least significant recoded digit, given that there's no bit
95 // b_-1, has to be b_4 b_3 b_2 b_1 b_0 0.
96 //
97 // DOUBLING CASE:
98 //
99 // Point addition formulas for short Weierstrass curves are often incomplete.
100 // Edge cases such as P + P or P + ∞ must be handled separately. This
101 // complicates constant-time requirements. P + ∞ cannot be avoided (any window
102 // may be zero) and is handled with constant-time selects. P + P (where P is not
103 // ∞) usually is not. Instead, windowing strategies are chosen to avoid this
104 // case. Whether this happens depends on the group order.
105 //
106 // Let w be the window width (in this function, w = 5). The non-trivial doubling
107 // case in single-point scalar multiplication may occur if and only if the
108 // 2^(w-1) bit of the group order is zero.
109 //
110 // Note the above only holds if the scalar is fully reduced and the group order
111 // is a prime that is much larger than 2^w. It also only holds when windows
112 // are applied from most significant to least significant, doubling between each
113 // window. It does not apply to more complex table strategies such as
114 // |EC_nistz256_method|.
115 //
116 // PROOF:
117 //
118 // Let n be the group order. Let l be the number of bits needed to represent n.
119 // Assume there exists some 0 <= k < n such that signed w-bit windowed
120 // multiplication hits the doubling case.
121 //
122 // Windowed multiplication consists of iterating over groups of s_i (defined
123 // above based on k's binary representation) from most to least significant. At
124 // iteration i (for i = ..., 3w, 2w, w, 0, starting from the most significant
125 // window), we:
126 //
127 //  1. Double the accumulator A, w times. Let A_i be the value of A at this
128 //     point.
129 //
130 //  2. Set A to T_i + A_i, where T_i is a precomputed multiple of P
131 //     corresponding to the window s_(i+w-1) ... s_i.
132 //
133 // Let j be the index such that A_j = T_j ≠ ∞. Looking at A_i and T_i as
134 // multiples of P, define a_i and t_i to be scalar coefficients of A_i and T_i.
135 // Thus a_j = t_j ≠ 0 (mod n). Note a_i and t_i may not be reduced mod n. t_i is
136 // the value of the w signed bits s_(i+w-1) ... s_i. a_i is computed as a_i =
137 // 2^w * (a_(i+w) + t_(i+w)).
138 //
139 // t_i is bounded by -2^(w-1) <= t_i <= 2^(w-1). Additionally, we may write it
140 // in terms of unsigned bits b_i. t_i consists of signed bits s_(i+w-1) ... s_i.
141 // This is computed as:
142 //
143 //         b_(i+w-2) b_(i+w-3)  ...  b_i      b_(i-1)
144 //      -  b_(i+w-1) b_(i+w-2)  ...  b_(i+1)  b_i
145 //       --------------------------------------------
146 //   t_i = s_(i+w-1) s_(i+w-2)  ...  s_(i+1)  s_i
147 //
148 // Observe that b_(i+w-2) through b_i occur in both terms. Let x be the integer
149 // represented by that bit string, i.e. 2^(w-2)*b_(i+w-2) + ... + b_i.
150 //
151 //   t_i = (2*x + b_(i-1)) - (2^(w-1)*b_(i+w-1) + x)
152 //       = x - 2^(w-1)*b_(i+w-1) + b_(i-1)
153 //
154 // Or, using C notation for bit operations:
155 //
156 //   t_i = (k>>i) & ((1<<(w-1)) - 1) - (k>>i) & (1<<(w-1)) + (k>>(i-1)) & 1
157 //
// Note b_(i-1) is added in left-shifted by one (or doubled) from its place.
// This is compensated by t_(i-w)'s subtraction term. Thus, a_i may be computed
// by adding b_l b_(l-1) ... b_(i+w+1) b_(i+w) and an extra copy of b_(i+w-1).
// In C notation, this is:
162 //
163 //   a_i = (k>>(i+w)) << w + ((k>>(i+w-1)) & 1) << w
164 //
165 // Observe that, while t_i may be positive or negative, a_i is bounded by
166 // 0 <= a_i < n + 2^w. Additionally, a_i can only be zero if b_(i+w-1) and up
167 // are all zero. (Note this implies a non-trivial P + (-P) is unreachable for
168 // all groups. That would imply the subsequent a_i is zero, which means all
169 // terms thus far were zero.)
170 //
171 // Returning to our doubling position, we have a_j = t_j (mod n). We now
172 // determine the value of a_j - t_j, which must be divisible by n. Our bounds on
173 // a_j and t_j imply a_j - t_j is 0 or n. If it is 0, a_j = t_j. However, 2^w
174 // divides a_j and -2^(w-1) <= t_j <= 2^(w-1), so this can only happen if
175 // a_j = t_j = 0, which is a trivial doubling. Therefore, a_j - t_j = n.
176 //
177 // Now we determine j. Suppose j > 0. w divides j, so j >= w. Then,
178 //
179 //   n = a_j - t_j = (k>>(j+w)) << w + ((k>>(j+w-1)) & 1) << w - t_j
180 //                <= k/2^j + 2^w - t_j
181 //                 < n/2^w + 2^w + 2^(w-1)
182 //
183 // n is much larger than 2^w, so this is impossible. Thus, j = 0: only the final
184 // addition may hit the doubling case.
185 //
186 // Finally, we consider bit patterns for n and k. Divide k into k_H + k_M + k_L
187 // such that k_H is the contribution from b_(l-1) .. b_w, k_M is the
188 // contribution from b_(w-1), and k_L is the contribution from b_(w-2) ... b_0.
189 // That is:
190 //
191 // - 2^w divides k_H
192 // - k_M is 0 or 2^(w-1)
193 // - 0 <= k_L < 2^(w-1)
194 //
195 // Divide n into n_H + n_M + n_L similarly. We thus have:
196 //
197 //   t_0 = (k>>0) & ((1<<(w-1)) - 1) - (k>>0) & (1<<(w-1)) + (k>>(0-1)) & 1
198 //       = k & ((1<<(w-1)) - 1) - k & (1<<(w-1))
199 //       = k_L - k_M
200 //
201 //   a_0 = (k>>(0+w)) << w + ((k>>(0+w-1)) & 1) << w
202 //       = (k>>w) << w + ((k>>(w-1)) & 1) << w
203 //       = k_H + 2*k_M
204 //
205 //                 n = a_0 - t_0
206 //   n_H + n_M + n_L = (k_H + 2*k_M) - (k_L - k_M)
207 //                   = k_H + 3*k_M - k_L
208 //
209 // k_H - k_L < k and k < n, so k_H - k_L ≠ n. Therefore k_M is not 0 and must be
210 // 2^(w-1). Now we consider k_H and n_H. We know k_H <= n_H. Suppose k_H = n_H.
211 // Then,
212 //
213 //   n_M + n_L = 3*(2^(w-1)) - k_L
214 //             > 3*(2^(w-1)) - 2^(w-1)
215 //             = 2^w
216 //
// Contradiction (n_M + n_L is the bottom w bits of n). Thus k_H < n_H. Suppose
// k_H <= n_H - 2*2^w. Then,
//
//   n_H + n_M + n_L = k_H + 3*(2^(w-1)) - k_L
//                  <= n_H - 2*2^w + 3*(2^(w-1)) - k_L
//        n_M + n_L <= -2^(w-1) - k_L
//                   < 0
223 //
224 // Contradiction. Thus, k_H = n_H - 2^w. (Note 2^w divides n_H and k_H.) Thus,
225 //
226 //   n_H + n_M + n_L = k_H + 3*(2^(w-1)) - k_L
227 //                   = n_H - 2^w + 3*(2^(w-1)) - k_L
228 //         n_M + n_L = 2^(w-1) - k_L
229 //                  <= 2^(w-1)
230 //
231 // Equality would mean 2^(w-1) divides n, which is impossible if n is prime.
232 // Thus n_M + n_L < 2^(w-1), so n_M is zero, proving our condition.
233 //
234 // This proof constructs k, so, to show the converse, let k_H = n_H - 2^w,
235 // k_M = 2^(w-1), k_L = 2^(w-1) - n_L. This will result in a non-trivial point
236 // doubling in the final addition and is the only such scalar.
237 //
238 // COMMON CURVES:
239 //
240 // The group orders for common curves end in the following bit patterns:
241 //
242 //   P-521: ...00001001; w = 4 is okay
243 //   P-384: ...01110011; w = 2, 5, 6, 7 are okay
244 //   P-256: ...01010001; w = 5, 7 are okay
245 //   P-224: ...00111101; w = 3, 4, 5, 6 are okay
// recode_scalar_bits recodes the six-bit window |in| (five scalar bits followed
// by the adjacent less significant bit, i.e. a value in 0 .. 63) into a signed
// digit. The sign is written to |*sign| (0 for non-negative, 1 for negative)
// and the absolute value, in the range 0 .. 16, to |*digit|. Note that the
// all-ones window (in = 63) yields |*sign| = 1 with |*digit| = 0.
//
// The computation uses only shifts, masks and additions; it contains no
// branches and no table lookups indexed by |in|.
static inline void recode_scalar_bits(crypto_word_t *sign, crypto_word_t *digit,
                                      crypto_word_t in) {
  // All ones when bit 5 of |in| (the top bit of the 6-bit window) is set, and
  // zero when it is clear.
  const crypto_word_t neg_mask = ~((in >> 5) - 1);

  // The 6-bit complement of |in|: this is what gets halved when the window's
  // top bit is set.
  const crypto_word_t flipped = (1 << 6) - 1 - in;

  // Bitwise select without branching: take |flipped| wherever |neg_mask| has a
  // one bit and |in| wherever it has a zero bit.
  const crypto_word_t chosen = in ^ (neg_mask & (in ^ flipped));

  *sign = neg_mask & 1;
  // Halve the selected value, rounding up, to get the digit's magnitude.
  *digit = (chosen >> 1) + (chosen & 1);
}
259