1// Copyright 2015 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// This file contains the Go wrapper for the constant-time, 64-bit assembly 6// implementation of P256. The optimizations performed here are described in 7// detail in: 8// S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with 9// 256-bit primes" 10// https://link.springer.com/article/10.1007%2Fs13389-014-0090-x 11// https://eprint.iacr.org/2013/816.pdf 12 13//go:build (amd64 || arm64 || ppc64le || s390x) && !purego 14 15package nistec 16 17import ( 18 _ "embed" 19 "errors" 20 "internal/byteorder" 21 "math/bits" 22 "runtime" 23 "unsafe" 24) 25 26// p256Element is a P-256 base field element in [0, P-1] in the Montgomery 27// domain (with R 2²⁵⁶) as four limbs in little-endian order value. 28type p256Element [4]uint64 29 30// p256One is one in the Montgomery domain. 31var p256One = p256Element{0x0000000000000001, 0xffffffff00000000, 32 0xffffffffffffffff, 0x00000000fffffffe} 33 34var p256Zero = p256Element{} 35 36// p256P is 2²⁵⁶ - 2²²⁴ + 2¹⁹² + 2⁹⁶ - 1 in the Montgomery domain. 37var p256P = p256Element{0xffffffffffffffff, 0x00000000ffffffff, 38 0x0000000000000000, 0xffffffff00000001} 39 40// P256Point is a P-256 point. The zero value should not be assumed to be valid 41// (although it is in this implementation). 42type P256Point struct { 43 // (X:Y:Z) are Jacobian coordinates where x = X/Z² and y = Y/Z³. The point 44 // at infinity can be represented by any set of coordinates with Z = 0. 45 x, y, z p256Element 46} 47 48// NewP256Point returns a new P256Point representing the point at infinity. 49func NewP256Point() *P256Point { 50 return &P256Point{ 51 x: p256One, y: p256One, z: p256Zero, 52 } 53} 54 55// SetGenerator sets p to the canonical generator and returns p. 56func (p *P256Point) SetGenerator() *P256Point { 57 p.x = p256Element{0x79e730d418a9143c, 0x75ba95fc5fedb601, 58 0x79fb732b77622510, 0x18905f76a53755c6} 59 p.y = p256Element{0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 60 0xd2e88688dd21f325, 0x8571ff1825885d85} 61 p.z = p256One 62 return p 63} 64 65// Set sets p = q and returns p. 66func (p *P256Point) Set(q *P256Point) *P256Point { 67 p.x, p.y, p.z = q.x, q.y, q.z 68 return p 69} 70 71const p256ElementLength = 32 72const p256UncompressedLength = 1 + 2*p256ElementLength 73const p256CompressedLength = 1 + p256ElementLength 74 75// SetBytes sets p to the compressed, uncompressed, or infinity value encoded in 76// b, as specified in SEC 1, Version 2.0, Section 2.3.4. If the point is not on 77// the curve, it returns nil and an error, and the receiver is unchanged. 78// Otherwise, it returns p. 79func (p *P256Point) SetBytes(b []byte) (*P256Point, error) { 80 // p256Mul operates in the Montgomery domain with R = 2²⁵⁶ mod p. Thus rr 81 // here is R in the Montgomery domain, or R×R mod p. See comment in 82 // P256OrdInverse about how this is used. 83 rr := p256Element{0x0000000000000003, 0xfffffffbffffffff, 84 0xfffffffffffffffe, 0x00000004fffffffd} 85 86 switch { 87 // Point at infinity. 88 case len(b) == 1 && b[0] == 0: 89 return p.Set(NewP256Point()), nil 90 91 // Uncompressed form. 92 case len(b) == p256UncompressedLength && b[0] == 4: 93 var r P256Point 94 p256BigToLittle(&r.x, (*[32]byte)(b[1:33])) 95 p256BigToLittle(&r.y, (*[32]byte)(b[33:65])) 96 if p256LessThanP(&r.x) == 0 || p256LessThanP(&r.y) == 0 { 97 return nil, errors.New("invalid P256 element encoding") 98 } 99 p256Mul(&r.x, &r.x, &rr) 100 p256Mul(&r.y, &r.y, &rr) 101 if err := p256CheckOnCurve(&r.x, &r.y); err != nil { 102 return nil, err 103 } 104 r.z = p256One 105 return p.Set(&r), nil 106 107 // Compressed form. 108 case len(b) == p256CompressedLength && (b[0] == 2 || b[0] == 3): 109 var r P256Point 110 p256BigToLittle(&r.x, (*[32]byte)(b[1:33])) 111 if p256LessThanP(&r.x) == 0 { 112 return nil, errors.New("invalid P256 element encoding") 113 } 114 p256Mul(&r.x, &r.x, &rr) 115 116 // y² = x³ - 3x + b 117 p256Polynomial(&r.y, &r.x) 118 if !p256Sqrt(&r.y, &r.y) { 119 return nil, errors.New("invalid P256 compressed point encoding") 120 } 121 122 // Select the positive or negative root, as indicated by the least 123 // significant bit, based on the encoding type byte. 124 yy := new(p256Element) 125 p256FromMont(yy, &r.y) 126 cond := int(yy[0]&1) ^ int(b[0]&1) 127 p256NegCond(&r.y, cond) 128 129 r.z = p256One 130 return p.Set(&r), nil 131 132 default: 133 return nil, errors.New("invalid P256 point encoding") 134 } 135} 136 137// p256Polynomial sets y2 to x³ - 3x + b, and returns y2. 138func p256Polynomial(y2, x *p256Element) *p256Element { 139 x3 := new(p256Element) 140 p256Sqr(x3, x, 1) 141 p256Mul(x3, x3, x) 142 143 threeX := new(p256Element) 144 p256Add(threeX, x, x) 145 p256Add(threeX, threeX, x) 146 p256NegCond(threeX, 1) 147 148 p256B := &p256Element{0xd89cdf6229c4bddf, 0xacf005cd78843090, 149 0xe5a220abf7212ed6, 0xdc30061d04874834} 150 151 p256Add(x3, x3, threeX) 152 p256Add(x3, x3, p256B) 153 154 *y2 = *x3 155 return y2 156} 157 158func p256CheckOnCurve(x, y *p256Element) error { 159 // y² = x³ - 3x + b 160 rhs := p256Polynomial(new(p256Element), x) 161 lhs := new(p256Element) 162 p256Sqr(lhs, y, 1) 163 if p256Equal(lhs, rhs) != 1 { 164 return errors.New("P256 point not on curve") 165 } 166 return nil 167} 168 169// p256LessThanP returns 1 if x < p, and 0 otherwise. Note that a p256Element is 170// not allowed to be equal to or greater than p, so if this function returns 0 171// then x is invalid. 172func p256LessThanP(x *p256Element) int { 173 var b uint64 174 _, b = bits.Sub64(x[0], p256P[0], b) 175 _, b = bits.Sub64(x[1], p256P[1], b) 176 _, b = bits.Sub64(x[2], p256P[2], b) 177 _, b = bits.Sub64(x[3], p256P[3], b) 178 return int(b) 179} 180 181// p256Add sets res = x + y. 182func p256Add(res, x, y *p256Element) { 183 var c, b uint64 184 t1 := make([]uint64, 4) 185 t1[0], c = bits.Add64(x[0], y[0], 0) 186 t1[1], c = bits.Add64(x[1], y[1], c) 187 t1[2], c = bits.Add64(x[2], y[2], c) 188 t1[3], c = bits.Add64(x[3], y[3], c) 189 t2 := make([]uint64, 4) 190 t2[0], b = bits.Sub64(t1[0], p256P[0], 0) 191 t2[1], b = bits.Sub64(t1[1], p256P[1], b) 192 t2[2], b = bits.Sub64(t1[2], p256P[2], b) 193 t2[3], b = bits.Sub64(t1[3], p256P[3], b) 194 // Three options: 195 // - a+b < p 196 // then c is 0, b is 1, and t1 is correct 197 // - p <= a+b < 2^256 198 // then c is 0, b is 0, and t2 is correct 199 // - 2^256 <= a+b 200 // then c is 1, b is 1, and t2 is correct 201 t2Mask := (c ^ b) - 1 202 res[0] = (t1[0] & ^t2Mask) | (t2[0] & t2Mask) 203 res[1] = (t1[1] & ^t2Mask) | (t2[1] & t2Mask) 204 res[2] = (t1[2] & ^t2Mask) | (t2[2] & t2Mask) 205 res[3] = (t1[3] & ^t2Mask) | (t2[3] & t2Mask) 206} 207 208// p256Sqrt sets e to a square root of x. If x is not a square, p256Sqrt returns 209// false and e is unchanged. e and x can overlap. 210func p256Sqrt(e, x *p256Element) (isSquare bool) { 211 t0, t1 := new(p256Element), new(p256Element) 212 213 // Since p = 3 mod 4, exponentiation by (p + 1) / 4 yields a square root candidate. 214 // 215 // The sequence of 7 multiplications and 253 squarings is derived from the 216 // following addition chain generated with github.com/mmcloughlin/addchain v0.4.0. 217 // 218 // _10 = 2*1 219 // _11 = 1 + _10 220 // _1100 = _11 << 2 221 // _1111 = _11 + _1100 222 // _11110000 = _1111 << 4 223 // _11111111 = _1111 + _11110000 224 // x16 = _11111111 << 8 + _11111111 225 // x32 = x16 << 16 + x16 226 // return ((x32 << 32 + 1) << 96 + 1) << 94 227 // 228 p256Sqr(t0, x, 1) 229 p256Mul(t0, x, t0) 230 p256Sqr(t1, t0, 2) 231 p256Mul(t0, t0, t1) 232 p256Sqr(t1, t0, 4) 233 p256Mul(t0, t0, t1) 234 p256Sqr(t1, t0, 8) 235 p256Mul(t0, t0, t1) 236 p256Sqr(t1, t0, 16) 237 p256Mul(t0, t0, t1) 238 p256Sqr(t0, t0, 32) 239 p256Mul(t0, x, t0) 240 p256Sqr(t0, t0, 96) 241 p256Mul(t0, x, t0) 242 p256Sqr(t0, t0, 94) 243 244 p256Sqr(t1, t0, 1) 245 if p256Equal(t1, x) != 1 { 246 return false 247 } 248 *e = *t0 249 return true 250} 251 252// The following assembly functions are implemented in p256_asm_*.s 253 254// Montgomery multiplication. Sets res = in1 * in2 * R⁻¹ mod p. 255// 256//go:noescape 257func p256Mul(res, in1, in2 *p256Element) 258 259// Montgomery square, repeated n times (n >= 1). 260// 261//go:noescape 262func p256Sqr(res, in *p256Element, n int) 263 264// Montgomery multiplication by R⁻¹, or 1 outside the domain. 265// Sets res = in * R⁻¹, bringing res out of the Montgomery domain. 266// 267//go:noescape 268func p256FromMont(res, in *p256Element) 269 270// If cond is not 0, sets val = -val mod p. 271// 272//go:noescape 273func p256NegCond(val *p256Element, cond int) 274 275// If cond is 0, sets res = b, otherwise sets res = a. 276// 277//go:noescape 278func p256MovCond(res, a, b *P256Point, cond int) 279 280//go:noescape 281func p256BigToLittle(res *p256Element, in *[32]byte) 282 283//go:noescape 284func p256LittleToBig(res *[32]byte, in *p256Element) 285 286//go:noescape 287func p256OrdBigToLittle(res *p256OrdElement, in *[32]byte) 288 289//go:noescape 290func p256OrdLittleToBig(res *[32]byte, in *p256OrdElement) 291 292// p256Table is a table of the first 16 multiples of a point. Points are stored 293// at an index offset of -1 so [8]P is at index 7, P is at 0, and [16]P is at 15. 294// [0]P is the point at infinity and it's not stored. 295type p256Table [16]P256Point 296 297// p256Select sets res to the point at index idx in the table. 298// idx must be in [0, 15]. It executes in constant time. 299// 300//go:noescape 301func p256Select(res *P256Point, table *p256Table, idx int) 302 303// p256AffinePoint is a point in affine coordinates (x, y). x and y are still 304// Montgomery domain elements. The point can't be the point at infinity. 305type p256AffinePoint struct { 306 x, y p256Element 307} 308 309// p256AffineTable is a table of the first 32 multiples of a point. Points are 310// stored at an index offset of -1 like in p256Table, and [0]P is not stored. 311type p256AffineTable [32]p256AffinePoint 312 313// p256Precomputed is a series of precomputed multiples of G, the canonical 314// generator. The first p256AffineTable contains multiples of G. The second one 315// multiples of [2⁶]G, the third one of [2¹²]G, and so on, where each successive 316// table is the previous table doubled six times. Six is the width of the 317// sliding window used in p256ScalarMult, and having each table already 318// pre-doubled lets us avoid the doublings between windows entirely. This table 319// MUST NOT be modified, as it aliases into p256PrecomputedEmbed below. 320var p256Precomputed *[43]p256AffineTable 321 322//go:embed p256_asm_table.bin 323var p256PrecomputedEmbed string 324 325func init() { 326 p256PrecomputedPtr := (*unsafe.Pointer)(unsafe.Pointer(&p256PrecomputedEmbed)) 327 if runtime.GOARCH == "s390x" { 328 var newTable [43 * 32 * 2 * 4]uint64 329 for i, x := range (*[43 * 32 * 2 * 4][8]byte)(*p256PrecomputedPtr) { 330 newTable[i] = byteorder.LeUint64(x[:]) 331 } 332 newTablePtr := unsafe.Pointer(&newTable) 333 p256PrecomputedPtr = &newTablePtr 334 } 335 p256Precomputed = (*[43]p256AffineTable)(*p256PrecomputedPtr) 336} 337 338// p256SelectAffine sets res to the point at index idx in the table. 339// idx must be in [0, 31]. It executes in constant time. 340// 341//go:noescape 342func p256SelectAffine(res *p256AffinePoint, table *p256AffineTable, idx int) 343 344// Point addition with an affine point and constant time conditions. 345// If zero is 0, sets res = in2. If sel is 0, sets res = in1. 346// If sign is not 0, sets res = in1 + -in2. Otherwise, sets res = in1 + in2 347// 348//go:noescape 349func p256PointAddAffineAsm(res, in1 *P256Point, in2 *p256AffinePoint, sign, sel, zero int) 350 351// Point addition. Sets res = in1 + in2. Returns one if the two input points 352// were equal and zero otherwise. If in1 or in2 are the point at infinity, res 353// and the return value are undefined. 354// 355//go:noescape 356func p256PointAddAsm(res, in1, in2 *P256Point) int 357 358// Point doubling. Sets res = in + in. in can be the point at infinity. 359// 360//go:noescape 361func p256PointDoubleAsm(res, in *P256Point) 362 363// p256OrdElement is a P-256 scalar field element in [0, ord(G)-1] in the 364// Montgomery domain (with R 2²⁵⁶) as four uint64 limbs in little-endian order. 365type p256OrdElement [4]uint64 366 367// p256OrdReduce ensures s is in the range [0, ord(G)-1]. 368func p256OrdReduce(s *p256OrdElement) { 369 // Since 2 * ord(G) > 2²⁵⁶, we can just conditionally subtract ord(G), 370 // keeping the result if it doesn't underflow. 371 t0, b := bits.Sub64(s[0], 0xf3b9cac2fc632551, 0) 372 t1, b := bits.Sub64(s[1], 0xbce6faada7179e84, b) 373 t2, b := bits.Sub64(s[2], 0xffffffffffffffff, b) 374 t3, b := bits.Sub64(s[3], 0xffffffff00000000, b) 375 tMask := b - 1 // zero if subtraction underflowed 376 s[0] ^= (t0 ^ s[0]) & tMask 377 s[1] ^= (t1 ^ s[1]) & tMask 378 s[2] ^= (t2 ^ s[2]) & tMask 379 s[3] ^= (t3 ^ s[3]) & tMask 380} 381 382// Add sets q = p1 + p2, and returns q. The points may overlap. 383func (q *P256Point) Add(r1, r2 *P256Point) *P256Point { 384 var sum, double P256Point 385 r1IsInfinity := r1.isInfinity() 386 r2IsInfinity := r2.isInfinity() 387 pointsEqual := p256PointAddAsm(&sum, r1, r2) 388 p256PointDoubleAsm(&double, r1) 389 p256MovCond(&sum, &double, &sum, pointsEqual) 390 p256MovCond(&sum, r1, &sum, r2IsInfinity) 391 p256MovCond(&sum, r2, &sum, r1IsInfinity) 392 return q.Set(&sum) 393} 394 395// Double sets q = p + p, and returns q. The points may overlap. 396func (q *P256Point) Double(p *P256Point) *P256Point { 397 var double P256Point 398 p256PointDoubleAsm(&double, p) 399 return q.Set(&double) 400} 401 402// ScalarBaseMult sets r = scalar * generator, where scalar is a 32-byte big 403// endian value, and returns r. If scalar is not 32 bytes long, ScalarBaseMult 404// returns an error and the receiver is unchanged. 405func (r *P256Point) ScalarBaseMult(scalar []byte) (*P256Point, error) { 406 if len(scalar) != 32 { 407 return nil, errors.New("invalid scalar length") 408 } 409 scalarReversed := new(p256OrdElement) 410 p256OrdBigToLittle(scalarReversed, (*[32]byte)(scalar)) 411 p256OrdReduce(scalarReversed) 412 413 r.p256BaseMult(scalarReversed) 414 return r, nil 415} 416 417// ScalarMult sets r = scalar * q, where scalar is a 32-byte big endian value, 418// and returns r. If scalar is not 32 bytes long, ScalarBaseMult returns an 419// error and the receiver is unchanged. 420func (r *P256Point) ScalarMult(q *P256Point, scalar []byte) (*P256Point, error) { 421 if len(scalar) != 32 { 422 return nil, errors.New("invalid scalar length") 423 } 424 scalarReversed := new(p256OrdElement) 425 p256OrdBigToLittle(scalarReversed, (*[32]byte)(scalar)) 426 p256OrdReduce(scalarReversed) 427 428 r.Set(q).p256ScalarMult(scalarReversed) 429 return r, nil 430} 431 432// uint64IsZero returns 1 if x is zero and zero otherwise. 433func uint64IsZero(x uint64) int { 434 x = ^x 435 x &= x >> 32 436 x &= x >> 16 437 x &= x >> 8 438 x &= x >> 4 439 x &= x >> 2 440 x &= x >> 1 441 return int(x & 1) 442} 443 444// p256Equal returns 1 if a and b are equal and 0 otherwise. 445func p256Equal(a, b *p256Element) int { 446 var acc uint64 447 for i := range a { 448 acc |= a[i] ^ b[i] 449 } 450 return uint64IsZero(acc) 451} 452 453// isInfinity returns 1 if p is the point at infinity and 0 otherwise. 454func (p *P256Point) isInfinity() int { 455 return p256Equal(&p.z, &p256Zero) 456} 457 458// Bytes returns the uncompressed or infinity encoding of p, as specified in 459// SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the point at 460// infinity is shorter than all other encodings. 461func (p *P256Point) Bytes() []byte { 462 // This function is outlined to make the allocations inline in the caller 463 // rather than happen on the heap. 464 var out [p256UncompressedLength]byte 465 return p.bytes(&out) 466} 467 468func (p *P256Point) bytes(out *[p256UncompressedLength]byte) []byte { 469 // The proper representation of the point at infinity is a single zero byte. 470 if p.isInfinity() == 1 { 471 return append(out[:0], 0) 472 } 473 474 x, y := new(p256Element), new(p256Element) 475 p.affineFromMont(x, y) 476 477 out[0] = 4 // Uncompressed form. 478 p256LittleToBig((*[32]byte)(out[1:33]), x) 479 p256LittleToBig((*[32]byte)(out[33:65]), y) 480 481 return out[:] 482} 483 484// affineFromMont sets (x, y) to the affine coordinates of p, converted out of the 485// Montgomery domain. 486func (p *P256Point) affineFromMont(x, y *p256Element) { 487 p256Inverse(y, &p.z) 488 p256Sqr(x, y, 1) 489 p256Mul(y, y, x) 490 491 p256Mul(x, &p.x, x) 492 p256Mul(y, &p.y, y) 493 494 p256FromMont(x, x) 495 p256FromMont(y, y) 496} 497 498// BytesX returns the encoding of the x-coordinate of p, as specified in SEC 1, 499// Version 2.0, Section 2.3.5, or an error if p is the point at infinity. 500func (p *P256Point) BytesX() ([]byte, error) { 501 // This function is outlined to make the allocations inline in the caller 502 // rather than happen on the heap. 503 var out [p256ElementLength]byte 504 return p.bytesX(&out) 505} 506 507func (p *P256Point) bytesX(out *[p256ElementLength]byte) ([]byte, error) { 508 if p.isInfinity() == 1 { 509 return nil, errors.New("P256 point is the point at infinity") 510 } 511 512 x := new(p256Element) 513 p256Inverse(x, &p.z) 514 p256Sqr(x, x, 1) 515 p256Mul(x, &p.x, x) 516 p256FromMont(x, x) 517 p256LittleToBig((*[32]byte)(out[:]), x) 518 519 return out[:], nil 520} 521 522// BytesCompressed returns the compressed or infinity encoding of p, as 523// specified in SEC 1, Version 2.0, Section 2.3.3. Note that the encoding of the 524// point at infinity is shorter than all other encodings. 525func (p *P256Point) BytesCompressed() []byte { 526 // This function is outlined to make the allocations inline in the caller 527 // rather than happen on the heap. 528 var out [p256CompressedLength]byte 529 return p.bytesCompressed(&out) 530} 531 532func (p *P256Point) bytesCompressed(out *[p256CompressedLength]byte) []byte { 533 if p.isInfinity() == 1 { 534 return append(out[:0], 0) 535 } 536 537 x, y := new(p256Element), new(p256Element) 538 p.affineFromMont(x, y) 539 540 out[0] = 2 | byte(y[0]&1) 541 p256LittleToBig((*[32]byte)(out[1:33]), x) 542 543 return out[:] 544} 545 546// Select sets q to p1 if cond == 1, and to p2 if cond == 0. 547func (q *P256Point) Select(p1, p2 *P256Point, cond int) *P256Point { 548 p256MovCond(q, p1, p2, cond) 549 return q 550} 551 552// p256Inverse sets out to in⁻¹ mod p. If in is zero, out will be zero. 553func p256Inverse(out, in *p256Element) { 554 // Inversion is calculated through exponentiation by p - 2, per Fermat's 555 // little theorem. 556 // 557 // The sequence of 12 multiplications and 255 squarings is derived from the 558 // following addition chain generated with github.com/mmcloughlin/addchain 559 // v0.4.0. 560 // 561 // _10 = 2*1 562 // _11 = 1 + _10 563 // _110 = 2*_11 564 // _111 = 1 + _110 565 // _111000 = _111 << 3 566 // _111111 = _111 + _111000 567 // x12 = _111111 << 6 + _111111 568 // x15 = x12 << 3 + _111 569 // x16 = 2*x15 + 1 570 // x32 = x16 << 16 + x16 571 // i53 = x32 << 15 572 // x47 = x15 + i53 573 // i263 = ((i53 << 17 + 1) << 143 + x47) << 47 574 // return (x47 + i263) << 2 + 1 575 // 576 var z = new(p256Element) 577 var t0 = new(p256Element) 578 var t1 = new(p256Element) 579 580 p256Sqr(z, in, 1) 581 p256Mul(z, in, z) 582 p256Sqr(z, z, 1) 583 p256Mul(z, in, z) 584 p256Sqr(t0, z, 3) 585 p256Mul(t0, z, t0) 586 p256Sqr(t1, t0, 6) 587 p256Mul(t0, t0, t1) 588 p256Sqr(t0, t0, 3) 589 p256Mul(z, z, t0) 590 p256Sqr(t0, z, 1) 591 p256Mul(t0, in, t0) 592 p256Sqr(t1, t0, 16) 593 p256Mul(t0, t0, t1) 594 p256Sqr(t0, t0, 15) 595 p256Mul(z, z, t0) 596 p256Sqr(t0, t0, 17) 597 p256Mul(t0, in, t0) 598 p256Sqr(t0, t0, 143) 599 p256Mul(t0, z, t0) 600 p256Sqr(t0, t0, 47) 601 p256Mul(z, z, t0) 602 p256Sqr(z, z, 2) 603 p256Mul(out, in, z) 604} 605 606func boothW5(in uint) (int, int) { 607 var s uint = ^((in >> 5) - 1) 608 var d uint = (1 << 6) - in - 1 609 d = (d & s) | (in & (^s)) 610 d = (d >> 1) + (d & 1) 611 return int(d), int(s & 1) 612} 613 614func boothW6(in uint) (int, int) { 615 var s uint = ^((in >> 6) - 1) 616 var d uint = (1 << 7) - in - 1 617 d = (d & s) | (in & (^s)) 618 d = (d >> 1) + (d & 1) 619 return int(d), int(s & 1) 620} 621 622func (p *P256Point) p256BaseMult(scalar *p256OrdElement) { 623 var t0 p256AffinePoint 624 625 wvalue := (scalar[0] << 1) & 0x7f 626 sel, sign := boothW6(uint(wvalue)) 627 p256SelectAffine(&t0, &p256Precomputed[0], sel) 628 p.x, p.y, p.z = t0.x, t0.y, p256One 629 p256NegCond(&p.y, sign) 630 631 index := uint(5) 632 zero := sel 633 634 for i := 1; i < 43; i++ { 635 if index < 192 { 636 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f 637 } else { 638 wvalue = (scalar[index/64] >> (index % 64)) & 0x7f 639 } 640 index += 6 641 sel, sign = boothW6(uint(wvalue)) 642 p256SelectAffine(&t0, &p256Precomputed[i], sel) 643 p256PointAddAffineAsm(p, p, &t0, sign, sel, zero) 644 zero |= sel 645 } 646 647 // If the whole scalar was zero, set to the point at infinity. 648 p256MovCond(p, p, NewP256Point(), zero) 649} 650 651func (p *P256Point) p256ScalarMult(scalar *p256OrdElement) { 652 // precomp is a table of precomputed points that stores powers of p 653 // from p^1 to p^16. 654 var precomp p256Table 655 var t0, t1, t2, t3 P256Point 656 657 // Prepare the table 658 precomp[0] = *p // 1 659 660 p256PointDoubleAsm(&t0, p) 661 p256PointDoubleAsm(&t1, &t0) 662 p256PointDoubleAsm(&t2, &t1) 663 p256PointDoubleAsm(&t3, &t2) 664 precomp[1] = t0 // 2 665 precomp[3] = t1 // 4 666 precomp[7] = t2 // 8 667 precomp[15] = t3 // 16 668 669 p256PointAddAsm(&t0, &t0, p) 670 p256PointAddAsm(&t1, &t1, p) 671 p256PointAddAsm(&t2, &t2, p) 672 precomp[2] = t0 // 3 673 precomp[4] = t1 // 5 674 precomp[8] = t2 // 9 675 676 p256PointDoubleAsm(&t0, &t0) 677 p256PointDoubleAsm(&t1, &t1) 678 precomp[5] = t0 // 6 679 precomp[9] = t1 // 10 680 681 p256PointAddAsm(&t2, &t0, p) 682 p256PointAddAsm(&t1, &t1, p) 683 precomp[6] = t2 // 7 684 precomp[10] = t1 // 11 685 686 p256PointDoubleAsm(&t0, &t0) 687 p256PointDoubleAsm(&t2, &t2) 688 precomp[11] = t0 // 12 689 precomp[13] = t2 // 14 690 691 p256PointAddAsm(&t0, &t0, p) 692 p256PointAddAsm(&t2, &t2, p) 693 precomp[12] = t0 // 13 694 precomp[14] = t2 // 15 695 696 // Start scanning the window from top bit 697 index := uint(254) 698 var sel, sign int 699 700 wvalue := (scalar[index/64] >> (index % 64)) & 0x3f 701 sel, _ = boothW5(uint(wvalue)) 702 703 p256Select(p, &precomp, sel) 704 zero := sel 705 706 for index > 4 { 707 index -= 5 708 p256PointDoubleAsm(p, p) 709 p256PointDoubleAsm(p, p) 710 p256PointDoubleAsm(p, p) 711 p256PointDoubleAsm(p, p) 712 p256PointDoubleAsm(p, p) 713 714 if index < 192 { 715 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f 716 } else { 717 wvalue = (scalar[index/64] >> (index % 64)) & 0x3f 718 } 719 720 sel, sign = boothW5(uint(wvalue)) 721 722 p256Select(&t0, &precomp, sel) 723 p256NegCond(&t0.y, sign) 724 p256PointAddAsm(&t1, p, &t0) 725 p256MovCond(&t1, &t1, p, sel) 726 p256MovCond(p, &t1, &t0, zero) 727 zero |= sel 728 } 729 730 p256PointDoubleAsm(p, p) 731 p256PointDoubleAsm(p, p) 732 p256PointDoubleAsm(p, p) 733 p256PointDoubleAsm(p, p) 734 p256PointDoubleAsm(p, p) 735 736 wvalue = (scalar[0] << 1) & 0x3f 737 sel, sign = boothW5(uint(wvalue)) 738 739 p256Select(&t0, &precomp, sel) 740 p256NegCond(&t0.y, sign) 741 p256PointAddAsm(&t1, p, &t0) 742 p256MovCond(&t1, &t1, p, sel) 743 p256MovCond(p, &t1, &t0, zero) 744} 745