Patrice Arruda | 748609c | 2020-06-25 12:12:21 -0700 | [diff] [blame] | 1 | // Copyright 2019 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
Dan Willemsen | cc753b7 | 2021-08-31 13:25:42 -0700 | [diff] [blame] | 5 | //go:build ppc64le |
Patrice Arruda | 748609c | 2020-06-25 12:12:21 -0700 | [diff] [blame] | 6 | |
| 7 | package elliptic |
| 8 | |
| 9 | import ( |
| 10 | "crypto/subtle" |
| 11 | "encoding/binary" |
| 12 | "math/big" |
| 13 | ) |
| 14 | |
// p256CurveFast is the P-256 Curve implementation for ppc64le. It embeds
// *CurveParams and supplies the Params, ScalarMult, ScalarBaseMult and
// CombinedMult methods defined in this file, which are built on the routines
// implemented in p256_asm_ppc64le.s.
//
// This was ported from the s390x implementation for ppc64le.
// Some hints are included here for changes that should be
// in the big endian ppc64 implementation, however more
// investigation and testing is needed for the ppc64 big
// endian version to work.
type p256CurveFast struct {
	*CurveParams
}
| 23 | |
// p256Point holds one curve point as three 32-byte field elements.
//
// The coordinates built in this file are little-endian (see fromBig) and in
// the Montgomery domain. p256PointToAffine recovers the affine point as
// x/z^2 and y/z^3, and an affine input is represented by setting z to one
// (in the Montgomery domain).
type p256Point struct {
	x [32]byte
	y [32]byte
	z [32]byte
}
| 29 | |
var (
	// p256 is the Curve value installed by initP256Arch.
	p256 Curve
	// p256PreFast is the base point table built by initTable and read by
	// p256BaseMult. Entry [i][j] holds (j+1) * 2^(7*i) times the base point
	// as affine x and y in Montgomery form; z is left zero.
	p256PreFast *[37][64]p256Point
)
| 34 | |
| 35 | func initP256Arch() { |
| 36 | p256 = p256CurveFast{p256Params} |
| 37 | initTable() |
| 38 | return |
| 39 | } |
| 40 | |
// Params returns the curve parameters embedded in the receiver.
func (curve p256CurveFast) Params() *CurveParams {
	return curve.CurveParams
}
| 44 | |
// Functions implemented in p256_asm_ppc64le.s

// p256MulAsm sets res to the Montgomery product of in1 and in2 modulo the
// P-256 field prime. Callers in this file pass 32-byte little-endian slices
// and frequently alias res with one of the inputs (see p256Inverse).
//
//go:noescape
func p256MulAsm(res, in1, in2 []byte)
| 50 | |
// p256Sqr sets res to the Montgomery square of in modulo the P-256 field
// prime. It is implemented as a Montgomery multiplication of in by itself.
func p256Sqr(res, in []byte) {
	p256MulAsm(res, in, in)
}
| 56 | |
// p256FromMont takes in out of the Montgomery domain (a Montgomery
// multiplication by 1) and stores the result in res. p256PointToAffine calls
// it with res and in being the same slice.
//
//go:noescape
func p256FromMont(res, in []byte)
| 61 | |
// p256NegCond negates the point val in place iff cond == 1 (val <- -val).
// The scalar multiplication routines pass the sign returned by the boothW*
// recoders as cond.
// NOTE(review): the effect of cond values other than 0 and 1 is decided by
// the assembly and is not visible from this file.
//
//go:noescape
func p256NegCond(val *p256Point, cond int)
| 66 | |
// p256MovCond copies one of two points into res depending on cond:
//
//	if cond == 0 res <- b; else res <- a
//
// Callers in this file pass res aliased with either a or b.
//
//go:noescape
func p256MovCond(res, a, b *p256Point, cond int)
| 71 | |
// p256Select performs a constant time table access, storing in point the
// entry of table chosen by idx. p256ScalarMult calls it with its 16-entry
// table of multiples and the digit produced by boothW5.
// NOTE(review): that digit ranges over 0..16, so idx looks 1-based with 0
// selecting no entry — confirm against the assembly.
//
//go:noescape
func p256Select(point *p256Point, table []p256Point, idx int)
| 76 | |
// p256SelectBase is the table access used by p256BaseMult: it stores in
// point the entry of one 64-entry row of p256PreFast chosen by idx, the digit
// produced by boothW7.
// NOTE(review): presumably constant time and 1-based like p256Select —
// confirm against the assembly.
//
//go:noescape
func p256SelectBase(point *p256Point, table []p256Point, idx int)
| 80 | |
// p256PointAddAffineAsm computes the point addition P3 = P1 + P2, with
// res = P3, in1 = P1 and in2 = P2, where P2 is an affine point. The three
// flags adjust the result:
//
//	If sign == 1 -> P2 = -P2
//	If sel == 0  -> P3 = P1
//	If zero == 0 -> P3 = P2
//
// p256BaseMult passes the Booth sign and digit of the current window as sign
// and sel, and the OR of all earlier digits as zero.
//
//go:noescape
func p256PointAddAffineAsm(res, in1, in2 *p256Point, sign, sel, zero int)
| 88 | |
// p256PointAddAsm sets res to the point sum in1 + in2. res may alias an
// input, as it does in p256ScalarMult.
//
// CombinedMult treats a non-zero return value as "both inputs were the same
// point" and substitutes a doubling in that case; every other caller in this
// file discards the result.
//
//go:noescape
func p256PointAddAsm(res, in1, in2 *p256Point) int
| 93 | |
// p256PointDoubleAsm sets res to the double of the point in. res may alias
// in, as it does in the doubling loops of p256ScalarMult and initTable.
//
//go:noescape
func p256PointDoubleAsm(res, in *p256Point)
| 97 | |
| 98 | // The result should be a slice in LE order, but the slice |
| 99 | // from big.Bytes is in BE order. |
| 100 | // TODO: For big endian implementation, do not reverse bytes. |
| 101 | // |
| 102 | func fromBig(big *big.Int) []byte { |
| 103 | // This could be done a lot more efficiently... |
| 104 | res := big.Bytes() |
| 105 | t := make([]byte, 32) |
| 106 | if len(res) < 32 { |
| 107 | copy(t[32-len(res):], res) |
| 108 | } else if len(res) == 32 { |
| 109 | copy(t, res) |
| 110 | } else { |
| 111 | copy(t, res[len(res)-32:]) |
| 112 | } |
| 113 | p256ReverseBytes(t, t) |
| 114 | return t |
| 115 | } |
| 116 | |
| 117 | // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar |
| 118 | // is equal or greater than the order of the group, it's reduced modulo that order. |
| 119 | func p256GetMultiplier(in []byte) []byte { |
| 120 | n := new(big.Int).SetBytes(in) |
| 121 | |
| 122 | if n.Cmp(p256Params.N) >= 0 { |
| 123 | n.Mod(n, p256Params.N) |
| 124 | } |
| 125 | return fromBig(n) |
| 126 | } |
| 127 | |
// rr is R×R mod p, stored as 32 little-endian bytes, where p is the prime of
// the curve's underlying field and R = 2^256 mod p is the radix of the
// Montgomery domain that p256MulAsm operates in. ScalarMult and CombinedMult
// multiply their input coordinates by rr with p256MulAsm to bring them into
// that domain.
// TODO: For big endian implementation, the bytes in these slices should be in reverse order,
// as found in the s390x implementation.
var rr = []byte{
	0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0xff, 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff,
	0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xfd, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00,
}
| 134 | |
// one is the field element 1 in the Montgomery domain, stored as 32
// little-endian bytes. It is copied into the z coordinate of points that are
// built from affine inputs.
var one = []byte{
	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xfe, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
}
| 137 | |
| 138 | func maybeReduceModP(in *big.Int) *big.Int { |
| 139 | if in.Cmp(p256Params.P) < 0 { |
| 140 | return in |
| 141 | } |
| 142 | return new(big.Int).Mod(in, p256Params.P) |
| 143 | } |
| 144 | |
// p256ReverseBytes copies the first 32 bytes from in to res in reverse order.
// All of in is read before anything is written, so res and in may be the
// same slice.
func p256ReverseBytes(res, in []byte) {
	// Reslice once up front so the accesses below need no further bounds
	// checks.
	in = in[:32]
	res = res[:32]

	// Read the four 64-bit words as big-endian values...
	var words [4]uint64
	for i := range words {
		words[i] = binary.BigEndian.Uint64(in[8*i:])
	}

	// ...and write them back little-endian, last word first.
	for i, w := range words {
		binary.LittleEndian.PutUint64(res[8*(3-i):], w)
	}
}
| 163 | |
| 164 | func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) { |
| 165 | var r1, r2 p256Point |
| 166 | |
| 167 | scalarReduced := p256GetMultiplier(baseScalar) |
| 168 | r1IsInfinity := scalarIsZero(scalarReduced) |
| 169 | r1.p256BaseMult(scalarReduced) |
| 170 | |
| 171 | copy(r2.x[:], fromBig(maybeReduceModP(bigX))) |
| 172 | copy(r2.y[:], fromBig(maybeReduceModP(bigY))) |
| 173 | copy(r2.z[:], one) |
| 174 | p256MulAsm(r2.x[:], r2.x[:], rr[:]) |
| 175 | p256MulAsm(r2.y[:], r2.y[:], rr[:]) |
| 176 | |
| 177 | scalarReduced = p256GetMultiplier(scalar) |
| 178 | r2IsInfinity := scalarIsZero(scalarReduced) |
| 179 | r2.p256ScalarMult(scalarReduced) |
| 180 | |
| 181 | var sum, double p256Point |
| 182 | pointsEqual := p256PointAddAsm(&sum, &r1, &r2) |
| 183 | p256PointDoubleAsm(&double, &r1) |
| 184 | p256MovCond(&sum, &double, &sum, pointsEqual) |
| 185 | p256MovCond(&sum, &r1, &sum, r2IsInfinity) |
| 186 | p256MovCond(&sum, &r2, &sum, r1IsInfinity) |
| 187 | return sum.p256PointToAffine() |
| 188 | } |
| 189 | |
| 190 | func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) { |
| 191 | var r p256Point |
| 192 | reducedScalar := p256GetMultiplier(scalar) |
| 193 | r.p256BaseMult(reducedScalar) |
| 194 | return r.p256PointToAffine() |
| 195 | } |
| 196 | |
| 197 | func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) { |
| 198 | scalarReduced := p256GetMultiplier(scalar) |
| 199 | var r p256Point |
| 200 | copy(r.x[:], fromBig(maybeReduceModP(bigX))) |
| 201 | copy(r.y[:], fromBig(maybeReduceModP(bigY))) |
| 202 | copy(r.z[:], one) |
| 203 | p256MulAsm(r.x[:], r.x[:], rr[:]) |
| 204 | p256MulAsm(r.y[:], r.y[:], rr[:]) |
| 205 | r.p256ScalarMult(scalarReduced) |
| 206 | return r.p256PointToAffine() |
| 207 | } |
| 208 | |
// scalarIsZero returns 1 if every byte of scalar is zero (including when
// scalar is empty) and 0 otherwise. All bytes are OR-ed together and the
// accumulated byte is compared against zero once with
// subtle.ConstantTimeByteEq, so there is no early exit on the first non-zero
// byte.
func scalarIsZero(scalar []byte) int {
	var acc byte
	for i := range scalar {
		acc |= scalar[i]
	}
	return subtle.ConstantTimeByteEq(acc, 0)
}
| 218 | |
| 219 | func (p *p256Point) p256PointToAffine() (x, y *big.Int) { |
| 220 | zInv := make([]byte, 32) |
| 221 | zInvSq := make([]byte, 32) |
| 222 | |
| 223 | p256Inverse(zInv, p.z[:]) |
| 224 | p256Sqr(zInvSq, zInv) |
| 225 | p256MulAsm(zInv, zInv, zInvSq) |
| 226 | |
| 227 | p256MulAsm(zInvSq, p.x[:], zInvSq) |
| 228 | p256MulAsm(zInv, p.y[:], zInv) |
| 229 | |
| 230 | p256FromMont(zInvSq, zInvSq) |
| 231 | p256FromMont(zInv, zInv) |
| 232 | |
| 233 | // SetBytes expects a slice in big endian order, |
| 234 | // since ppc64le is little endian, reverse the bytes. |
| 235 | // TODO: For big endian, bytes don't need to be reversed. |
| 236 | p256ReverseBytes(zInvSq, zInvSq) |
| 237 | p256ReverseBytes(zInv, zInv) |
| 238 | rx := new(big.Int).SetBytes(zInvSq) |
| 239 | ry := new(big.Int).SetBytes(zInv) |
| 240 | return rx, ry |
| 241 | } |
| 242 | |
| 243 | // p256Inverse sets out to in^-1 mod p. |
| 244 | func p256Inverse(out, in []byte) { |
| 245 | var stack [6 * 32]byte |
| 246 | p2 := stack[32*0 : 32*0+32] |
| 247 | p4 := stack[32*1 : 32*1+32] |
| 248 | p8 := stack[32*2 : 32*2+32] |
| 249 | p16 := stack[32*3 : 32*3+32] |
| 250 | p32 := stack[32*4 : 32*4+32] |
| 251 | |
| 252 | p256Sqr(out, in) |
| 253 | p256MulAsm(p2, out, in) // 3*p |
| 254 | |
| 255 | p256Sqr(out, p2) |
| 256 | p256Sqr(out, out) |
| 257 | p256MulAsm(p4, out, p2) // f*p |
| 258 | |
| 259 | p256Sqr(out, p4) |
| 260 | p256Sqr(out, out) |
| 261 | p256Sqr(out, out) |
| 262 | p256Sqr(out, out) |
| 263 | p256MulAsm(p8, out, p4) // ff*p |
| 264 | |
| 265 | p256Sqr(out, p8) |
| 266 | |
| 267 | for i := 0; i < 7; i++ { |
| 268 | p256Sqr(out, out) |
| 269 | } |
| 270 | p256MulAsm(p16, out, p8) // ffff*p |
| 271 | |
| 272 | p256Sqr(out, p16) |
| 273 | for i := 0; i < 15; i++ { |
| 274 | p256Sqr(out, out) |
| 275 | } |
| 276 | p256MulAsm(p32, out, p16) // ffffffff*p |
| 277 | |
| 278 | p256Sqr(out, p32) |
| 279 | |
| 280 | for i := 0; i < 31; i++ { |
| 281 | p256Sqr(out, out) |
| 282 | } |
| 283 | p256MulAsm(out, out, in) |
| 284 | |
| 285 | for i := 0; i < 32*4; i++ { |
| 286 | p256Sqr(out, out) |
| 287 | } |
| 288 | p256MulAsm(out, out, p32) |
| 289 | |
| 290 | for i := 0; i < 32; i++ { |
| 291 | p256Sqr(out, out) |
| 292 | } |
| 293 | p256MulAsm(out, out, p32) |
| 294 | |
| 295 | for i := 0; i < 16; i++ { |
| 296 | p256Sqr(out, out) |
| 297 | } |
| 298 | p256MulAsm(out, out, p16) |
| 299 | |
| 300 | for i := 0; i < 8; i++ { |
| 301 | p256Sqr(out, out) |
| 302 | } |
| 303 | p256MulAsm(out, out, p8) |
| 304 | |
| 305 | p256Sqr(out, out) |
| 306 | p256Sqr(out, out) |
| 307 | p256Sqr(out, out) |
| 308 | p256Sqr(out, out) |
| 309 | p256MulAsm(out, out, p4) |
| 310 | |
| 311 | p256Sqr(out, out) |
| 312 | p256Sqr(out, out) |
| 313 | p256MulAsm(out, out, p2) |
| 314 | |
| 315 | p256Sqr(out, out) |
| 316 | p256Sqr(out, out) |
| 317 | p256MulAsm(out, out, in) |
| 318 | } |
| 319 | |
// boothW5 recodes a 6-bit window (five scalar bits plus the top bit of the
// window below) into a signed digit. It returns the digit's magnitude, in
// the range 0..16, and 1 as the second result if the digit is negative, 0
// otherwise. The computation is branch free.
func boothW5(in uint) (int, int) {
	const width = 5

	// sign is all ones when bit `width` of in is set and zero otherwise.
	sign := ^((in >> width) - 1)

	// Pick the complemented window for negative digits, in itself otherwise.
	digit := (1<<(width+1) - 1) - in
	digit = (sign & digit) | (^sign & in)

	// Halve, rounding up.
	digit = digit>>1 + digit&1
	return int(digit), int(sign & 1)
}
| 327 | |
// boothW6 recodes a 7-bit window (six scalar bits plus the top bit of the
// window below) into a signed digit. It returns the digit's magnitude, in
// the range 0..32, and 1 as the second result if the digit is negative, 0
// otherwise. The computation is branch free.
func boothW6(in uint) (int, int) {
	const width = 6

	// sign is all ones when bit `width` of in is set and zero otherwise.
	sign := ^((in >> width) - 1)

	// Pick the complemented window for negative digits, in itself otherwise.
	digit := (1<<(width+1) - 1) - in
	digit = (sign & digit) | (^sign & in)

	// Halve, rounding up.
	digit = digit>>1 + digit&1
	return int(digit), int(sign & 1)
}
| 335 | |
// boothW7 recodes an 8-bit window (seven scalar bits plus the top bit of the
// window below) into a signed digit. It returns the digit's magnitude, in
// the range 0..64, and 1 as the second result if the digit is negative, 0
// otherwise. The computation is branch free.
func boothW7(in uint) (int, int) {
	const width = 7

	// sign is all ones when bit `width` of in is set and zero otherwise.
	sign := ^((in >> width) - 1)

	// Pick the complemented window for negative digits, in itself otherwise.
	digit := (1<<(width+1) - 1) - in
	digit = (sign & digit) | (^sign & in)

	// Halve, rounding up.
	digit = digit>>1 + digit&1
	return int(digit), int(sign & 1)
}
| 343 | |
// initTable allocates p256PreFast and fills it with multiples of the base
// point for use by p256BaseMult. Entry [i][j] receives (j+1) * 2^(7*i) times
// the base point, as affine x and y coordinates that are still in Montgomery
// form; the z field of the table entries is not written.
func initTable() {

	p256PreFast = new([37][64]p256Point)

	// The base point with z = 1, all three coordinates in Montgomery form
	// and little-endian byte order.
	// TODO: For big endian, these slices should be in reverse byte order,
	// as found in the s390x implementation.
	basePoint := p256Point{
		x: [32]byte{0x3c, 0x14, 0xa9, 0x18, 0xd4, 0x30, 0xe7, 0x79, 0x01, 0xb6, 0xed, 0x5f, 0xfc, 0x95, 0xba, 0x75,
			0x10, 0x25, 0x62, 0x77, 0x2b, 0x73, 0xfb, 0x79, 0xc6, 0x55, 0x37, 0xa5, 0x76, 0x5f, 0x90, 0x18}, //(p256.x*2^256)%p
		y: [32]byte{0x0a, 0x56, 0x95, 0xce, 0x57, 0x53, 0xf2, 0xdd, 0x5c, 0xe4, 0x19, 0xba, 0xe4, 0xb8, 0x4a, 0x8b,
			0x25, 0xf3, 0x21, 0xdd, 0x88, 0x86, 0xe8, 0xd2, 0x85, 0x5d, 0x88, 0x25, 0x18, 0xff, 0x71, 0x85}, //(p256.y*2^256)%p
		z: [32]byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
			0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}, //(p256.z*2^256)%p

	}

	// t2 holds (j+1) * basePoint for the current column j; t1 starts each
	// column as a copy of t2 and is then doubled down the rows.
	t1 := new(p256Point)
	t2 := new(p256Point)
	*t2 = basePoint

	zInv := make([]byte, 32)
	zInvSq := make([]byte, 32)
	for j := 0; j < 64; j++ {
		*t1 = *t2
		for i := 0; i < 37; i++ {
			// The window size is 7 so we need to double 7 times.
			if i != 0 {
				for k := 0; k < 7; k++ {
					p256PointDoubleAsm(t1, t1)
				}
			}
			// Convert the point to affine form. (Its values are
			// still in Montgomery form however.)
			p256Inverse(zInv, t1.z[:])
			p256Sqr(zInvSq, zInv)
			p256MulAsm(zInv, zInv, zInvSq) // zInv now holds z^-3

			p256MulAsm(t1.x[:], t1.x[:], zInvSq)
			p256MulAsm(t1.y[:], t1.y[:], zInv)

			// The point is affine again, so its z becomes one.
			copy(t1.z[:], basePoint.z[:])
			// Update the table entry
			copy(p256PreFast[i][j].x[:], t1.x[:])
			copy(p256PreFast[i][j].y[:], t1.y[:])
		}
		// Advance t2 to the next multiple of the base point. On the first
		// pass t2 still equals basePoint, so it is doubled instead of
		// added to itself.
		// NOTE(review): presumably because p256PointAddAsm does not
		// produce the sum of two equal points (CombinedMult works around
		// the same case) — confirm against the assembly.
		if j == 0 {
			p256PointDoubleAsm(t2, &basePoint)
		} else {
			p256PointAddAsm(t2, t2, &basePoint)
		}
	}
}
| 396 | |
// p256BaseMult sets p to scalar times the base point using the precomputed
// table p256PreFast. scalar is read as 32 little-endian bytes, as produced
// by p256GetMultiplier.
//
// The scalar is consumed as 37 overlapping 8-bit windows. Window i starts at
// bit 7*i-1 (the first window has an implicit zero below bit 0) and boothW7
// recodes it into a digit of at most 64 plus a sign; row i of the table then
// supplies that multiple of 2^(7*i) times the base point.
func (p *p256Point) p256BaseMult(scalar []byte) {
	// First window: bits 0..6 of the scalar, shifted up by one.
	// TODO: For big endian, the index should be 31 not 0.
	wvalue := (uint(scalar[0]) << 1) & 0xff
	sel, sign := boothW7(uint(wvalue))
	p256SelectBase(p, p256PreFast[0][:], sel)
	p256NegCond(p, sign)

	// The table entries are affine, so both the accumulator and the
	// scratch point get z = 1 (in Montgomery form).
	copy(p.z[:], one[:])
	var t0 p256Point

	copy(t0.z[:], one[:])

	// index is the bit position where the next window starts; zero is the
	// OR of all digits seen so far and stays 0 until the first non-zero one.
	index := uint(6)
	zero := sel
	for i := 1; i < 37; i++ {
		// TODO: For big endian, use the same index values as found
		// in the s390x implementation.
		if index < 247 {
			// The window straddles two bytes.
			wvalue = ((uint(scalar[index/8]) >> (index % 8)) + (uint(scalar[index/8+1]) << (8 - (index % 8)))) & 0xff
		} else {
			// The window lies entirely in the last byte; bits beyond the
			// scalar read as zero.
			wvalue = (uint(scalar[index/8]) >> (index % 8)) & 0xff
		}
		index += 7
		sel, sign = boothW7(uint(wvalue))
		p256SelectBase(&t0, p256PreFast[i][:], sel)
		p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
		zero |= sel
	}
}
| 426 | |
| 427 | func (p *p256Point) p256ScalarMult(scalar []byte) { |
| 428 | // precomp is a table of precomputed points that stores powers of p |
| 429 | // from p^1 to p^16. |
| 430 | var precomp [16]p256Point |
| 431 | var t0, t1, t2, t3 p256Point |
| 432 | |
| 433 | *&precomp[0] = *p |
| 434 | p256PointDoubleAsm(&t0, p) |
| 435 | p256PointDoubleAsm(&t1, &t0) |
| 436 | p256PointDoubleAsm(&t2, &t1) |
| 437 | p256PointDoubleAsm(&t3, &t2) |
| 438 | *&precomp[1] = t0 |
| 439 | *&precomp[3] = t1 |
| 440 | *&precomp[7] = t2 |
| 441 | *&precomp[15] = t3 |
| 442 | |
| 443 | p256PointAddAsm(&t0, &t0, p) |
| 444 | p256PointAddAsm(&t1, &t1, p) |
| 445 | p256PointAddAsm(&t2, &t2, p) |
| 446 | |
| 447 | *&precomp[2] = t0 |
| 448 | *&precomp[4] = t1 |
| 449 | *&precomp[8] = t2 |
| 450 | |
| 451 | p256PointDoubleAsm(&t0, &t0) |
| 452 | p256PointDoubleAsm(&t1, &t1) |
| 453 | *&precomp[5] = t0 |
| 454 | *&precomp[9] = t1 |
| 455 | |
| 456 | p256PointAddAsm(&t2, &t0, p) |
| 457 | p256PointAddAsm(&t1, &t1, p) |
| 458 | *&precomp[6] = t2 |
| 459 | *&precomp[10] = t1 |
| 460 | |
| 461 | p256PointDoubleAsm(&t0, &t0) |
| 462 | p256PointDoubleAsm(&t2, &t2) |
| 463 | *&precomp[11] = t0 |
| 464 | *&precomp[13] = t2 |
| 465 | |
| 466 | p256PointAddAsm(&t0, &t0, p) |
| 467 | p256PointAddAsm(&t2, &t2, p) |
| 468 | *&precomp[12] = t0 |
| 469 | *&precomp[14] = t2 |
| 470 | |
| 471 | // Start scanning the window from top bit |
| 472 | index := uint(254) |
| 473 | var sel, sign int |
| 474 | |
| 475 | // TODO: For big endian, use index found in s390x implementation. |
| 476 | wvalue := (uint(scalar[index/8]) >> (index % 8)) & 0x3f |
| 477 | sel, _ = boothW5(uint(wvalue)) |
| 478 | p256Select(p, precomp[:], sel) |
| 479 | zero := sel |
| 480 | |
| 481 | for index > 4 { |
| 482 | index -= 5 |
| 483 | p256PointDoubleAsm(p, p) |
| 484 | p256PointDoubleAsm(p, p) |
| 485 | p256PointDoubleAsm(p, p) |
| 486 | p256PointDoubleAsm(p, p) |
| 487 | p256PointDoubleAsm(p, p) |
| 488 | |
| 489 | // TODO: For big endian, use index values as found in s390x implementation. |
| 490 | if index < 247 { |
| 491 | wvalue = ((uint(scalar[index/8]) >> (index % 8)) + (uint(scalar[index/8+1]) << (8 - (index % 8)))) & 0x3f |
| 492 | } else { |
| 493 | wvalue = (uint(scalar[index/8]) >> (index % 8)) & 0x3f |
| 494 | } |
| 495 | |
| 496 | sel, sign = boothW5(uint(wvalue)) |
| 497 | |
| 498 | p256Select(&t0, precomp[:], sel) |
| 499 | p256NegCond(&t0, sign) |
| 500 | p256PointAddAsm(&t1, p, &t0) |
| 501 | p256MovCond(&t1, &t1, p, sel) |
| 502 | p256MovCond(p, &t1, &t0, zero) |
| 503 | zero |= sel |
| 504 | } |
| 505 | |
| 506 | p256PointDoubleAsm(p, p) |
| 507 | p256PointDoubleAsm(p, p) |
| 508 | p256PointDoubleAsm(p, p) |
| 509 | p256PointDoubleAsm(p, p) |
| 510 | p256PointDoubleAsm(p, p) |
| 511 | |
| 512 | // TODO: Use index for big endian as found in s390x implementation. |
| 513 | wvalue = (uint(scalar[0]) << 1) & 0x3f |
| 514 | sel, sign = boothW5(uint(wvalue)) |
| 515 | |
| 516 | p256Select(&t0, precomp[:], sel) |
| 517 | p256NegCond(&t0, sign) |
| 518 | p256PointAddAsm(&t1, p, &t0) |
| 519 | p256MovCond(&t1, &t1, p, sel) |
| 520 | p256MovCond(p, &t1, &t0, zero) |
| 521 | } |