Blame - src/crypto/elliptic/p256_ppc64le.go - platform/prebuilts/go/darwin-x86

blob: e9a6a067a2258a23e90e8071ba183b699e7fab76 [file] [log] [blame]

Patrice Arruda	748609c	2020-06-25 12:12:21 -0700	[diff] [blame]	1	// Copyright 2019 The Go Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
Dan Willemsen	cc753b7	2021-08-31 13:25:42 -0700	[diff] [blame]	5	//go:build ppc64le
Patrice Arruda	748609c	2020-06-25 12:12:21 -0700	[diff] [blame]	6
				7	package elliptic
				8
				9	import (
				10	"crypto/subtle"
				11	"encoding/binary"
				12	"math/big"
				13	)
				14
				15	// This was ported from the s390x implementation for ppc64le.
				16	// Some hints are included here for changes that should be
				17	// in the big endian ppc64 implementation, however more
				18	// investigation and testing is needed for the ppc64 big
				19	// endian version to work.
				20	type p256CurveFast struct {
				21	*CurveParams
				22	}
				23
				24	type p256Point struct {
				25	x [32]byte
				26	y [32]byte
				27	z [32]byte
				28	}
				29
				30	var (
				31	p256 Curve
				32	p256PreFast *[37][64]p256Point
				33	)
				34
				35	func initP256Arch() {
				36	p256 = p256CurveFast{p256Params}
				37	initTable()
				38	return
				39	}
				40
				41	func (curve p256CurveFast) Params() *CurveParams {
				42	return curve.CurveParams
				43	}
				44
				45	// Functions implemented in p256_asm_ppc64le.s
				46	// Montgomery multiplication modulo P256
				47	//
				48	//go:noescape
				49	func p256MulAsm(res, in1, in2 []byte)
				50
				51	// Montgomery square modulo P256
				52	//
				53	func p256Sqr(res, in []byte) {
				54	p256MulAsm(res, in, in)
				55	}
				56
				57	// Montgomery multiplication by 1
				58	//
				59	//go:noescape
				60	func p256FromMont(res, in []byte)
				61
				62	// iff cond == 1 val <- -val
				63	//
				64	//go:noescape
				65	func p256NegCond(val *p256Point, cond int)
				66
				67	// if cond == 0 res <- b; else res <- a
				68	//
				69	//go:noescape
				70	func p256MovCond(res, a, b *p256Point, cond int)
				71
				72	// Constant time table access
				73	//
				74	//go:noescape
				75	func p256Select(point *p256Point, table []p256Point, idx int)
				76
				77	//
				78	//go:noescape
				79	func p256SelectBase(point *p256Point, table []p256Point, idx int)
				80
				81	// Point add with P2 being affine point
				82	// If sign == 1 -> P2 = -P2
				83	// If sel == 0 -> P3 = P1
				84	// if zero == 0 -> P3 = P2
				85	//
				86	//go:noescape
				87	func p256PointAddAffineAsm(res, in1, in2 *p256Point, sign, sel, zero int)
				88
				89	// Point add
				90	//
				91	//go:noescape
				92	func p256PointAddAsm(res, in1, in2 *p256Point) int
				93
				94	//
				95	//go:noescape
				96	func p256PointDoubleAsm(res, in *p256Point)
				97
				98	// The result should be a slice in LE order, but the slice
				99	// from big.Bytes is in BE order.
				100	// TODO: For big endian implementation, do not reverse bytes.
				101	//
				102	func fromBig(big *big.Int) []byte {
				103	// This could be done a lot more efficiently...
				104	res := big.Bytes()
				105	t := make([]byte, 32)
				106	if len(res) < 32 {
				107	copy(t[32-len(res):], res)
				108	} else if len(res) == 32 {
				109	copy(t, res)
				110	} else {
				111	copy(t, res[len(res)-32:])
				112	}
				113	p256ReverseBytes(t, t)
				114	return t
				115	}
				116
				117	// p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar
				118	// is equal or greater than the order of the group, it's reduced modulo that order.
				119	func p256GetMultiplier(in []byte) []byte {
				120	n := new(big.Int).SetBytes(in)
				121
				122	if n.Cmp(p256Params.N) >= 0 {
				123	n.Mod(n, p256Params.N)
				124	}
				125	return fromBig(n)
				126	}
				127
				128	// p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the
				129	// underlying field of the curve. (See initP256 for the value.) Thus rr here is
				130	// R×R mod p. See comment in Inverse about how this is used.
				131	// TODO: For big endian implementation, the bytes in these slices should be in reverse order,
				132	// as found in the s390x implementation.
				133	var rr = []byte{0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0, 0xff, 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00}
				134
				135	// (This is one, in the Montgomery domain.)
				136	var one = []byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}
				137
				138	func maybeReduceModP(in big.Int) big.Int {
				139	if in.Cmp(p256Params.P) < 0 {
				140	return in
				141	}
				142	return new(big.Int).Mod(in, p256Params.P)
				143	}
				144
				145	// p256ReverseBytes copies the first 32 bytes from in to res in reverse order.
				146	func p256ReverseBytes(res, in []byte) {
				147	// remove bounds check
				148	in = in[:32]
				149	res = res[:32]
				150
				151	// Load in reverse order
				152	a := binary.BigEndian.Uint64(in[0:])
				153	b := binary.BigEndian.Uint64(in[8:])
				154	c := binary.BigEndian.Uint64(in[16:])
				155	d := binary.BigEndian.Uint64(in[24:])
				156
				157	// Store in normal order
				158	binary.LittleEndian.PutUint64(res[0:], d)
				159	binary.LittleEndian.PutUint64(res[8:], c)
				160	binary.LittleEndian.PutUint64(res[16:], b)
				161	binary.LittleEndian.PutUint64(res[24:], a)
				162	}
				163
				164	func (curve p256CurveFast) CombinedMult(bigX, bigY big.Int, baseScalar, scalar []byte) (x, y big.Int) {
				165	var r1, r2 p256Point
				166
				167	scalarReduced := p256GetMultiplier(baseScalar)
				168	r1IsInfinity := scalarIsZero(scalarReduced)
				169	r1.p256BaseMult(scalarReduced)
				170
				171	copy(r2.x[:], fromBig(maybeReduceModP(bigX)))
				172	copy(r2.y[:], fromBig(maybeReduceModP(bigY)))
				173	copy(r2.z[:], one)
				174	p256MulAsm(r2.x[:], r2.x[:], rr[:])
				175	p256MulAsm(r2.y[:], r2.y[:], rr[:])
				176
				177	scalarReduced = p256GetMultiplier(scalar)
				178	r2IsInfinity := scalarIsZero(scalarReduced)
				179	r2.p256ScalarMult(scalarReduced)
				180
				181	var sum, double p256Point
				182	pointsEqual := p256PointAddAsm(&sum, &r1, &r2)
				183	p256PointDoubleAsm(&double, &r1)
				184	p256MovCond(&sum, &double, &sum, pointsEqual)
				185	p256MovCond(&sum, &r1, &sum, r2IsInfinity)
				186	p256MovCond(&sum, &r2, &sum, r1IsInfinity)
				187	return sum.p256PointToAffine()
				188	}
				189
				190	func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
				191	var r p256Point
				192	reducedScalar := p256GetMultiplier(scalar)
				193	r.p256BaseMult(reducedScalar)
				194	return r.p256PointToAffine()
				195	}
				196
				197	func (curve p256CurveFast) ScalarMult(bigX, bigY big.Int, scalar []byte) (x, y big.Int) {
				198	scalarReduced := p256GetMultiplier(scalar)
				199	var r p256Point
				200	copy(r.x[:], fromBig(maybeReduceModP(bigX)))
				201	copy(r.y[:], fromBig(maybeReduceModP(bigY)))
				202	copy(r.z[:], one)
				203	p256MulAsm(r.x[:], r.x[:], rr[:])
				204	p256MulAsm(r.y[:], r.y[:], rr[:])
				205	r.p256ScalarMult(scalarReduced)
				206	return r.p256PointToAffine()
				207	}
				208
				209	func scalarIsZero(scalar []byte) int {
				210	// If any byte is not zero, return 0.
				211	// Check for -0.... since that appears to compare to 0.
				212	b := byte(0)
				213	for _, s := range scalar {
				214	b \|= s
				215	}
				216	return subtle.ConstantTimeByteEq(b, 0)
				217	}
				218
				219	func (p p256Point) p256PointToAffine() (x, y big.Int) {
				220	zInv := make([]byte, 32)
				221	zInvSq := make([]byte, 32)
				222
				223	p256Inverse(zInv, p.z[:])
				224	p256Sqr(zInvSq, zInv)
				225	p256MulAsm(zInv, zInv, zInvSq)
				226
				227	p256MulAsm(zInvSq, p.x[:], zInvSq)
				228	p256MulAsm(zInv, p.y[:], zInv)
				229
				230	p256FromMont(zInvSq, zInvSq)
				231	p256FromMont(zInv, zInv)
				232
				233	// SetBytes expects a slice in big endian order,
				234	// since ppc64le is little endian, reverse the bytes.
				235	// TODO: For big endian, bytes don't need to be reversed.
				236	p256ReverseBytes(zInvSq, zInvSq)
				237	p256ReverseBytes(zInv, zInv)
				238	rx := new(big.Int).SetBytes(zInvSq)
				239	ry := new(big.Int).SetBytes(zInv)
				240	return rx, ry
				241	}
				242
				243	// p256Inverse sets out to in^-1 mod p.
				244	func p256Inverse(out, in []byte) {
				245	var stack [6 * 32]byte
				246	p2 := stack[320 : 320+32]
				247	p4 := stack[321 : 321+32]
				248	p8 := stack[322 : 322+32]
				249	p16 := stack[323 : 323+32]
				250	p32 := stack[324 : 324+32]
				251
				252	p256Sqr(out, in)
				253	p256MulAsm(p2, out, in) // 3*p
				254
				255	p256Sqr(out, p2)
				256	p256Sqr(out, out)
				257	p256MulAsm(p4, out, p2) // f*p
				258
				259	p256Sqr(out, p4)
				260	p256Sqr(out, out)
				261	p256Sqr(out, out)
				262	p256Sqr(out, out)
				263	p256MulAsm(p8, out, p4) // ff*p
				264
				265	p256Sqr(out, p8)
				266
				267	for i := 0; i < 7; i++ {
				268	p256Sqr(out, out)
				269	}
				270	p256MulAsm(p16, out, p8) // ffff*p
				271
				272	p256Sqr(out, p16)
				273	for i := 0; i < 15; i++ {
				274	p256Sqr(out, out)
				275	}
				276	p256MulAsm(p32, out, p16) // ffffffff*p
				277
				278	p256Sqr(out, p32)
				279
				280	for i := 0; i < 31; i++ {
				281	p256Sqr(out, out)
				282	}
				283	p256MulAsm(out, out, in)
				284
				285	for i := 0; i < 32*4; i++ {
				286	p256Sqr(out, out)
				287	}
				288	p256MulAsm(out, out, p32)
				289
				290	for i := 0; i < 32; i++ {
				291	p256Sqr(out, out)
				292	}
				293	p256MulAsm(out, out, p32)
				294
				295	for i := 0; i < 16; i++ {
				296	p256Sqr(out, out)
				297	}
				298	p256MulAsm(out, out, p16)
				299
				300	for i := 0; i < 8; i++ {
				301	p256Sqr(out, out)
				302	}
				303	p256MulAsm(out, out, p8)
				304
				305	p256Sqr(out, out)
				306	p256Sqr(out, out)
				307	p256Sqr(out, out)
				308	p256Sqr(out, out)
				309	p256MulAsm(out, out, p4)
				310
				311	p256Sqr(out, out)
				312	p256Sqr(out, out)
				313	p256MulAsm(out, out, p2)
				314
				315	p256Sqr(out, out)
				316	p256Sqr(out, out)
				317	p256MulAsm(out, out, in)
				318	}
				319
				320	func boothW5(in uint) (int, int) {
				321	var s uint = ^((in >> 5) - 1)
				322	var d uint = (1 << 6) - in - 1
				323	d = (d & s) \| (in & (^s))
				324	d = (d >> 1) + (d & 1)
				325	return int(d), int(s & 1)
				326	}
				327
				328	func boothW6(in uint) (int, int) {
				329	var s uint = ^((in >> 6) - 1)
				330	var d uint = (1 << 7) - in - 1
				331	d = (d & s) \| (in & (^s))
				332	d = (d >> 1) + (d & 1)
				333	return int(d), int(s & 1)
				334	}
				335
				336	func boothW7(in uint) (int, int) {
				337	var s uint = ^((in >> 7) - 1)
				338	var d uint = (1 << 8) - in - 1
				339	d = (d & s) \| (in & (^s))
				340	d = (d >> 1) + (d & 1)
				341	return int(d), int(s & 1)
				342	}
				343
				344	func initTable() {
				345
				346	p256PreFast = new([37][64]p256Point)
				347
				348	// TODO: For big endian, these slices should be in reverse byte order,
				349	// as found in the s390x implementation.
				350	basePoint := p256Point{
				351	x: [32]byte{0x3c, 0x14, 0xa9, 0x18, 0xd4, 0x30, 0xe7, 0x79, 0x01, 0xb6, 0xed, 0x5f, 0xfc, 0x95, 0xba, 0x75,
				352	0x10, 0x25, 0x62, 0x77, 0x2b, 0x73, 0xfb, 0x79, 0xc6, 0x55, 0x37, 0xa5, 0x76, 0x5f, 0x90, 0x18}, //(p256.x*2^256)%p
				353	y: [32]byte{0x0a, 0x56, 0x95, 0xce, 0x57, 0x53, 0xf2, 0xdd, 0x5c, 0xe4, 0x19, 0xba, 0xe4, 0xb8, 0x4a, 0x8b,
				354	0x25, 0xf3, 0x21, 0xdd, 0x88, 0x86, 0xe8, 0xd2, 0x85, 0x5d, 0x88, 0x25, 0x18, 0xff, 0x71, 0x85}, //(p256.y*2^256)%p
				355	z: [32]byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
				356	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}, //(p256.z*2^256)%p
				357
				358	}
				359
				360	t1 := new(p256Point)
				361	t2 := new(p256Point)
				362	*t2 = basePoint
				363
				364	zInv := make([]byte, 32)
				365	zInvSq := make([]byte, 32)
				366	for j := 0; j < 64; j++ {
				367	t1 = t2
				368	for i := 0; i < 37; i++ {
				369	// The window size is 7 so we need to double 7 times.
				370	if i != 0 {
				371	for k := 0; k < 7; k++ {
				372	p256PointDoubleAsm(t1, t1)
				373	}
				374	}
				375	// Convert the point to affine form. (Its values are
				376	// still in Montgomery form however.)
				377	p256Inverse(zInv, t1.z[:])
				378	p256Sqr(zInvSq, zInv)
				379	p256MulAsm(zInv, zInv, zInvSq)
				380
				381	p256MulAsm(t1.x[:], t1.x[:], zInvSq)
				382	p256MulAsm(t1.y[:], t1.y[:], zInv)
				383
				384	copy(t1.z[:], basePoint.z[:])
				385	// Update the table entry
				386	copy(p256PreFast[i][j].x[:], t1.x[:])
				387	copy(p256PreFast[i][j].y[:], t1.y[:])
				388	}
				389	if j == 0 {
				390	p256PointDoubleAsm(t2, &basePoint)
				391	} else {
				392	p256PointAddAsm(t2, t2, &basePoint)
				393	}
				394	}
				395	}
				396
				397	func (p *p256Point) p256BaseMult(scalar []byte) {
				398	// TODO: For big endian, the index should be 31 not 0.
				399	wvalue := (uint(scalar[0]) << 1) & 0xff
				400	sel, sign := boothW7(uint(wvalue))
				401	p256SelectBase(p, p256PreFast[0][:], sel)
				402	p256NegCond(p, sign)
				403
				404	copy(p.z[:], one[:])
				405	var t0 p256Point
				406
				407	copy(t0.z[:], one[:])
				408
				409	index := uint(6)
				410	zero := sel
				411	for i := 1; i < 37; i++ {
				412	// TODO: For big endian, use the same index values as found
				413	// in the s390x implementation.
				414	if index < 247 {
				415	wvalue = ((uint(scalar[index/8]) >> (index % 8)) + (uint(scalar[index/8+1]) << (8 - (index % 8)))) & 0xff
				416	} else {
				417	wvalue = (uint(scalar[index/8]) >> (index % 8)) & 0xff
				418	}
				419	index += 7
				420	sel, sign = boothW7(uint(wvalue))
				421	p256SelectBase(&t0, p256PreFast[i][:], sel)
				422	p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
				423	zero \|= sel
				424	}
				425	}
				426
				427	func (p *p256Point) p256ScalarMult(scalar []byte) {
				428	// precomp is a table of precomputed points that stores powers of p
				429	// from p^1 to p^16.
				430	var precomp [16]p256Point
				431	var t0, t1, t2, t3 p256Point
				432
				433	&precomp[0] = p
				434	p256PointDoubleAsm(&t0, p)
				435	p256PointDoubleAsm(&t1, &t0)
				436	p256PointDoubleAsm(&t2, &t1)
				437	p256PointDoubleAsm(&t3, &t2)
				438	*&precomp[1] = t0
				439	*&precomp[3] = t1
				440	*&precomp[7] = t2
				441	*&precomp[15] = t3
				442
				443	p256PointAddAsm(&t0, &t0, p)
				444	p256PointAddAsm(&t1, &t1, p)
				445	p256PointAddAsm(&t2, &t2, p)
				446
				447	*&precomp[2] = t0
				448	*&precomp[4] = t1
				449	*&precomp[8] = t2
				450
				451	p256PointDoubleAsm(&t0, &t0)
				452	p256PointDoubleAsm(&t1, &t1)
				453	*&precomp[5] = t0
				454	*&precomp[9] = t1
				455
				456	p256PointAddAsm(&t2, &t0, p)
				457	p256PointAddAsm(&t1, &t1, p)
				458	*&precomp[6] = t2
				459	*&precomp[10] = t1
				460
				461	p256PointDoubleAsm(&t0, &t0)
				462	p256PointDoubleAsm(&t2, &t2)
				463	*&precomp[11] = t0
				464	*&precomp[13] = t2
				465
				466	p256PointAddAsm(&t0, &t0, p)
				467	p256PointAddAsm(&t2, &t2, p)
				468	*&precomp[12] = t0
				469	*&precomp[14] = t2
				470
				471	// Start scanning the window from top bit
				472	index := uint(254)
				473	var sel, sign int
				474
				475	// TODO: For big endian, use index found in s390x implementation.
				476	wvalue := (uint(scalar[index/8]) >> (index % 8)) & 0x3f
				477	sel, _ = boothW5(uint(wvalue))
				478	p256Select(p, precomp[:], sel)
				479	zero := sel
				480
				481	for index > 4 {
				482	index -= 5
				483	p256PointDoubleAsm(p, p)
				484	p256PointDoubleAsm(p, p)
				485	p256PointDoubleAsm(p, p)
				486	p256PointDoubleAsm(p, p)
				487	p256PointDoubleAsm(p, p)
				488
				489	// TODO: For big endian, use index values as found in s390x implementation.
				490	if index < 247 {
				491	wvalue = ((uint(scalar[index/8]) >> (index % 8)) + (uint(scalar[index/8+1]) << (8 - (index % 8)))) & 0x3f
				492	} else {
				493	wvalue = (uint(scalar[index/8]) >> (index % 8)) & 0x3f
				494	}
				495
				496	sel, sign = boothW5(uint(wvalue))
				497
				498	p256Select(&t0, precomp[:], sel)
				499	p256NegCond(&t0, sign)
				500	p256PointAddAsm(&t1, p, &t0)
				501	p256MovCond(&t1, &t1, p, sel)
				502	p256MovCond(p, &t1, &t0, zero)
				503	zero \|= sel
				504	}
				505
				506	p256PointDoubleAsm(p, p)
				507	p256PointDoubleAsm(p, p)
				508	p256PointDoubleAsm(p, p)
				509	p256PointDoubleAsm(p, p)
				510	p256PointDoubleAsm(p, p)
				511
				512	// TODO: Use index for big endian as found in s390x implementation.
				513	wvalue = (uint(scalar[0]) << 1) & 0x3f
				514	sel, sign = boothW5(uint(wvalue))
				515
				516	p256Select(&t0, precomp[:], sel)
				517	p256NegCond(&t0, sign)
				518	p256PointAddAsm(&t1, p, &t0)
				519	p256MovCond(&t1, &t1, p, sel)
				520	p256MovCond(p, &t1, &t0, zero)
				521	}