Blame - arch/arm/vfp/vfpsingle.c - kernel/hikey-linaro

blob: 6849fe35cb2e7d59d3310d0c95807e3bdd28db08 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame^]	1	/*
				2	* linux/arch/arm/vfp/vfpsingle.c
				3	*
				4	* This code is derived in part from John R. Hauser's SoftFloat library, which
				5	* carries the following notice:
				6	*
				7	* ===========================================================================
				8	* This C source file is part of the SoftFloat IEC/IEEE Floating-point
				9	* Arithmetic Package, Release 2.
				10	*
				11	* Written by John R. Hauser. This work was made possible in part by the
				12	* International Computer Science Institute, located at Suite 600, 1947 Center
				13	* Street, Berkeley, California 94704. Funding was partially provided by the
				14	* National Science Foundation under grant MIP-9311980. The original version
				15	* of this code was written as part of a project to build a fixed-point vector
				16	* processor in collaboration with the University of California at Berkeley,
				17	* overseen by Profs. Nelson Morgan and John Wawrzynek. More information
				18	* is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
				19	* arithmetic/softfloat.html'.
				20	*
				21	* THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
				22	* has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
				23	* TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
				24	* PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
				25	* AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
				26	*
				27	* Derivative works are acceptable, even for commercial purposes, so long as
				28	* (1) they include prominent notice that the work is derivative, and (2) they
				29	* include prominent notice akin to these three paragraphs for those parts of
				30	* this code that are retained.
				31	* ===========================================================================
				32	*/
				33	#include <linux/kernel.h>
				34	#include <linux/bitops.h>
				35	#include <asm/ptrace.h>
				36	#include <asm/vfp.h>
				37
				38	#include "vfpinstr.h"
				39	#include "vfp.h"
				40
				41	static struct vfp_single vfp_single_default_qnan = {
				42	.exponent = 255,
				43	.sign = 0,
				44	.significand = VFP_SINGLE_SIGNIFICAND_QNAN,
				45	};
				46
				47	static void vfp_single_dump(const char str, struct vfp_single s)
				48	{
				49	pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
				50	str, s->sign != 0, s->exponent, s->significand);
				51	}
				52
				53	static void vfp_single_normalise_denormal(struct vfp_single *vs)
				54	{
				55	int bits = 31 - fls(vs->significand);
				56
				57	vfp_single_dump("normalise_denormal: in", vs);
				58
				59	if (bits) {
				60	vs->exponent -= bits - 1;
				61	vs->significand <<= bits;
				62	}
				63
				64	vfp_single_dump("normalise_denormal: out", vs);
				65	}
				66
				67	#ifndef DEBUG
				68	#define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
				69	u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions)
				70	#else
				71	u32 vfp_single_normaliseround(int sd, struct vfp_single vs, u32 fpscr, u32 exceptions, const char func)
				72	#endif
				73	{
				74	u32 significand, incr, rmode;
				75	int exponent, shift, underflow;
				76
				77	vfp_single_dump("pack: in", vs);
				78
				79	/*
				80	* Infinities and NaNs are a special case.
				81	*/
				82	if (vs->exponent == 255 && (vs->significand == 0 \|\| exceptions))
				83	goto pack;
				84
				85	/*
				86	* Special-case zero.
				87	*/
				88	if (vs->significand == 0) {
				89	vs->exponent = 0;
				90	goto pack;
				91	}
				92
				93	exponent = vs->exponent;
				94	significand = vs->significand;
				95
				96	/*
				97	* Normalise first. Note that we shift the significand up to
				98	* bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
				99	* significant bit.
				100	*/
				101	shift = 32 - fls(significand);
				102	if (shift < 32 && shift) {
				103	exponent -= shift;
				104	significand <<= shift;
				105	}
				106
				107	#ifdef DEBUG
				108	vs->exponent = exponent;
				109	vs->significand = significand;
				110	vfp_single_dump("pack: normalised", vs);
				111	#endif
				112
				113	/*
				114	* Tiny number?
				115	*/
				116	underflow = exponent < 0;
				117	if (underflow) {
				118	significand = vfp_shiftright32jamming(significand, -exponent);
				119	exponent = 0;
				120	#ifdef DEBUG
				121	vs->exponent = exponent;
				122	vs->significand = significand;
				123	vfp_single_dump("pack: tiny number", vs);
				124	#endif
				125	if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
				126	underflow = 0;
				127	}
				128
				129	/*
				130	* Select rounding increment.
				131	*/
				132	incr = 0;
				133	rmode = fpscr & FPSCR_RMODE_MASK;
				134
				135	if (rmode == FPSCR_ROUND_NEAREST) {
				136	incr = 1 << VFP_SINGLE_LOW_BITS;
				137	if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
				138	incr -= 1;
				139	} else if (rmode == FPSCR_ROUND_TOZERO) {
				140	incr = 0;
				141	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
				142	incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;
				143
				144	pr_debug("VFP: rounding increment = 0x%08x\n", incr);
				145
				146	/*
				147	* Is our rounding going to overflow?
				148	*/
				149	if ((significand + incr) < significand) {
				150	exponent += 1;
				151	significand = (significand >> 1) \| (significand & 1);
				152	incr >>= 1;
				153	#ifdef DEBUG
				154	vs->exponent = exponent;
				155	vs->significand = significand;
				156	vfp_single_dump("pack: overflow", vs);
				157	#endif
				158	}
				159
				160	/*
				161	* If any of the low bits (which will be shifted out of the
				162	* number) are non-zero, the result is inexact.
				163	*/
				164	if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
				165	exceptions \|= FPSCR_IXC;
				166
				167	/*
				168	* Do our rounding.
				169	*/
				170	significand += incr;
				171
				172	/*
				173	* Infinity?
				174	*/
				175	if (exponent >= 254) {
				176	exceptions \|= FPSCR_OFC \| FPSCR_IXC;
				177	if (incr == 0) {
				178	vs->exponent = 253;
				179	vs->significand = 0x7fffffff;
				180	} else {
				181	vs->exponent = 255; /* infinity */
				182	vs->significand = 0;
				183	}
				184	} else {
				185	if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
				186	exponent = 0;
				187	if (exponent \|\| significand > 0x80000000)
				188	underflow = 0;
				189	if (underflow)
				190	exceptions \|= FPSCR_UFC;
				191	vs->exponent = exponent;
				192	vs->significand = significand >> 1;
				193	}
				194
				195	pack:
				196	vfp_single_dump("pack: final", vs);
				197	{
				198	s32 d = vfp_single_pack(vs);
				199	pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
				200	sd, d, exceptions);
				201	vfp_put_float(sd, d);
				202	}
				203
				204	return exceptions & ~VFP_NAN_FLAG;
				205	}
				206
				207	/*
				208	* Propagate the NaN, setting exceptions if it is signalling.
				209	* 'n' is always a NaN. 'm' may be a number, NaN or infinity.
				210	*/
				211	static u32
				212	vfp_propagate_nan(struct vfp_single vsd, struct vfp_single vsn,
				213	struct vfp_single *vsm, u32 fpscr)
				214	{
				215	struct vfp_single *nan;
				216	int tn, tm = 0;
				217
				218	tn = vfp_single_type(vsn);
				219
				220	if (vsm)
				221	tm = vfp_single_type(vsm);
				222
				223	if (fpscr & FPSCR_DEFAULT_NAN)
				224	/*
				225	* Default NaN mode - always returns a quiet NaN
				226	*/
				227	nan = &vfp_single_default_qnan;
				228	else {
				229	/*
				230	* Contemporary mode - select the first signalling
				231	* NAN, or if neither are signalling, the first
				232	* quiet NAN.
				233	*/
				234	if (tn == VFP_SNAN \|\| (tm != VFP_SNAN && tn == VFP_QNAN))
				235	nan = vsn;
				236	else
				237	nan = vsm;
				238	/*
				239	* Make the NaN quiet.
				240	*/
				241	nan->significand \|= VFP_SINGLE_SIGNIFICAND_QNAN;
				242	}
				243
				244	vsd = nan;
				245
				246	/*
				247	* If one was a signalling NAN, raise invalid operation.
				248	*/
				249	return tn == VFP_SNAN \|\| tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
				250	}
				251
				252
				253	/*
				254	* Extended operations
				255	*/
				256	static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr)
				257	{
				258	vfp_put_float(sd, vfp_single_packed_abs(m));
				259	return 0;
				260	}
				261
				262	static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr)
				263	{
				264	vfp_put_float(sd, m);
				265	return 0;
				266	}
				267
				268	static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr)
				269	{
				270	vfp_put_float(sd, vfp_single_packed_negate(m));
				271	return 0;
				272	}
				273
				274	static const u16 sqrt_oddadjust[] = {
				275	0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
				276	0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
				277	};
				278
				279	static const u16 sqrt_evenadjust[] = {
				280	0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
				281	0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
				282	};
				283
				284	u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
				285	{
				286	int index;
				287	u32 z, a;
				288
				289	if ((significand & 0xc0000000) != 0x40000000) {
				290	printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n");
				291	}
				292
				293	a = significand << 1;
				294	index = (a >> 27) & 15;
				295	if (exponent & 1) {
				296	z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
				297	z = ((a / z) << 14) + (z << 15);
				298	a >>= 1;
				299	} else {
				300	z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
				301	z = a / z + z;
				302	z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
				303	if (z <= a)
				304	return (s32)a >> 1;
				305	}
				306	return (u32)(((u64)a << 31) / z) + (z >> 1);
				307	}
				308
				309	static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
				310	{
				311	struct vfp_single vsm, vsd;
				312	int ret, tm;
				313
				314	vfp_single_unpack(&vsm, m);
				315	tm = vfp_single_type(&vsm);
				316	if (tm & (VFP_NAN\|VFP_INFINITY)) {
				317	struct vfp_single *vsp = &vsd;
				318
				319	if (tm & VFP_NAN)
				320	ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
				321	else if (vsm.sign == 0) {
				322	sqrt_copy:
				323	vsp = &vsm;
				324	ret = 0;
				325	} else {
				326	sqrt_invalid:
				327	vsp = &vfp_single_default_qnan;
				328	ret = FPSCR_IOC;
				329	}
				330	vfp_put_float(sd, vfp_single_pack(vsp));
				331	return ret;
				332	}
				333
				334	/*
				335	* sqrt(+/- 0) == +/- 0
				336	*/
				337	if (tm & VFP_ZERO)
				338	goto sqrt_copy;
				339
				340	/*
				341	* Normalise a denormalised number
				342	*/
				343	if (tm & VFP_DENORMAL)
				344	vfp_single_normalise_denormal(&vsm);
				345
				346	/*
				347	* sqrt(<0) = invalid
				348	*/
				349	if (vsm.sign)
				350	goto sqrt_invalid;
				351
				352	vfp_single_dump("sqrt", &vsm);
				353
				354	/*
				355	* Estimate the square root.
				356	*/
				357	vsd.sign = 0;
				358	vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
				359	vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;
				360
				361	vfp_single_dump("sqrt estimate", &vsd);
				362
				363	/*
				364	* And now adjust.
				365	*/
				366	if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
				367	if (vsd.significand < 2) {
				368	vsd.significand = 0xffffffff;
				369	} else {
				370	u64 term;
				371	s64 rem;
				372	vsm.significand <<= !(vsm.exponent & 1);
				373	term = (u64)vsd.significand * vsd.significand;
				374	rem = ((u64)vsm.significand << 32) - term;
				375
				376	pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);
				377
				378	while (rem < 0) {
				379	vsd.significand -= 1;
				380	rem += ((u64)vsd.significand << 1) \| 1;
				381	}
				382	vsd.significand \|= rem != 0;
				383	}
				384	}
				385	vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);
				386
				387	return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
				388	}
				389
				390	/*
				391	* Equal := ZC
				392	* Less than := N
				393	* Greater than := C
				394	* Unordered := CV
				395	*/
				396	static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
				397	{
				398	s32 d;
				399	u32 ret = 0;
				400
				401	d = vfp_get_float(sd);
				402	if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
				403	ret \|= FPSCR_C \| FPSCR_V;
				404	if (signal_on_qnan \|\| !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
				405	/*
				406	* Signalling NaN, or signalling on quiet NaN
				407	*/
				408	ret \|= FPSCR_IOC;
				409	}
				410
				411	if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
				412	ret \|= FPSCR_C \| FPSCR_V;
				413	if (signal_on_qnan \|\| !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
				414	/*
				415	* Signalling NaN, or signalling on quiet NaN
				416	*/
				417	ret \|= FPSCR_IOC;
				418	}
				419
				420	if (ret == 0) {
				421	if (d == m \|\| vfp_single_packed_abs(d \| m) == 0) {
				422	/*
				423	* equal
				424	*/
				425	ret \|= FPSCR_Z \| FPSCR_C;
				426	} else if (vfp_single_packed_sign(d ^ m)) {
				427	/*
				428	* different signs
				429	*/
				430	if (vfp_single_packed_sign(d))
				431	/*
				432	* d is negative, so d < m
				433	*/
				434	ret \|= FPSCR_N;
				435	else
				436	/*
				437	* d is positive, so d > m
				438	*/
				439	ret \|= FPSCR_C;
				440	} else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
				441	/*
				442	* d < m
				443	*/
				444	ret \|= FPSCR_N;
				445	} else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
				446	/*
				447	* d > m
				448	*/
				449	ret \|= FPSCR_C;
				450	}
				451	}
				452	return ret;
				453	}
				454
				455	static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr)
				456	{
				457	return vfp_compare(sd, 0, m, fpscr);
				458	}
				459
				460	static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr)
				461	{
				462	return vfp_compare(sd, 1, m, fpscr);
				463	}
				464
				465	static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr)
				466	{
				467	return vfp_compare(sd, 0, 0, fpscr);
				468	}
				469
				470	static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr)
				471	{
				472	return vfp_compare(sd, 1, 0, fpscr);
				473	}
				474
				475	static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
				476	{
				477	struct vfp_single vsm;
				478	struct vfp_double vdd;
				479	int tm;
				480	u32 exceptions = 0;
				481
				482	vfp_single_unpack(&vsm, m);
				483
				484	tm = vfp_single_type(&vsm);
				485
				486	/*
				487	* If we have a signalling NaN, signal invalid operation.
				488	*/
				489	if (tm == VFP_SNAN)
				490	exceptions = FPSCR_IOC;
				491
				492	if (tm & VFP_DENORMAL)
				493	vfp_single_normalise_denormal(&vsm);
				494
				495	vdd.sign = vsm.sign;
				496	vdd.significand = (u64)vsm.significand << 32;
				497
				498	/*
				499	* If we have an infinity or NaN, the exponent must be 2047.
				500	*/
				501	if (tm & (VFP_INFINITY\|VFP_NAN)) {
				502	vdd.exponent = 2047;
				503	if (tm & VFP_NAN)
				504	vdd.significand \|= VFP_DOUBLE_SIGNIFICAND_QNAN;
				505	goto pack_nan;
				506	} else if (tm & VFP_ZERO)
				507	vdd.exponent = 0;
				508	else
				509	vdd.exponent = vsm.exponent + (1023 - 127);
				510
				511	/*
				512	* Technically, if bit 0 of dd is set, this is an invalid
				513	* instruction. However, we ignore this for efficiency.
				514	*/
				515	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");
				516
				517	pack_nan:
				518	vfp_put_double(dd, vfp_double_pack(&vdd));
				519	return exceptions;
				520	}
				521
				522	static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr)
				523	{
				524	struct vfp_single vs;
				525
				526	vs.sign = 0;
				527	vs.exponent = 127 + 31 - 1;
				528	vs.significand = (u32)m;
				529
				530	return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito");
				531	}
				532
				533	static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr)
				534	{
				535	struct vfp_single vs;
				536
				537	vs.sign = (m & 0x80000000) >> 16;
				538	vs.exponent = 127 + 31 - 1;
				539	vs.significand = vs.sign ? -m : m;
				540
				541	return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito");
				542	}
				543
				544	static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
				545	{
				546	struct vfp_single vsm;
				547	u32 d, exceptions = 0;
				548	int rmode = fpscr & FPSCR_RMODE_MASK;
				549	int tm;
				550
				551	vfp_single_unpack(&vsm, m);
				552	vfp_single_dump("VSM", &vsm);
				553
				554	/*
				555	* Do we have a denormalised number?
				556	*/
				557	tm = vfp_single_type(&vsm);
				558	if (tm & VFP_DENORMAL)
				559	exceptions \|= FPSCR_IDC;
				560
				561	if (tm & VFP_NAN)
				562	vsm.sign = 0;
				563
				564	if (vsm.exponent >= 127 + 32) {
				565	d = vsm.sign ? 0 : 0xffffffff;
				566	exceptions = FPSCR_IOC;
				567	} else if (vsm.exponent >= 127 - 1) {
				568	int shift = 127 + 31 - vsm.exponent;
				569	u32 rem, incr = 0;
				570
				571	/*
				572	* 2^0 <= m < 2^32-2^8
				573	*/
				574	d = (vsm.significand << 1) >> shift;
				575	rem = vsm.significand << (33 - shift);
				576
				577	if (rmode == FPSCR_ROUND_NEAREST) {
				578	incr = 0x80000000;
				579	if ((d & 1) == 0)
				580	incr -= 1;
				581	} else if (rmode == FPSCR_ROUND_TOZERO) {
				582	incr = 0;
				583	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
				584	incr = ~0;
				585	}
				586
				587	if ((rem + incr) < rem) {
				588	if (d < 0xffffffff)
				589	d += 1;
				590	else
				591	exceptions \|= FPSCR_IOC;
				592	}
				593
				594	if (d && vsm.sign) {
				595	d = 0;
				596	exceptions \|= FPSCR_IOC;
				597	} else if (rem)
				598	exceptions \|= FPSCR_IXC;
				599	} else {
				600	d = 0;
				601	if (vsm.exponent \| vsm.significand) {
				602	exceptions \|= FPSCR_IXC;
				603	if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
				604	d = 1;
				605	else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
				606	d = 0;
				607	exceptions \|= FPSCR_IOC;
				608	}
				609	}
				610	}
				611
				612	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
				613
				614	vfp_put_float(sd, d);
				615
				616	return exceptions;
				617	}
				618
				619	static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr)
				620	{
				621	return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO);
				622	}
				623
				624	static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
				625	{
				626	struct vfp_single vsm;
				627	u32 d, exceptions = 0;
				628	int rmode = fpscr & FPSCR_RMODE_MASK;
				629
				630	vfp_single_unpack(&vsm, m);
				631	vfp_single_dump("VSM", &vsm);
				632
				633	/*
				634	* Do we have a denormalised number?
				635	*/
				636	if (vfp_single_type(&vsm) & VFP_DENORMAL)
				637	exceptions \|= FPSCR_IDC;
				638
				639	if (vsm.exponent >= 127 + 32) {
				640	/*
				641	* m >= 2^31-2^7: invalid
				642	*/
				643	d = 0x7fffffff;
				644	if (vsm.sign)
				645	d = ~d;
				646	exceptions \|= FPSCR_IOC;
				647	} else if (vsm.exponent >= 127 - 1) {
				648	int shift = 127 + 31 - vsm.exponent;
				649	u32 rem, incr = 0;
				650
				651	/* 2^0 <= m <= 2^31-2^7 */
				652	d = (vsm.significand << 1) >> shift;
				653	rem = vsm.significand << (33 - shift);
				654
				655	if (rmode == FPSCR_ROUND_NEAREST) {
				656	incr = 0x80000000;
				657	if ((d & 1) == 0)
				658	incr -= 1;
				659	} else if (rmode == FPSCR_ROUND_TOZERO) {
				660	incr = 0;
				661	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
				662	incr = ~0;
				663	}
				664
				665	if ((rem + incr) < rem && d < 0xffffffff)
				666	d += 1;
				667	if (d > 0x7fffffff + (vsm.sign != 0)) {
				668	d = 0x7fffffff + (vsm.sign != 0);
				669	exceptions \|= FPSCR_IOC;
				670	} else if (rem)
				671	exceptions \|= FPSCR_IXC;
				672
				673	if (vsm.sign)
				674	d = -d;
				675	} else {
				676	d = 0;
				677	if (vsm.exponent \| vsm.significand) {
				678	exceptions \|= FPSCR_IXC;
				679	if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
				680	d = 1;
				681	else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
				682	d = -1;
				683	}
				684	}
				685
				686	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
				687
				688	vfp_put_float(sd, (s32)d);
				689
				690	return exceptions;
				691	}
				692
				693	static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
				694	{
				695	return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO);
				696	}
				697
				698	static u32 (* const fop_extfns[32])(int sd, int unused, s32 m, u32 fpscr) = {
				699	[FEXT_TO_IDX(FEXT_FCPY)] = vfp_single_fcpy,
				700	[FEXT_TO_IDX(FEXT_FABS)] = vfp_single_fabs,
				701	[FEXT_TO_IDX(FEXT_FNEG)] = vfp_single_fneg,
				702	[FEXT_TO_IDX(FEXT_FSQRT)] = vfp_single_fsqrt,
				703	[FEXT_TO_IDX(FEXT_FCMP)] = vfp_single_fcmp,
				704	[FEXT_TO_IDX(FEXT_FCMPE)] = vfp_single_fcmpe,
				705	[FEXT_TO_IDX(FEXT_FCMPZ)] = vfp_single_fcmpz,
				706	[FEXT_TO_IDX(FEXT_FCMPEZ)] = vfp_single_fcmpez,
				707	[FEXT_TO_IDX(FEXT_FCVT)] = vfp_single_fcvtd,
				708	[FEXT_TO_IDX(FEXT_FUITO)] = vfp_single_fuito,
				709	[FEXT_TO_IDX(FEXT_FSITO)] = vfp_single_fsito,
				710	[FEXT_TO_IDX(FEXT_FTOUI)] = vfp_single_ftoui,
				711	[FEXT_TO_IDX(FEXT_FTOUIZ)] = vfp_single_ftouiz,
				712	[FEXT_TO_IDX(FEXT_FTOSI)] = vfp_single_ftosi,
				713	[FEXT_TO_IDX(FEXT_FTOSIZ)] = vfp_single_ftosiz,
				714	};
				715
				716
				717
				718
				719
				720	static u32
				721	vfp_single_fadd_nonnumber(struct vfp_single vsd, struct vfp_single vsn,
				722	struct vfp_single *vsm, u32 fpscr)
				723	{
				724	struct vfp_single *vsp;
				725	u32 exceptions = 0;
				726	int tn, tm;
				727
				728	tn = vfp_single_type(vsn);
				729	tm = vfp_single_type(vsm);
				730
				731	if (tn & tm & VFP_INFINITY) {
				732	/*
				733	* Two infinities. Are they different signs?
				734	*/
				735	if (vsn->sign ^ vsm->sign) {
				736	/*
				737	* different signs -> invalid
				738	*/
				739	exceptions = FPSCR_IOC;
				740	vsp = &vfp_single_default_qnan;
				741	} else {
				742	/*
				743	* same signs -> valid
				744	*/
				745	vsp = vsn;
				746	}
				747	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
				748	/*
				749	* One infinity and one number -> infinity
				750	*/
				751	vsp = vsn;
				752	} else {
				753	/*
				754	* 'n' is a NaN of some type
				755	*/
				756	return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
				757	}
				758	vsd = vsp;
				759	return exceptions;
				760	}
				761
				762	static u32
				763	vfp_single_add(struct vfp_single vsd, struct vfp_single vsn,
				764	struct vfp_single *vsm, u32 fpscr)
				765	{
				766	u32 exp_diff, m_sig;
				767
				768	if (vsn->significand & 0x80000000 \|\|
				769	vsm->significand & 0x80000000) {
				770	pr_info("VFP: bad FP values in %s\n", __func__);
				771	vfp_single_dump("VSN", vsn);
				772	vfp_single_dump("VSM", vsm);
				773	}
				774
				775	/*
				776	* Ensure that 'n' is the largest magnitude number. Note that
				777	* if 'n' and 'm' have equal exponents, we do not swap them.
				778	* This ensures that NaN propagation works correctly.
				779	*/
				780	if (vsn->exponent < vsm->exponent) {
				781	struct vfp_single *t = vsn;
				782	vsn = vsm;
				783	vsm = t;
				784	}
				785
				786	/*
				787	* Is 'n' an infinity or a NaN? Note that 'm' may be a number,
				788	* infinity or a NaN here.
				789	*/
				790	if (vsn->exponent == 255)
				791	return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);
				792
				793	/*
				794	* We have two proper numbers, where 'vsn' is the larger magnitude.
				795	*
				796	* Copy 'n' to 'd' before doing the arithmetic.
				797	*/
				798	vsd = vsn;
				799
				800	/*
				801	* Align both numbers.
				802	*/
				803	exp_diff = vsn->exponent - vsm->exponent;
				804	m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);
				805
				806	/*
				807	* If the signs are different, we are really subtracting.
				808	*/
				809	if (vsn->sign ^ vsm->sign) {
				810	m_sig = vsn->significand - m_sig;
				811	if ((s32)m_sig < 0) {
				812	vsd->sign = vfp_sign_negate(vsd->sign);
				813	m_sig = -m_sig;
				814	} else if (m_sig == 0) {
				815	vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
				816	FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
				817	}
				818	} else {
				819	m_sig = vsn->significand + m_sig;
				820	}
				821	vsd->significand = m_sig;
				822
				823	return 0;
				824	}
				825
				826	static u32
				827	vfp_single_multiply(struct vfp_single vsd, struct vfp_single vsn, struct vfp_single *vsm, u32 fpscr)
				828	{
				829	vfp_single_dump("VSN", vsn);
				830	vfp_single_dump("VSM", vsm);
				831
				832	/*
				833	* Ensure that 'n' is the largest magnitude number. Note that
				834	* if 'n' and 'm' have equal exponents, we do not swap them.
				835	* This ensures that NaN propagation works correctly.
				836	*/
				837	if (vsn->exponent < vsm->exponent) {
				838	struct vfp_single *t = vsn;
				839	vsn = vsm;
				840	vsm = t;
				841	pr_debug("VFP: swapping M <-> N\n");
				842	}
				843
				844	vsd->sign = vsn->sign ^ vsm->sign;
				845
				846	/*
				847	* If 'n' is an infinity or NaN, handle it. 'm' may be anything.
				848	*/
				849	if (vsn->exponent == 255) {
				850	if (vsn->significand \|\| (vsm->exponent == 255 && vsm->significand))
				851	return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
				852	if ((vsm->exponent \| vsm->significand) == 0) {
				853	*vsd = vfp_single_default_qnan;
				854	return FPSCR_IOC;
				855	}
				856	vsd->exponent = vsn->exponent;
				857	vsd->significand = 0;
				858	return 0;
				859	}
				860
				861	/*
				862	* If 'm' is zero, the result is always zero. In this case,
				863	* 'n' may be zero or a number, but it doesn't matter which.
				864	*/
				865	if ((vsm->exponent \| vsm->significand) == 0) {
				866	vsd->exponent = 0;
				867	vsd->significand = 0;
				868	return 0;
				869	}
				870
				871	/*
				872	* We add 2 to the destination exponent for the same reason as
				873	* the addition case - though this time we have +1 from each
				874	* input operand.
				875	*/
				876	vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
				877	vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);
				878
				879	vfp_single_dump("VSD", vsd);
				880	return 0;
				881	}
				882
				883	#define NEG_MULTIPLY (1 << 0)
				884	#define NEG_SUBTRACT (1 << 1)
				885
				886	static u32
				887	vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
				888	{
				889	struct vfp_single vsd, vsp, vsn, vsm;
				890	u32 exceptions;
				891	s32 v;
				892
				893	v = vfp_get_float(sn);
				894	pr_debug("VFP: s%u = %08x\n", sn, v);
				895	vfp_single_unpack(&vsn, v);
				896	if (vsn.exponent == 0 && vsn.significand)
				897	vfp_single_normalise_denormal(&vsn);
				898
				899	vfp_single_unpack(&vsm, m);
				900	if (vsm.exponent == 0 && vsm.significand)
				901	vfp_single_normalise_denormal(&vsm);
				902
				903	exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
				904	if (negate & NEG_MULTIPLY)
				905	vsp.sign = vfp_sign_negate(vsp.sign);
				906
				907	v = vfp_get_float(sd);
				908	pr_debug("VFP: s%u = %08x\n", sd, v);
				909	vfp_single_unpack(&vsn, v);
				910	if (negate & NEG_SUBTRACT)
				911	vsn.sign = vfp_sign_negate(vsn.sign);
				912
				913	exceptions \|= vfp_single_add(&vsd, &vsn, &vsp, fpscr);
				914
				915	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func);
				916	}
				917
				918	/*
				919	* Standard operations
				920	*/
				921
				922	/*
				923	* sd = sd + (sn * sm)
				924	*/
				925	static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr)
				926	{
				927	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac");
				928	}
				929
				930	/*
				931	* sd = sd - (sn * sm)
				932	*/
				933	static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr)
				934	{
				935	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
				936	}
				937
				938	/*
				939	* sd = -sd + (sn * sm)
				940	*/
				941	static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr)
				942	{
				943	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
				944	}
				945
				946	/*
				947	* sd = -sd - (sn * sm)
				948	*/
				949	static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr)
				950	{
				951	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT \| NEG_MULTIPLY, "fnmsc");
				952	}
				953
				954	/*
				955	* sd = sn * sm
				956	*/
				957	static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr)
				958	{
				959	struct vfp_single vsd, vsn, vsm;
				960	u32 exceptions;
				961	s32 n = vfp_get_float(sn);
				962
				963	pr_debug("VFP: s%u = %08x\n", sn, n);
				964
				965	vfp_single_unpack(&vsn, n);
				966	if (vsn.exponent == 0 && vsn.significand)
				967	vfp_single_normalise_denormal(&vsn);
				968
				969	vfp_single_unpack(&vsm, m);
				970	if (vsm.exponent == 0 && vsm.significand)
				971	vfp_single_normalise_denormal(&vsm);
				972
				973	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
				974	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul");
				975	}
				976
				977	/*
				978	* sd = -(sn * sm)
				979	*/
				980	static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr)
				981	{
				982	struct vfp_single vsd, vsn, vsm;
				983	u32 exceptions;
				984	s32 n = vfp_get_float(sn);
				985
				986	pr_debug("VFP: s%u = %08x\n", sn, n);
				987
				988	vfp_single_unpack(&vsn, n);
				989	if (vsn.exponent == 0 && vsn.significand)
				990	vfp_single_normalise_denormal(&vsn);
				991
				992	vfp_single_unpack(&vsm, m);
				993	if (vsm.exponent == 0 && vsm.significand)
				994	vfp_single_normalise_denormal(&vsm);
				995
				996	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
				997	vsd.sign = vfp_sign_negate(vsd.sign);
				998	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul");
				999	}
				1000
				1001	/*
				1002	* sd = sn + sm
				1003	*/
				1004	static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr)
				1005	{
				1006	struct vfp_single vsd, vsn, vsm;
				1007	u32 exceptions;
				1008	s32 n = vfp_get_float(sn);
				1009
				1010	pr_debug("VFP: s%u = %08x\n", sn, n);
				1011
				1012	/*
				1013	* Unpack and normalise denormals.
				1014	*/
				1015	vfp_single_unpack(&vsn, n);
				1016	if (vsn.exponent == 0 && vsn.significand)
				1017	vfp_single_normalise_denormal(&vsn);
				1018
				1019	vfp_single_unpack(&vsm, m);
				1020	if (vsm.exponent == 0 && vsm.significand)
				1021	vfp_single_normalise_denormal(&vsm);
				1022
				1023	exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);
				1024
				1025	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd");
				1026	}
				1027
				1028	/*
				1029	* sd = sn - sm
				1030	*/
				1031	static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr)
				1032	{
				1033	/*
				1034	* Subtraction is addition with one sign inverted.
				1035	*/
				1036	return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr);
				1037	}
				1038
				1039	/*
				1040	* sd = sn / sm
				1041	*/
				1042	static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
				1043	{
				1044	struct vfp_single vsd, vsn, vsm;
				1045	u32 exceptions = 0;
				1046	s32 n = vfp_get_float(sn);
				1047	int tm, tn;
				1048
				1049	pr_debug("VFP: s%u = %08x\n", sn, n);
				1050
				1051	vfp_single_unpack(&vsn, n);
				1052	vfp_single_unpack(&vsm, m);
				1053
				1054	vsd.sign = vsn.sign ^ vsm.sign;
				1055
				1056	tn = vfp_single_type(&vsn);
				1057	tm = vfp_single_type(&vsm);
				1058
				1059	/*
				1060	* Is n a NAN?
				1061	*/
				1062	if (tn & VFP_NAN)
				1063	goto vsn_nan;
				1064
				1065	/*
				1066	* Is m a NAN?
				1067	*/
				1068	if (tm & VFP_NAN)
				1069	goto vsm_nan;
				1070
				1071	/*
				1072	* If n and m are infinity, the result is invalid
				1073	* If n and m are zero, the result is invalid
				1074	*/
				1075	if (tm & tn & (VFP_INFINITY\|VFP_ZERO))
				1076	goto invalid;
				1077
				1078	/*
				1079	* If n is infinity, the result is infinity
				1080	*/
				1081	if (tn & VFP_INFINITY)
				1082	goto infinity;
				1083
				1084	/*
				1085	* If m is zero, raise div0 exception
				1086	*/
				1087	if (tm & VFP_ZERO)
				1088	goto divzero;
				1089
				1090	/*
				1091	* If m is infinity, or n is zero, the result is zero
				1092	*/
				1093	if (tm & VFP_INFINITY \|\| tn & VFP_ZERO)
				1094	goto zero;
				1095
				1096	if (tn & VFP_DENORMAL)
				1097	vfp_single_normalise_denormal(&vsn);
				1098	if (tm & VFP_DENORMAL)
				1099	vfp_single_normalise_denormal(&vsm);
				1100
				1101	/*
				1102	* Ok, we have two numbers, we can perform division.
				1103	*/
				1104	vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
				1105	vsm.significand <<= 1;
				1106	if (vsm.significand <= (2 * vsn.significand)) {
				1107	vsn.significand >>= 1;
				1108	vsd.exponent++;
				1109	}
				1110	vsd.significand = ((u64)vsn.significand << 32) / vsm.significand;
				1111	if ((vsd.significand & 0x3f) == 0)
				1112	vsd.significand \|= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);
				1113
				1114	return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv");
				1115
				1116	vsn_nan:
				1117	exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
				1118	pack:
				1119	vfp_put_float(sd, vfp_single_pack(&vsd));
				1120	return exceptions;
				1121
				1122	vsm_nan:
				1123	exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
				1124	goto pack;
				1125
				1126	zero:
				1127	vsd.exponent = 0;
				1128	vsd.significand = 0;
				1129	goto pack;
				1130
				1131	divzero:
				1132	exceptions = FPSCR_DZC;
				1133	infinity:
				1134	vsd.exponent = 255;
				1135	vsd.significand = 0;
				1136	goto pack;
				1137
				1138	invalid:
				1139	vfp_put_float(sd, vfp_single_pack(&vfp_single_default_qnan));
				1140	return FPSCR_IOC;
				1141	}
				1142
				1143	static u32 (* const fop_fns[16])(int sd, int sn, s32 m, u32 fpscr) = {
				1144	[FOP_TO_IDX(FOP_FMAC)] = vfp_single_fmac,
				1145	[FOP_TO_IDX(FOP_FNMAC)] = vfp_single_fnmac,
				1146	[FOP_TO_IDX(FOP_FMSC)] = vfp_single_fmsc,
				1147	[FOP_TO_IDX(FOP_FNMSC)] = vfp_single_fnmsc,
				1148	[FOP_TO_IDX(FOP_FMUL)] = vfp_single_fmul,
				1149	[FOP_TO_IDX(FOP_FNMUL)] = vfp_single_fnmul,
				1150	[FOP_TO_IDX(FOP_FADD)] = vfp_single_fadd,
				1151	[FOP_TO_IDX(FOP_FSUB)] = vfp_single_fsub,
				1152	[FOP_TO_IDX(FOP_FDIV)] = vfp_single_fdiv,
				1153	};
				1154
				1155	#define FREG_BANK(x) ((x) & 0x18)
				1156	#define FREG_IDX(x) ((x) & 7)
				1157
				1158	u32 vfp_single_cpdo(u32 inst, u32 fpscr)
				1159	{
				1160	u32 op = inst & FOP_MASK;
				1161	u32 exceptions = 0;
				1162	unsigned int sd = vfp_get_sd(inst);
				1163	unsigned int sn = vfp_get_sn(inst);
				1164	unsigned int sm = vfp_get_sm(inst);
				1165	unsigned int vecitr, veclen, vecstride;
				1166	u32 (*fop)(int, int, s32, u32);
				1167
				1168	veclen = fpscr & FPSCR_LENGTH_MASK;
				1169	vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);
				1170
				1171	/*
				1172	* If destination bank is zero, vector length is always '1'.
				1173	* ARM DDI0100F C5.1.3, C5.3.2.
				1174	*/
				1175	if (FREG_BANK(sd) == 0)
				1176	veclen = 0;
				1177
				1178	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
				1179	(veclen >> FPSCR_LENGTH_BIT) + 1);
				1180
				1181	fop = (op == FOP_EXT) ? fop_extfns[sn] : fop_fns[FOP_TO_IDX(op)];
				1182	if (!fop)
				1183	goto invalid;
				1184
				1185	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
				1186	s32 m = vfp_get_float(sm);
				1187	u32 except;
				1188
				1189	if (op == FOP_EXT)
				1190	pr_debug("VFP: itr%d (s%u) = op[%u] (s%u=%08x)\n",
				1191	vecitr >> FPSCR_LENGTH_BIT, sd, sn, sm, m);
				1192	else
				1193	pr_debug("VFP: itr%d (s%u) = (s%u) op[%u] (s%u=%08x)\n",
				1194	vecitr >> FPSCR_LENGTH_BIT, sd, sn,
				1195	FOP_TO_IDX(op), sm, m);
				1196
				1197	except = fop(sd, sn, m, fpscr);
				1198	pr_debug("VFP: itr%d: exceptions=%08x\n",
				1199	vecitr >> FPSCR_LENGTH_BIT, except);
				1200
				1201	exceptions \|= except;
				1202
				1203	/*
				1204	* This ensures that comparisons only operate on scalars;
				1205	* comparisons always return with one FPSCR status bit set.
				1206	*/
				1207	if (except & (FPSCR_N\|FPSCR_Z\|FPSCR_C\|FPSCR_V))
				1208	break;
				1209
				1210	/*
				1211	* CHECK: It appears to be undefined whether we stop when
				1212	* we encounter an exception. We continue.
				1213	*/
				1214
				1215	sd = FREG_BANK(sd) + ((FREG_IDX(sd) + vecstride) & 7);
				1216	sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
				1217	if (FREG_BANK(sm) != 0)
				1218	sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
				1219	}
				1220	return exceptions;
				1221
				1222	invalid:
				1223	return (u32)-1;
				1224	}