Blame - include/math-emu/op-4.h - kernel/hikey-linaro

blob: ba226f8237a696cd7310d81a8cbe9bc1e5e75ebf [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/* Software floating-point emulation.
				2	Basic four-word fraction declaration and manipulation.
				3	Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
				4	This file is part of the GNU C Library.
				5	Contributed by Richard Henderson (rth@cygnus.com),
				6	Jakub Jelinek (jj@ultra.linux.cz),
				7	David S. Miller (davem@redhat.com) and
				8	Peter Maydell (pmaydell@chiark.greenend.org.uk).
				9
				10	The GNU C Library is free software; you can redistribute it and/or
				11	modify it under the terms of the GNU Library General Public License as
				12	published by the Free Software Foundation; either version 2 of the
				13	License, or (at your option) any later version.
				14
				15	The GNU C Library is distributed in the hope that it will be useful,
				16	but WITHOUT ANY WARRANTY; without even the implied warranty of
				17	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				18	Library General Public License for more details.
				19
				20	You should have received a copy of the GNU Library General Public
				21	License along with the GNU C Library; see the file COPYING.LIB. If
				22	not, write to the Free Software Foundation, Inc.,
				23	59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
				24
				25	#ifndef __MATH_EMU_OP_4_H__
				26	#define __MATH_EMU_OP_4_H__
				27
				28	#define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4]
				29	#define _FP_FRAC_COPY_4(D,S) \
				30	(D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \
				31	D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
				32	#define _FP_FRAC_SET_4(X,I) __FP_FRAC_SET_4(X, I)
				33	#define _FP_FRAC_HIGH_4(X) (X##_f[3])
				34	#define _FP_FRAC_LOW_4(X) (X##_f[0])
				35	#define _FP_FRAC_WORD_4(X,w) (X##_f[w])
				36
				37	#define _FP_FRAC_SLL_4(X,N) \
				38	do { \
				39	_FP_I_TYPE _up, _down, _skip, _i; \
				40	_skip = (N) / _FP_W_TYPE_SIZE; \
				41	_up = (N) % _FP_W_TYPE_SIZE; \
				42	_down = _FP_W_TYPE_SIZE - _up; \
				43	if (!_up) \
				44	for (_i = 3; _i >= _skip; --_i) \
				45	X##_f[_i] = X##_f[_i-_skip]; \
				46	else \
				47	{ \
				48	for (_i = 3; _i > _skip; --_i) \
				49	X##_f[_i] = X##_f[_i-_skip] << _up \
				50	\| X##_f[_i-_skip-1] >> _down; \
				51	X##_f[_i--] = X##_f[0] << _up; \
				52	} \
				53	for (; _i >= 0; --_i) \
				54	X##_f[_i] = 0; \
				55	} while (0)
				56
				57	/* This one was broken too */
				58	#define _FP_FRAC_SRL_4(X,N) \
				59	do { \
				60	_FP_I_TYPE _up, _down, _skip, _i; \
				61	_skip = (N) / _FP_W_TYPE_SIZE; \
				62	_down = (N) % _FP_W_TYPE_SIZE; \
				63	_up = _FP_W_TYPE_SIZE - _down; \
				64	if (!_down) \
				65	for (_i = 0; _i <= 3-_skip; ++_i) \
				66	X##_f[_i] = X##_f[_i+_skip]; \
				67	else \
				68	{ \
				69	for (_i = 0; _i < 3-_skip; ++_i) \
				70	X##_f[_i] = X##_f[_i+_skip] >> _down \
				71	\| X##_f[_i+_skip+1] << _up; \
				72	X##_f[_i++] = X##_f[3] >> _down; \
				73	} \
				74	for (; _i < 4; ++_i) \
				75	X##_f[_i] = 0; \
				76	} while (0)
				77
				78
				79	/* Right shift with sticky-lsb.
				80	* What this actually means is that we do a standard right-shift,
				81	* but that if any of the bits that fall off the right hand side
				82	* were one then we always set the LSbit.
				83	*/
				84	#define _FP_FRAC_SRS_4(X,N,size) \
				85	do { \
				86	_FP_I_TYPE _up, _down, _skip, _i; \
				87	_FP_W_TYPE _s; \
				88	_skip = (N) / _FP_W_TYPE_SIZE; \
				89	_down = (N) % _FP_W_TYPE_SIZE; \
				90	_up = _FP_W_TYPE_SIZE - _down; \
				91	for (_s = _i = 0; _i < _skip; ++_i) \
				92	_s \|= X##_f[_i]; \
				93	_s \|= X##_f[_i] << _up; \
				94	/* s is now != 0 if we want to set the LSbit */ \
				95	if (!_down) \
				96	for (_i = 0; _i <= 3-_skip; ++_i) \
				97	X##_f[_i] = X##_f[_i+_skip]; \
				98	else \
				99	{ \
				100	for (_i = 0; _i < 3-_skip; ++_i) \
				101	X##_f[_i] = X##_f[_i+_skip] >> _down \
				102	\| X##_f[_i+_skip+1] << _up; \
				103	X##_f[_i++] = X##_f[3] >> _down; \
				104	} \
				105	for (; _i < 4; ++_i) \
				106	X##_f[_i] = 0; \
				107	/* don't fix the LSB until the very end when we're sure f[0] is stable */ \
				108	X##_f[0] \|= (_s != 0); \
				109	} while (0)
				110
				111	#define _FP_FRAC_ADD_4(R,X,Y) \
				112	__FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
				113	X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
				114	Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
				115
				116	#define _FP_FRAC_SUB_4(R,X,Y) \
				117	__FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \
				118	X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
				119	Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
				120
				121	#define _FP_FRAC_DEC_4(X,Y) \
				122	__FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
				123	Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
				124
				125	#define _FP_FRAC_ADDI_4(X,I) \
				126	__FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
				127
				128	#define _FP_ZEROFRAC_4 0,0,0,0
				129	#define _FP_MINFRAC_4 0,0,0,1
				130	#define _FP_MAXFRAC_4 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
				131
				132	#define _FP_FRAC_ZEROP_4(X) ((X##_f[0] \| X##_f[1] \| X##_f[2] \| X##_f[3]) == 0)
				133	#define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE)X##_f[3] < 0)
				134	#define _FP_FRAC_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
				135	#define _FP_FRAC_CLEAR_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
				136
				137	#define _FP_FRAC_EQ_4(X,Y) \
				138	(X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \
				139	&& X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
				140
				141	#define _FP_FRAC_GT_4(X,Y) \
				142	(X##_f[3] > Y##_f[3] \|\| \
				143	(X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] \|\| \
				144	(X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] \|\| \
				145	(X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0]) \
				146	)) \
				147	)) \
				148	)
				149
				150	#define _FP_FRAC_GE_4(X,Y) \
				151	(X##_f[3] > Y##_f[3] \|\| \
				152	(X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] \|\| \
				153	(X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] \|\| \
				154	(X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0]) \
				155	)) \
				156	)) \
				157	)
				158
				159
				160	#define _FP_FRAC_CLZ_4(R,X) \
				161	do { \
				162	if (X##_f[3]) \
				163	{ \
				164	__FP_CLZ(R,X##_f[3]); \
				165	} \
				166	else if (X##_f[2]) \
				167	{ \
				168	__FP_CLZ(R,X##_f[2]); \
				169	R += _FP_W_TYPE_SIZE; \
				170	} \
				171	else if (X##_f[1]) \
				172	{ \
				173	__FP_CLZ(R,X##_f[2]); \
				174	R += _FP_W_TYPE_SIZE*2; \
				175	} \
				176	else \
				177	{ \
				178	__FP_CLZ(R,X##_f[0]); \
				179	R += _FP_W_TYPE_SIZE*3; \
				180	} \
				181	} while(0)
				182
				183
				184	#define _FP_UNPACK_RAW_4(fs, X, val) \
				185	do { \
				186	union _FP_UNION_##fs _flo; _flo.flt = (val); \
				187	X##_f[0] = _flo.bits.frac0; \
				188	X##_f[1] = _flo.bits.frac1; \
				189	X##_f[2] = _flo.bits.frac2; \
				190	X##_f[3] = _flo.bits.frac3; \
				191	X##_e = _flo.bits.exp; \
				192	X##_s = _flo.bits.sign; \
				193	} while (0)
				194
				195	#define _FP_UNPACK_RAW_4_P(fs, X, val) \
				196	do { \
				197	union _FP_UNION_##fs *_flo = \
				198	(union _FP_UNION_##fs *)(val); \
				199	\
				200	X##_f[0] = _flo->bits.frac0; \
				201	X##_f[1] = _flo->bits.frac1; \
				202	X##_f[2] = _flo->bits.frac2; \
				203	X##_f[3] = _flo->bits.frac3; \
				204	X##_e = _flo->bits.exp; \
				205	X##_s = _flo->bits.sign; \
				206	} while (0)
				207
				208	#define _FP_PACK_RAW_4(fs, val, X) \
				209	do { \
				210	union _FP_UNION_##fs _flo; \
				211	_flo.bits.frac0 = X##_f[0]; \
				212	_flo.bits.frac1 = X##_f[1]; \
				213	_flo.bits.frac2 = X##_f[2]; \
				214	_flo.bits.frac3 = X##_f[3]; \
				215	_flo.bits.exp = X##_e; \
				216	_flo.bits.sign = X##_s; \
				217	(val) = _flo.flt; \
				218	} while (0)
				219
				220	#define _FP_PACK_RAW_4_P(fs, val, X) \
				221	do { \
				222	union _FP_UNION_##fs *_flo = \
				223	(union _FP_UNION_##fs *)(val); \
				224	\
				225	_flo->bits.frac0 = X##_f[0]; \
				226	_flo->bits.frac1 = X##_f[1]; \
				227	_flo->bits.frac2 = X##_f[2]; \
				228	_flo->bits.frac3 = X##_f[3]; \
				229	_flo->bits.exp = X##_e; \
				230	_flo->bits.sign = X##_s; \
				231	} while (0)
				232
				233	/*
				234	* Multiplication algorithms:
				235	*/
				236
				237	/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
				238
				239	#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \
				240	do { \
				241	_FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
				242	_FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f); \
				243	\
				244	doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \
				245	doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]); \
				246	doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]); \
				247	doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]); \
				248	doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]); \
				249	doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]); \
				250	__FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \
				251	_FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0, \
				252	0,0,_FP_FRAC_WORD_8(_z,1)); \
				253	__FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \
				254	_FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0, \
				255	_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \
				256	_FP_FRAC_WORD_8(_z,1)); \
				257	__FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \
				258	_FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0, \
				259	0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2)); \
				260	__FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \
				261	_FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0, \
				262	_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \
				263	_FP_FRAC_WORD_8(_z,2)); \
				264	__FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \
				265	_FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0, \
				266	_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \
				267	_FP_FRAC_WORD_8(_z,2)); \
				268	doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]); \
				269	doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]); \
				270	doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]); \
				271	doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]); \
				272	__FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \
				273	_FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0, \
				274	0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3)); \
				275	__FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \
				276	_FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0, \
				277	_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \
				278	_FP_FRAC_WORD_8(_z,3)); \
				279	__FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \
				280	_FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0, \
				281	_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \
				282	_FP_FRAC_WORD_8(_z,3)); \
				283	__FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \
				284	_FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0, \
				285	_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \
				286	_FP_FRAC_WORD_8(_z,3)); \
				287	doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]); \
				288	doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]); \
				289	doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]); \
				290	doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]); \
				291	doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]); \
				292	__FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \
				293	_FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0, \
				294	0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4)); \
				295	__FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \
				296	_FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0, \
				297	_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \
				298	_FP_FRAC_WORD_8(_z,4)); \
				299	__FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \
				300	_FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0, \
				301	_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \
				302	_FP_FRAC_WORD_8(_z,4)); \
				303	__FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \
				304	_FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0, \
				305	0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5)); \
				306	__FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \
				307	_FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0, \
				308	_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \
				309	_FP_FRAC_WORD_8(_z,5)); \
				310	doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]); \
				311	__FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \
				312	_b_f1,_b_f0, \
				313	_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6)); \
				314	\
				315	/* Normalize since we know where the msb of the multiplicands \
				316	were (bit B), we know that the msb of the of the product is \
				317	at either 2B or 2B-1. */ \
				318	_FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \
				319	__FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \
				320	_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \
				321	} while (0)
				322
				323	#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \
				324	do { \
				325	_FP_FRAC_DECL_8(_z); \
				326	\
				327	mpn_mul_n(_z_f, _x_f, _y_f, 4); \
				328	\
				329	/* Normalize since we know where the msb of the multiplicands \
				330	were (bit B), we know that the msb of the of the product is \
				331	at either 2B or 2B-1. */ \
				332	_FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \
				333	__FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \
				334	_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \
				335	} while (0)
				336
				337	/*
				338	* Helper utility for _FP_DIV_MEAT_4_udiv:
				339	* pppp = m * nnn
				340	*/
				341	#define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0) \
				342	do { \
				343	UWtype _t; \
				344	umul_ppmm(p1,p0,m,n0); \
				345	umul_ppmm(p2,_t,m,n1); \
				346	__FP_FRAC_ADDI_2(p2,p1,_t); \
				347	umul_ppmm(p3,_t,m,n2); \
				348	__FP_FRAC_ADDI_2(p3,p2,_t); \
				349	} while (0)
				350
				351	/*
				352	* Division algorithms:
				353	*/
				354
				355	#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \
				356	do { \
				357	int _i; \
				358	_FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m); \
				359	_FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4); \
				360	if (_FP_FRAC_GT_4(X, Y)) \
				361	{ \
				362	_n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \
				363	_FP_FRAC_SRL_4(X, 1); \
				364	} \
				365	else \
				366	R##_e--; \
				367	\
				368	/* Normalize, i.e. make the most significant bit of the \
				369	denominator set. */ \
				370	_FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs); \
				371	\
				372	for (_i = 3; ; _i--) \
				373	{ \
				374	if (X##_f[3] == Y##_f[3]) \
				375	{ \
				376	/* This is a special case, not an optimization \
				377	(X##_f[3]/Y##_f[3] would not fit into UWtype). \
				378	As X## is guaranteed to be < Y, R##_f[_i] can be either \
				379	(UWtype)-1 or (UWtype)-2. */ \
				380	R##_f[_i] = -1; \
				381	if (!_i) \
				382	break; \
				383	__FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
				384	Y##_f[2], Y##_f[1], Y##_f[0], 0, \
				385	X##_f[2], X##_f[1], X##_f[0], _n_f[_i]); \
				386	_FP_FRAC_SUB_4(X, Y, X); \
				387	if (X##_f[3] > Y##_f[3]) \
				388	{ \
				389	R##_f[_i] = -2; \
				390	_FP_FRAC_ADD_4(X, Y, X); \
				391	} \
				392	} \
				393	else \
				394	{ \
				395	udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \
				396	umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0], \
				397	R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]); \
				398	X##_f[2] = X##_f[1]; \
				399	X##_f[1] = X##_f[0]; \
				400	X##_f[0] = _n_f[_i]; \
				401	if (_FP_FRAC_GT_4(_m, X)) \
				402	{ \
				403	R##_f[_i]--; \
				404	_FP_FRAC_ADD_4(X, Y, X); \
				405	if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X)) \
				406	{ \
				407	R##_f[_i]--; \
				408	_FP_FRAC_ADD_4(X, Y, X); \
				409	} \
				410	} \
				411	_FP_FRAC_DEC_4(X, _m); \
				412	if (!_i) \
				413	{ \
				414	if (!_FP_FRAC_EQ_4(X, _m)) \
				415	R##_f[0] \|= _FP_WORK_STICKY; \
				416	break; \
				417	} \
				418	} \
				419	} \
				420	} while (0)
				421
				422
				423	/*
				424	* Square root algorithms:
				425	* We have just one right now, maybe Newton approximation
				426	* should be added for those machines where division is fast.
				427	*/
				428
				429	#define _FP_SQRT_MEAT_4(R, S, T, X, q) \
				430	do { \
				431	while (q) \
				432	{ \
				433	T##_f[3] = S##_f[3] + q; \
				434	if (T##_f[3] <= X##_f[3]) \
				435	{ \
				436	S##_f[3] = T##_f[3] + q; \
				437	X##_f[3] -= T##_f[3]; \
				438	R##_f[3] += q; \
				439	} \
				440	_FP_FRAC_SLL_4(X, 1); \
				441	q >>= 1; \
				442	} \
				443	q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
				444	while (q) \
				445	{ \
				446	T##_f[2] = S##_f[2] + q; \
				447	T##_f[3] = S##_f[3]; \
				448	if (T##_f[3] < X##_f[3] \|\| \
				449	(T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \
				450	{ \
				451	S##_f[2] = T##_f[2] + q; \
				452	S##_f[3] += (T##_f[2] > S##_f[2]); \
				453	__FP_FRAC_DEC_2(X##_f[3], X##_f[2], \
				454	T##_f[3], T##_f[2]); \
				455	R##_f[2] += q; \
				456	} \
				457	_FP_FRAC_SLL_4(X, 1); \
				458	q >>= 1; \
				459	} \
				460	q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
				461	while (q) \
				462	{ \
				463	T##_f[1] = S##_f[1] + q; \
				464	T##_f[2] = S##_f[2]; \
				465	T##_f[3] = S##_f[3]; \
				466	if (T##_f[3] < X##_f[3] \|\| \
				467	(T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] \|\| \
				468	(T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1])))) \
				469	{ \
				470	S##_f[1] = T##_f[1] + q; \
				471	S##_f[2] += (T##_f[1] > S##_f[1]); \
				472	S##_f[3] += (T##_f[2] > S##_f[2]); \
				473	__FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1], \
				474	T##_f[3], T##_f[2], T##_f[1]); \
				475	R##_f[1] += q; \
				476	} \
				477	_FP_FRAC_SLL_4(X, 1); \
				478	q >>= 1; \
				479	} \
				480	q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
				481	while (q != _FP_WORK_ROUND) \
				482	{ \
				483	T##_f[0] = S##_f[0] + q; \
				484	T##_f[1] = S##_f[1]; \
				485	T##_f[2] = S##_f[2]; \
				486	T##_f[3] = S##_f[3]; \
				487	if (_FP_FRAC_GE_4(X,T)) \
				488	{ \
				489	S##_f[0] = T##_f[0] + q; \
				490	S##_f[1] += (T##_f[0] > S##_f[0]); \
				491	S##_f[2] += (T##_f[1] > S##_f[1]); \
				492	S##_f[3] += (T##_f[2] > S##_f[2]); \
				493	_FP_FRAC_DEC_4(X, T); \
				494	R##_f[0] += q; \
				495	} \
				496	_FP_FRAC_SLL_4(X, 1); \
				497	q >>= 1; \
				498	} \
				499	if (!_FP_FRAC_ZEROP_4(X)) \
				500	{ \
				501	if (_FP_FRAC_GT_4(X,S)) \
				502	R##_f[0] \|= _FP_WORK_ROUND; \
				503	R##_f[0] \|= _FP_WORK_STICKY; \
				504	} \
				505	} while (0)
				506
				507
				508	/*
				509	* Internals
				510	*/
				511
				512	#define __FP_FRAC_SET_4(X,I3,I2,I1,I0) \
				513	(X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
				514
				515	#ifndef __FP_FRAC_ADD_3
				516	#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \
				517	do { \
				518	int _c1, _c2; \
				519	r0 = x0 + y0; \
				520	_c1 = r0 < x0; \
				521	r1 = x1 + y1; \
				522	_c2 = r1 < x1; \
				523	r1 += _c1; \
				524	_c2 \|= r1 < _c1; \
				525	r2 = x2 + y2 + _c2; \
				526	} while (0)
				527	#endif
				528
				529	#ifndef __FP_FRAC_ADD_4
				530	#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \
				531	do { \
				532	int _c1, _c2, _c3; \
				533	r0 = x0 + y0; \
				534	_c1 = r0 < x0; \
				535	r1 = x1 + y1; \
				536	_c2 = r1 < x1; \
				537	r1 += _c1; \
				538	_c2 \|= r1 < _c1; \
				539	r2 = x2 + y2; \
				540	_c3 = r2 < x2; \
				541	r2 += _c2; \
				542	_c3 \|= r2 < _c2; \
				543	r3 = x3 + y3 + _c3; \
				544	} while (0)
				545	#endif
				546
				547	#ifndef __FP_FRAC_SUB_3
				548	#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \
				549	do { \
				550	int _c1, _c2; \
				551	r0 = x0 - y0; \
				552	_c1 = r0 > x0; \
				553	r1 = x1 - y1; \
				554	_c2 = r1 > x1; \
				555	r1 -= _c1; \
				556	_c2 \|= r1 > _c1; \
				557	r2 = x2 - y2 - _c2; \
				558	} while (0)
				559	#endif
				560
				561	#ifndef __FP_FRAC_SUB_4
				562	#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \
				563	do { \
				564	int _c1, _c2, _c3; \
				565	r0 = x0 - y0; \
				566	_c1 = r0 > x0; \
				567	r1 = x1 - y1; \
				568	_c2 = r1 > x1; \
				569	r1 -= _c1; \
				570	_c2 \|= r1 > _c1; \
				571	r2 = x2 - y2; \
				572	_c3 = r2 > x2; \
				573	r2 -= _c2; \
				574	_c3 \|= r2 > _c2; \
				575	r3 = x3 - y3 - _c3; \
				576	} while (0)
				577	#endif
				578
				579	#ifndef __FP_FRAC_DEC_3
				580	#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) \
				581	do { \
				582	UWtype _t0, _t1, _t2; \
				583	_t0 = x0, _t1 = x1, _t2 = x2; \
				584	__FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0); \
				585	} while (0)
				586	#endif
				587
				588	#ifndef __FP_FRAC_DEC_4
				589	#define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) \
				590	do { \
				591	UWtype _t0, _t1, _t2, _t3; \
				592	_t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3; \
				593	__FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0); \
				594	} while (0)
				595	#endif
				596
				597	#ifndef __FP_FRAC_ADDI_4
				598	#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \
				599	do { \
				600	UWtype _t; \
				601	_t = ((x0 += i) < i); \
				602	x1 += _t; _t = (x1 < _t); \
				603	x2 += _t; _t = (x2 < _t); \
				604	x3 += _t; \
				605	} while (0)
				606	#endif
				607
				608	/* Convert FP values between word sizes. This appears to be more
				609	* complicated than I'd have expected it to be, so these might be
				610	* wrong... These macros are in any case somewhat bogus because they
				611	* use information about what various FRAC_n variables look like
				612	* internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
				613	* the ones in op-2.h and op-1.h.
				614	*/
				615	#define _FP_FRAC_CONV_1_4(dfs, sfs, D, S) \
				616	do { \
				617	if (S##_c != FP_CLS_NAN) \
				618	_FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \
				619	_FP_WFRACBITS_##sfs); \
				620	else \
				621	_FP_FRAC_SRL_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs)); \
				622	D##_f = S##_f[0]; \
				623	} while (0)
				624
				625	#define _FP_FRAC_CONV_2_4(dfs, sfs, D, S) \
				626	do { \
				627	if (S##_c != FP_CLS_NAN) \
				628	_FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \
				629	_FP_WFRACBITS_##sfs); \
				630	else \
				631	_FP_FRAC_SRL_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs)); \
				632	D##_f0 = S##_f[0]; \
				633	D##_f1 = S##_f[1]; \
				634	} while (0)
				635
				636	/* Assembly/disassembly for converting to/from integral types.
				637	* No shifting or overflow handled here.
				638	*/
				639	/* Put the FP value X into r, which is an integer of size rsize. */
				640	#define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \
				641	do { \
				642	if (rsize <= _FP_W_TYPE_SIZE) \
				643	r = X##_f[0]; \
				644	else if (rsize <= 2*_FP_W_TYPE_SIZE) \
				645	{ \
				646	r = X##_f[1]; \
				647	r <<= _FP_W_TYPE_SIZE; \
				648	r += X##_f[0]; \
				649	} \
				650	else \
				651	{ \
				652	/* I'm feeling lazy so we deal with int == 3words (implausible)*/ \
				653	/* and int == 4words as a single case. */ \
				654	r = X##_f[3]; \
				655	r <<= _FP_W_TYPE_SIZE; \
				656	r += X##_f[2]; \
				657	r <<= _FP_W_TYPE_SIZE; \
				658	r += X##_f[1]; \
				659	r <<= _FP_W_TYPE_SIZE; \
				660	r += X##_f[0]; \
				661	} \
				662	} while (0)
				663
				664	/* "No disassemble Number Five!" */
				665	/* move an integer of size rsize into X's fractional part. We rely on
				666	* the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
				667	* having to mask the values we store into it.
				668	*/
				669	#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \
				670	do { \
				671	X##_f[0] = r; \
				672	X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
				673	X##_f[2] = (rsize <= 2_FP_W_TYPE_SIZE ? 0 : r >> 2_FP_W_TYPE_SIZE); \
				674	X##_f[3] = (rsize <= 3_FP_W_TYPE_SIZE ? 0 : r >> 3_FP_W_TYPE_SIZE); \
				675	} while (0)
				676
				677	#define _FP_FRAC_CONV_4_1(dfs, sfs, D, S) \
				678	do { \
				679	D##_f[0] = S##_f; \
				680	D##_f[1] = D##_f[2] = D##_f[3] = 0; \
				681	_FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \
				682	} while (0)
				683
				684	#define _FP_FRAC_CONV_4_2(dfs, sfs, D, S) \
				685	do { \
				686	D##_f[0] = S##_f0; \
				687	D##_f[1] = S##_f1; \
				688	D##_f[2] = D##_f[3] = 0; \
				689	_FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \
				690	} while (0)
				691
				692	#endif