Blame - pl/math/atan_common.h - platform/external/arm-optimized-routines

blob: da0da643685471d667b338098e9f9b18d9060a92 [file] [log] [blame]

Joe Ramsay	f75d804	2022-06-16 12:06:36 +0100	[diff] [blame]	1	/*
				2	* Double-precision polynomial evaluation function for scalar and vector atan(x)
				3	* and atan2(y,x).
				4	*
Joe Ramsay	f0f80b8	2023-01-06 09:10:57 +0000	[diff] [blame^]	5	* Copyright (c) 2021-2023, Arm Limited.
Joe Ramsay	f75d804	2022-06-16 12:06:36 +0100	[diff] [blame]	6	* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
				7	*/
				8
				9	#include "math_config.h"
Joe Ramsay	bc7cc9d	2022-12-09 12:19:38 +0000	[diff] [blame]	10	#include "estrin.h"
Joe Ramsay	f75d804	2022-06-16 12:06:36 +0100	[diff] [blame]	11
				12	#if V_SUPPORTED
				13
				14	#include "v_math.h"
				15
				16	#define DBL_T v_f64_t
Joe Ramsay	f75d804	2022-06-16 12:06:36 +0100	[diff] [blame]	17	#define P(i) v_f64 (__atan_poly_data.poly[i])
				18
				19	#else
				20
				21	#define DBL_T double
Joe Ramsay	f75d804	2022-06-16 12:06:36 +0100	[diff] [blame]	22	#define P(i) __atan_poly_data.poly[i]
				23
				24	#endif
				25
				26	/* Polynomial used in fast atan(x) and atan2(y,x) implementations
				27	The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */
				28	static inline DBL_T
				29	eval_poly (DBL_T z, DBL_T az, DBL_T shift)
				30	{
Joe Ramsay	bc7cc9d	2022-12-09 12:19:38 +0000	[diff] [blame]	31	/* Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
				32	full scheme to avoid underflow in x^16. */
Joe Ramsay	f75d804	2022-06-16 12:06:36 +0100	[diff] [blame]	33	DBL_T z2 = z * z;
Joe Ramsay	f75d804	2022-06-16 12:06:36 +0100	[diff] [blame]	34	DBL_T x2 = z2 * z2;
Joe Ramsay	f75d804	2022-06-16 12:06:36 +0100	[diff] [blame]	35	DBL_T x4 = x2 * x2;
Joe Ramsay	f75d804	2022-06-16 12:06:36 +0100	[diff] [blame]	36	DBL_T x8 = x4 * x4;
Joe Ramsay	bc7cc9d	2022-12-09 12:19:38 +0000	[diff] [blame]	37	DBL_T y
				38	= FMA (ESTRIN_11_ (z2, x2, x4, x8, P, 8), x8, ESTRIN_7 (z2, x2, x4, P));
Joe Ramsay	f75d804	2022-06-16 12:06:36 +0100	[diff] [blame]	39
				40	/* Finalize. y = shift + z + z^3 * P(z^2). */
				41	y = FMA (y, z2 * az, az);
				42	y = y + shift;
				43
				44	return y;
				45	}
				46
				47	#undef DBL_T
				48	#undef FMA
				49	#undef P