Joe Ramsay | f75d804 | 2022-06-16 12:06:36 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Double-precision polynomial evaluation function for scalar and vector atan(x) |
| 3 | * and atan2(y,x). |
| 4 | * |
Joe Ramsay | f0f80b8 | 2023-01-06 09:10:57 +0000 | [diff] [blame^] | 5 | * Copyright (c) 2021-2023, Arm Limited. |
Joe Ramsay | f75d804 | 2022-06-16 12:06:36 +0100 | [diff] [blame] | 6 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception |
| 7 | */ |
| 8 | |
| 9 | #include "math_config.h" |
Joe Ramsay | bc7cc9d | 2022-12-09 12:19:38 +0000 | [diff] [blame] | 10 | #include "estrin.h" |
Joe Ramsay | f75d804 | 2022-06-16 12:06:36 +0100 | [diff] [blame] | 11 | |
| 12 | #if V_SUPPORTED |
| 13 | |
| 14 | #include "v_math.h" |
| 15 | |
| 16 | #define DBL_T v_f64_t |
Joe Ramsay | f75d804 | 2022-06-16 12:06:36 +0100 | [diff] [blame] | 17 | #define P(i) v_f64 (__atan_poly_data.poly[i]) |
| 18 | |
| 19 | #else |
| 20 | |
| 21 | #define DBL_T double |
Joe Ramsay | f75d804 | 2022-06-16 12:06:36 +0100 | [diff] [blame] | 22 | #define P(i) __atan_poly_data.poly[i] |
| 23 | |
| 24 | #endif |
| 25 | |
| 26 | /* Polynomial used in fast atan(x) and atan2(y,x) implementations |
| 27 | The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */ |
| 28 | static inline DBL_T |
| 29 | eval_poly (DBL_T z, DBL_T az, DBL_T shift) |
| 30 | { |
Joe Ramsay | bc7cc9d | 2022-12-09 12:19:38 +0000 | [diff] [blame] | 31 | /* Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of |
| 32 | full scheme to avoid underflow in x^16. */ |
Joe Ramsay | f75d804 | 2022-06-16 12:06:36 +0100 | [diff] [blame] | 33 | DBL_T z2 = z * z; |
Joe Ramsay | f75d804 | 2022-06-16 12:06:36 +0100 | [diff] [blame] | 34 | DBL_T x2 = z2 * z2; |
Joe Ramsay | f75d804 | 2022-06-16 12:06:36 +0100 | [diff] [blame] | 35 | DBL_T x4 = x2 * x2; |
Joe Ramsay | f75d804 | 2022-06-16 12:06:36 +0100 | [diff] [blame] | 36 | DBL_T x8 = x4 * x4; |
Joe Ramsay | bc7cc9d | 2022-12-09 12:19:38 +0000 | [diff] [blame] | 37 | DBL_T y |
| 38 | = FMA (ESTRIN_11_ (z2, x2, x4, x8, P, 8), x8, ESTRIN_7 (z2, x2, x4, P)); |
Joe Ramsay | f75d804 | 2022-06-16 12:06:36 +0100 | [diff] [blame] | 39 | |
| 40 | /* Finalize. y = shift + z + z^3 * P(z^2). */ |
| 41 | y = FMA (y, z2 * az, az); |
| 42 | y = y + shift; |
| 43 | |
| 44 | return y; |
| 45 | } |
| 46 | |
| 47 | #undef DBL_T |
| 48 | #undef FMA |
| 49 | #undef P |