pl/math/erfc_4u5.c - platform/external/arm-optimized-routines - Gitiles

 /*
  * Double-precision erfc(x) function.
  *
  * Copyright (c) 2019-2023, Arm Limited.
  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */

 #include "math_config.h"
 #include "pairwise_horner.h"
 #include "pl_sig.h"
 #include "pl_test.h"

 #define AbsMask (0x7fffffffffffffff)

 #define xint __erfc_data.interval_bounds
 #define PX __erfc_data.poly

 /* Accurate exponential from optimized routines.  */
 double
 __exp_dd (double x, double xtail);

 static inline double
 eval_poly_horner (double z, int i)
 {
   double z2 = z * z;
 #define C(j) PX[i][j]
   return PAIRWISE_HORNER_12 (z, z2, C);
 }

 /* Accurate evaluation of exp(x^2)
    using compensated product (x^2 ~ x*x + e2)
    and the __exp_dd(y,d) routine, that is the
    computation of exp(y+d) with a small correction d<<y.  */
 static inline double
 eval_accurate_gaussian (double a)
 {
   double e2;
   double a2 = a * a;
   double aa1 = -fma (0x1.0000002p27, a, -a);
   aa1 = fma (0x1.0000002p27, a, aa1);
   double aa2 = a - aa1;
   e2 = fma (-aa1, aa1, a2);
   e2 = fma (-aa1, aa2, e2);
   e2 = fma (-aa2, aa1, e2);
   e2 = fma (-aa2, aa2, e2);
   return __exp_dd (-a2, e2);
 }

 /* Approximation of erfc for |x| > 6.0.  */
 static inline double
 approx_erfc_hi (double x, int i)
 {
   double a = fabs (x);
   double z = a - xint[i];
   double p = eval_poly_horner (z, i);
   double e_mx2 = eval_accurate_gaussian (a);
   return p * e_mx2;
 }

 static inline int
 get_itv_idx (double x)
 {
   /* Interval bounds are a logarithmic scale, i.e. interval n has
      lower bound 2^(n/4) - 1. Use the exponent of (|x|+1)^4 to obtain
      the interval index.  */
   double a = asdouble (asuint64 (x) & AbsMask);
   double z = a + 1.0;
   z = z * z;
   z = z * z;
   return (asuint64 (z) >> 52) - 1023;
 }

 /* Approximation of erfc for |x| < 6.0.  */
 static inline double
 approx_erfc_lo (double x, uint32_t sign, int i)
 {
   double a = fabs (x);
   double z = a - xint[i];
   double p = eval_poly_horner (z, i);
   double e_mx2 = eval_accurate_gaussian (a);
   if (sign)
     return fma (-p, e_mx2, 2.0);
   else
     return p * e_mx2;
 }

 /* Top 12 bits of a double (sign and exponent bits).  */
 static inline uint32_t
 abstop12 (double x)
 {
   return (asuint64 (x) >> 52) & 0x7ff;
 }

 /* Top 32 bits of a double.  */
 static inline uint32_t
 top32 (double x)
 {
   return asuint64 (x) >> 32;
 }

 /* Fast erfc implementation.
    The approximation uses polynomial approximation of
    exp(x^2) * erfc(x) with fixed orders on 20 intervals.
    Maximum measured error is 4.05 ULPs:.
    erfc(0x1.e8ebf6a2b0801p-2) got 0x1.ff84036f8f0b3p-2
 			     want 0x1.ff84036f8f0b7p-2.  */
 double
 erfc (double x)
 {
   /* Get top words.  */
   uint32_t ix = top32 (x); /* We need to compare at most 32 bits.  */
   uint32_t ia = ix & 0x7fffffff;
   uint32_t sign = ix >> 31;

   /* Handle special cases and small values with a single comparison:
      abstop12(x)-abstop12(small) >= abstop12(INFINITY)-abstop12(small)
      Special cases erfc(nan)=nan, erfc(+inf)=0 and erfc(-inf)=2
      Errno EDOM does not have to be set in case of erfc(nan).
      Only ERANGE may be set in case of underflow.
      Small values (|x|<small)
        |x|<0x1.0p-56 => accurate up to 0.5 ULP (top12(0x1p-50) = 0x3c7)
        |x|<0x1.0p-50 => accurate up to 1.0 ULP (top12(0x1p-50) = 0x3cd).  */
   if (unlikely (abstop12 (x) - 0x3cd >= (abstop12 (INFINITY) & 0x7ff) - 0x3cd))
     {
       if (abstop12 (x) >= 0x7ff)
 	return (double) (sign << 1) + 1.0 / x; /* special cases.  */
       else
 	return 1.0 - x; /* small case.  */
     }
   else if (ia < 0x40180000)
     { /* |x| < 6.0.  */
       return approx_erfc_lo (x, sign, get_itv_idx (x));
     }
   else if (sign)
     { /* x <= -6.0.  */
       return 2.0;
     }
   else if (ia < 0x403c0000)
     { /* 6.0 <= x < 28.  */
       return approx_erfc_hi (x, get_itv_idx (x));
     }
   else
     { /* x > 28.  */
       return __math_uflow (0);
     }
 }

 PL_SIG (S, D, 1, erfc, -6.0, 28.0)
 PL_TEST_ULP (erfc, 3.56)
 PL_TEST_INTERVAL (erfc, 0, 0xffff0000, 10000)
 PL_TEST_INTERVAL (erfc, 0x1p-1022, 0x1p-26, 40000)
 PL_TEST_INTERVAL (erfc, -0x1p-1022, -0x1p-26, 40000)
 PL_TEST_INTERVAL (erfc, 0x1p-26, 0x1p5, 40000)
 PL_TEST_INTERVAL (erfc, -0x1p-26, -0x1p3, 40000)
 PL_TEST_INTERVAL (erfc, 0, inf, 40000)
	/*
	* Double-precision erfc(x) function.
	*
	* Copyright (c) 2019-2023, Arm Limited.
	* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
	*/

	#include "math_config.h"
	#include "pairwise_horner.h"
	#include "pl_sig.h"
	#include "pl_test.h"

	#define AbsMask (0x7fffffffffffffff)

	#define xint __erfc_data.interval_bounds
	#define PX __erfc_data.poly

	/* Accurate exponential from optimized routines. */
	double
	__exp_dd (double x, double xtail);

	static inline double
	eval_poly_horner (double z, int i)
	{
	double z2 = z * z;
	#define C(j) PX[i][j]
	return PAIRWISE_HORNER_12 (z, z2, C);
	}

	/* Accurate evaluation of exp(x^2)
	using compensated product (x^2 ~ x*x + e2)
	and the __exp_dd(y,d) routine, that is the
	computation of exp(y+d) with a small correction d<<y. */
	static inline double
	eval_accurate_gaussian (double a)
	{
	double e2;
	double a2 = a * a;
	double aa1 = -fma (0x1.0000002p27, a, -a);
	aa1 = fma (0x1.0000002p27, a, aa1);
	double aa2 = a - aa1;
	e2 = fma (-aa1, aa1, a2);
	e2 = fma (-aa1, aa2, e2);
	e2 = fma (-aa2, aa1, e2);
	e2 = fma (-aa2, aa2, e2);
	return __exp_dd (-a2, e2);
	}

	/* Approximation of erfc for \|x\| > 6.0. */
	static inline double
	approx_erfc_hi (double x, int i)
	{
	double a = fabs (x);
	double z = a - xint[i];
	double p = eval_poly_horner (z, i);
	double e_mx2 = eval_accurate_gaussian (a);
	return p * e_mx2;
	}

	static inline int
	get_itv_idx (double x)
	{
	/* Interval bounds are a logarithmic scale, i.e. interval n has
	lower bound 2^(n/4) - 1. Use the exponent of (\|x\|+1)^4 to obtain
	the interval index. */
	double a = asdouble (asuint64 (x) & AbsMask);
	double z = a + 1.0;
	z = z * z;
	z = z * z;
	return (asuint64 (z) >> 52) - 1023;
	}

	/* Approximation of erfc for \|x\| < 6.0. */
	static inline double
	approx_erfc_lo (double x, uint32_t sign, int i)
	{
	double a = fabs (x);
	double z = a - xint[i];
	double p = eval_poly_horner (z, i);
	double e_mx2 = eval_accurate_gaussian (a);
	if (sign)
	return fma (-p, e_mx2, 2.0);
	else
	return p * e_mx2;
	}

	/* Top 12 bits of a double (sign and exponent bits). */
	static inline uint32_t
	abstop12 (double x)
	{
	return (asuint64 (x) >> 52) & 0x7ff;
	}

	/* Top 32 bits of a double. */
	static inline uint32_t
	top32 (double x)
	{
	return asuint64 (x) >> 32;
	}

	/* Fast erfc implementation.
	The approximation uses polynomial approximation of
	exp(x^2) * erfc(x) with fixed orders on 20 intervals.
	Maximum measured error is 4.05 ULPs:.
	erfc(0x1.e8ebf6a2b0801p-2) got 0x1.ff84036f8f0b3p-2
	want 0x1.ff84036f8f0b7p-2. */
	double
	erfc (double x)
	{
	/* Get top words. */
	uint32_t ix = top32 (x); /* We need to compare at most 32 bits. */
	uint32_t ia = ix & 0x7fffffff;
	uint32_t sign = ix >> 31;

	/* Handle special cases and small values with a single comparison:
	abstop12(x)-abstop12(small) >= abstop12(INFINITY)-abstop12(small)
	Special cases erfc(nan)=nan, erfc(+inf)=0 and erfc(-inf)=2
	Errno EDOM does not have to be set in case of erfc(nan).
	Only ERANGE may be set in case of underflow.
	Small values (\|x\|<small)
	\|x\|<0x1.0p-56 => accurate up to 0.5 ULP (top12(0x1p-50) = 0x3c7)
	\|x\|<0x1.0p-50 => accurate up to 1.0 ULP (top12(0x1p-50) = 0x3cd). */
	if (unlikely (abstop12 (x) - 0x3cd >= (abstop12 (INFINITY) & 0x7ff) - 0x3cd))
	{
	if (abstop12 (x) >= 0x7ff)
	return (double) (sign << 1) + 1.0 / x; /* special cases. */
	else
	return 1.0 - x; /* small case. */
	}
	else if (ia < 0x40180000)
	{ /* \|x\| < 6.0. */
	return approx_erfc_lo (x, sign, get_itv_idx (x));
	}
	else if (sign)
	{ /* x <= -6.0. */
	return 2.0;
	}
	else if (ia < 0x403c0000)
	{ /* 6.0 <= x < 28. */
	return approx_erfc_hi (x, get_itv_idx (x));
	}
	else
	{ /* x > 28. */
	return __math_uflow (0);
	}
	}

	PL_SIG (S, D, 1, erfc, -6.0, 28.0)
	PL_TEST_ULP (erfc, 3.56)
	PL_TEST_INTERVAL (erfc, 0, 0xffff0000, 10000)
	PL_TEST_INTERVAL (erfc, 0x1p-1022, 0x1p-26, 40000)
	PL_TEST_INTERVAL (erfc, -0x1p-1022, -0x1p-26, 40000)
	PL_TEST_INTERVAL (erfc, 0x1p-26, 0x1p5, 40000)
	PL_TEST_INTERVAL (erfc, -0x1p-26, -0x1p3, 40000)
	PL_TEST_INTERVAL (erfc, 0, inf, 40000)