/* o-----------------------------------------------------------------------------o | | Number module implementation | | Methodical Accelerator Design - Copyright (c) 2016+ | Support: http://cern.ch/mad - mad at cern.ch | Authors: L. Deniau, laurent.deniau at cern.ch | Contrib: - | o-----------------------------------------------------------------------------o | You can redistribute this file and/or modify it under the terms of the GNU | General Public License GPLv3 (or later), as published by the Free Software | Foundation. This file is distributed in the hope that it will be useful, but | WITHOUT ANY WARRANTY OF ANY KIND. See http://gnu.org/licenses for details. o-----------------------------------------------------------------------------o */ #include #include #include #include #include #include "mad_num.h" #include "mad_cst.h" // --- helpers ----------------------------------------------------------------o static inline num_t fact(int n) { static num_t f[171] = {1, 0}; if (!f[1]) for (int i=1; i < 171; ++i) f[i] = i*f[i-1]; return n < 171 ? f[n] : INFINITY; } // --- implementation ---------------------------------------------------------o #define CHKR assert( r ) #define CPX(a) CPX2(MKNAME(a,_re), MKNAME(a,_im)) #define CPX2(a,b) (* (cpx_t*) & (num_t[2]) { a, b }) // --- num int mad_num_sign1 (num_t x) { return ((int[]){ -1, 1 })[!signbit(x)]; // -1, 1: works for ±0, ±inf and ±NaN } int mad_num_sign (num_t x) { return x == 0 ? 0 : mad_num_sign1(x); // -1, 0, 1 } num_t mad_num_fact (int n) { int s = 1; if (n < 0) n = -n, s = n & 1 ? -s : s; return s*fact(n); } num_t mad_num_sinc (num_t x) { return fabs(x)<1e-4 ? 1 - 0.1666666666666666666667*x*x : sin(x)/x; } num_t mad_num_sinhc (num_t x) { return fabs(x)<1e-4 ? 1 + 0.1666666666666666666667*x*x : sinh(x)/x; } num_t mad_num_asinc (num_t x) { return fabs(x)<1e-4 ? 1 + 0.1666666666666666666667*x*x : asin(x)/x; } num_t mad_num_asinhc (num_t x) { return fabs(x)<1e-4 ? 1 - 0.1666666666666666666667*x*x : asinh(x)/x; } num_t mad_num_powi (num_t x, int n) { num_t r = 1; if (n < 0) n = -n, x = 1/x; for (;;) { if (n & 1) r *= x; if (n >>= 1) x *= x; else break; } return r; } // --- cpx cpx_t mad_cpx_div (cpx_t x, cpx_t y) { // REFERENCES // // [1] Robert L. Smith. Algorithm 116: Complex division. Commun. ACM, // 5(8):435, 1962. // // [2] Michael Baudin and Robert L. Smith. "A robust complex division in // Scilab," October 2012, available at http://arxiv.org/abs/1210.4539. #define RBIG (DBL_MAX/2) #define RMIN (DBL_MIN) #define RMIN2 (0x1.0p-512) #define RMINSCAL (0x1.0p+510) num_t x_re = creal(x), x_im = cimag(x); num_t y_re = creal(y), y_im = cimag(y); num_t r_re, r_im, denom, ratio; if (fabs(y_re) < fabs(y_im)) { /* avoid overflow/underflow issues when y_re and y_im are small */ if (fabs(y_im) < RMIN2) { x_re = x_re * RMINSCAL; x_im = x_im * RMINSCAL; y_re = y_re * RMINSCAL; y_im = y_im * RMINSCAL; } // /* prevent overflow when arguments are near max representable */ // else if ((fabs(y_im) >= RBIG) || (fabs(x_re) >= RBIG) || (fabs(x_im) >= RBIG)) { // x_re = x_re * 0.5; // x_im = x_im * 0.5; // y_re = y_re * 0.5; // y_im = y_im * 0.5; // } ratio = y_re / y_im; denom = (y_re * ratio) + y_im; if (fabs(ratio) > RMIN) { r_re = ((x_re * ratio) + x_im) / denom; r_im = ((x_im * ratio) - x_re) / denom; } else { r_re = ((y_re * (x_re / y_im)) + x_im) / denom; r_im = ((y_re * (x_im / y_im)) - x_re) / denom; } } else { /* avoid overflow issues when y_re and y_im are small */ if (fabs(y_re) < RMIN2) { x_re = x_re * RMINSCAL; x_im = x_im * RMINSCAL; y_re = y_re * RMINSCAL; y_im = y_im * RMINSCAL; } // /* prevent overflow when arguments are near max representable */ // else if ((fabs(y_re) >= RBIG) || (fabs(x_re) >= RBIG) || (fabs(x_im) >= RBIG)) { // x_re = x_re * 0.5; // x_im = x_im * 0.5; // y_re = y_re * 0.5; // y_im = y_im * 0.5; // } ratio = y_im / y_re; denom = (y_im * ratio) + y_re; if (fabs(ratio) > RMIN) { r_re = (x_re + (x_im * ratio)) / denom; r_im = (x_im - (x_re * ratio)) / denom; } else { r_re = (x_re + (y_im * (x_im / y_re))) / denom; r_im = (x_im - (y_im * (x_re / y_re))) / denom; } } return CPX(r); #undef RBIG #undef RMIN #undef RMIN2 #undef RMINSCAL } cpx_t mad_cpx_inv (cpx_t x) { return mad_cpx_div(1, x); } cpx_t mad_cpx_sinc (cpx_t x) { return cabs(x)<1e-4 ? 1 - 0.1666666666666666666667*x*x : mad_cpx_div(csin(x), x); } cpx_t mad_cpx_sinhc (cpx_t x) { return cabs(x)<1e-4 ? 1 + 0.1666666666666666666667*x*x : mad_cpx_div(csinh(x), x); } cpx_t mad_cpx_asinc (cpx_t x) { return cabs(x)<1e-4 ? 1 + 0.1666666666666666666667*x*x : mad_cpx_div(casin(x), x); } cpx_t mad_cpx_asinhc (cpx_t x) { return cabs(x)<1e-4 ? 1 - 0.1666666666666666666667*x*x : mad_cpx_div(casinh(x), x); } cpx_t mad_cpx_powi (cpx_t x, int n) { cpx_t r = 1; if (n < 0) n = -n, x = mad_cpx_inv(x); for (;;) { if (n & 1) r *= x; if (n >>= 1) x *= x; else break; } return r; } num_t mad_cpx_abs_r (num_t x_re, num_t x_im) { return cabs( CPX(x) ); } num_t mad_cpx_arg_r (num_t x_re, num_t x_im) { return carg( CPX(x) ); } void mad_cpx_sqrt_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = csqrt ( CPX(x) ); } void mad_cpx_exp_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = cexp ( CPX(x) ); } void mad_cpx_log_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = clog ( CPX(x) ); } void mad_cpx_log10_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = clog ( CPX(x) )/log(10); } void mad_cpx_sin_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = csin ( CPX(x) ); } void mad_cpx_cos_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = ccos ( CPX(x) ); } void mad_cpx_tan_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = ctan ( CPX(x) ); } void mad_cpx_sinh_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = csinh ( CPX(x) ); } void mad_cpx_cosh_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = ccosh ( CPX(x) ); } void mad_cpx_tanh_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = ctanh ( CPX(x) ); } void mad_cpx_asin_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = casin ( CPX(x) ); } void mad_cpx_acos_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = cacos ( CPX(x) ); } void mad_cpx_atan_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = catan ( CPX(x) ); } void mad_cpx_asinh_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = casinh ( CPX(x) ); } void mad_cpx_acosh_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = cacosh ( CPX(x) ); } void mad_cpx_atanh_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = catanh ( CPX(x) ); } void mad_cpx_proj_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = cproj ( CPX(x) ); } void mad_cpx_sinc_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = mad_cpx_sinc ( CPX(x) ); } void mad_cpx_sinhc_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = mad_cpx_sinhc ( CPX(x) ); } void mad_cpx_asinc_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = mad_cpx_asinc ( CPX(x) ); } void mad_cpx_asinhc_r(num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = mad_cpx_asinhc( CPX(x) ); } void mad_cpx_invsqrt_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = csqrt(mad_cpx_inv(CPX(x))); } void mad_cpx_inv_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = mad_cpx_inv(CPX(x)); } void mad_cpx_powi_r (num_t x_re, num_t x_im, int n, cpx_t *r) { CHKR; *r = mad_cpx_powi( CPX(x), n ); } void mad_cpx_unit_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = CPX(x) / cabs( CPX(x) ); } void mad_cpx_rect_r (num_t rho, num_t ang, cpx_t *r) { CHKR; *r = CPX2( rho * cos(ang), rho * sin(ang) ); } void mad_cpx_polar_r (num_t x_re, num_t x_im, cpx_t *r) { CHKR; *r = CPX2( cabs(CPX(x)), carg(CPX(x)) ); } void mad_cpx_div_r (num_t x_re, num_t x_im, num_t y_re, num_t y_im, cpx_t *r) { CHKR; *r = mad_cpx_div(CPX(x), CPX(y)); } // This function uses floor() for compliance with the real modulo of Lua 5. // See https://en.wikipedia.org/wiki/Modulo_operation for other languages. void mad_cpx_mod_r (num_t x_re, num_t x_im, num_t y_re, num_t y_im, cpx_t *r) { CHKR; cpx_t cr = mad_cpx_div(CPX(x), CPX(y)); *r = CPX(x) - CPX(y) * CPX2(floor(creal(cr)), floor(cimag(cr))); } void mad_cpx_pow_r (num_t x_re, num_t x_im, num_t y_re, num_t y_im, cpx_t *r) { CHKR; *r = cpow( CPX(x), CPX(y) ); } // --- Faddeeva function and variants from MIT --------------------------------o #include "mad_erfw.h" num_t mad_num_wf (num_t x) { return Faddeeva_w_im (x); } num_t mad_num_erf (num_t x) { return Faddeeva_erf_re (x); } num_t mad_num_erfc (num_t x) { return Faddeeva_erfc_re (x); } num_t mad_num_erfi (num_t x) { return Faddeeva_erfi_re (x); } num_t mad_num_erfcx (num_t x) { return Faddeeva_erfcx_re (x); } num_t mad_num_dawson(num_t x) { return Faddeeva_Dawson_re(x); } cpx_t mad_cpx_wf (cpx_t x, num_t relerr) { return Faddeeva_w (x, relerr); } cpx_t mad_cpx_erf (cpx_t x, num_t relerr) { return Faddeeva_erf (x, relerr); } cpx_t mad_cpx_erfc (cpx_t x, num_t relerr) { return Faddeeva_erfc (x, relerr); } cpx_t mad_cpx_erfi (cpx_t x, num_t relerr) { return Faddeeva_erfi (x, relerr); } cpx_t mad_cpx_erfcx (cpx_t x, num_t relerr) { return Faddeeva_erfcx (x, relerr); } cpx_t mad_cpx_dawson(cpx_t x, num_t relerr) { return Faddeeva_Dawson(x, relerr); } void mad_cpx_wf_r (num_t x_re, num_t x_im, num_t relerr, cpx_t *r) { CHKR; *r = Faddeeva_w (CPX(x), relerr); } void mad_cpx_erf_r (num_t x_re, num_t x_im, num_t relerr, cpx_t *r) { CHKR; *r = Faddeeva_erf (CPX(x), relerr); } void mad_cpx_erfc_r (num_t x_re, num_t x_im, num_t relerr, cpx_t *r) { CHKR; *r = Faddeeva_erfc (CPX(x), relerr); } void mad_cpx_erfi_r (num_t x_re, num_t x_im, num_t relerr, cpx_t *r) { CHKR; *r = Faddeeva_erfi (CPX(x), relerr); } void mad_cpx_erfcx_r (num_t x_re, num_t x_im, num_t relerr, cpx_t *r) { CHKR; *r = Faddeeva_erfcx (CPX(x), relerr); } void mad_cpx_dawson_r (num_t x_re, num_t x_im, num_t relerr, cpx_t *r) { CHKR; *r = Faddeeva_Dawson (CPX(x), relerr); } // -- RNG XoShiRo256** --------------------------------------------------------o #define N 4 static inline u64_t splitmix64 (u64_t x) { u64_t z = x + 0x9e3779b97f4a7c15ULL; z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ULL; z = (z ^ (z >> 27)) * 0x94d049bb133111ebULL; return z ^ (z >> 31); } static inline u64_t rotl(const u64_t x, int k) { return (x << k) | (x >> (64 - k)); } u64_t mad_num_randi (prng_state_t *restrict st) { u64_t *s = st->s; const u64_t r = rotl(s[1] * 5, 7) * 9; const u64_t t = s[1] << 17; s[2] ^= s[0]; s[3] ^= s[1]; s[1] ^= s[2]; s[0] ^= s[3]; s[2] ^= t; s[3] = rotl(s[3], 45); return r; // number within [0,ULLONG_MAX] } union numbit { u64_t u; num_t d; }; num_t mad_num_rand (prng_state_t *restrict st) { u64_t x = mad_num_randi(st); const union numbit n = { .u = 0x3ffULL << 52 | x >> 12 }; return n.d - 1; // [1.,2.) -> [0.,1.) } void mad_num_randseed (prng_state_t *restrict st, num_t seed) { const union numbit n = { .d = seed }; st->s[0] = splitmix64(n.u); for (int i=1; i < N; i++) st->s[i] = splitmix64(st->s[i-1]); } void mad_num_randjump (prng_state_t *restrict st) { static const u64_t jump[N] = { 0x180ec6d33cfd0abaULL, 0xd5a61266f0c9392cULL, 0xa9582618e03fc9aaULL, 0x39abdc4529b1661cULL }; u64_t *s = st->s; u64_t s0=0, s1=0, s2=0, s3=0; for(int i=0; i < N; i++) for(int b=0; b < 64; b++) { if (jump[i] & 1llu << b) { s0 ^= s[0]; s1 ^= s[1]; s2 ^= s[2]; s3 ^= s[3]; } mad_num_randi(st); } s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; } #undef N // -- RNG MADX ----------------------------------------------------------------o #define MAX_RAND 1000000000 #define SCL_RAND 1e-9 #define NR_RAND 55 #define NJ_RAND 24 #define ND_RAND 21 static void irngen(xrng_state_t *rng) { int j; for (int i = 0; i < NJ_RAND; i++) { if ((j = rng->s[i] - rng->s[i+NR_RAND-NJ_RAND]) < 0) j += MAX_RAND; rng->s[i] = j; } for (int i = NJ_RAND; i < NR_RAND; i++) { if ((j = rng->s[i] - rng->s[i-NJ_RAND]) < 0) j += MAX_RAND; rng->s[i] = j; } rng->n = 0; } void mad_num_xrandseed (xrng_state_t *rng, u32_t seed) { int k = 1, j = seed % MAX_RAND; rng->s[NR_RAND-1] = j; for (int i = 0; i < NR_RAND-1; i++) { int ii = (ND_RAND*(i+1)) % NR_RAND; rng->s[ii-1] = k; if ((k = j - k) < 0) k += MAX_RAND; j = rng->s[ii-1]; } for (int i = 0; i < 3; i++) irngen(rng); } num_t mad_num_xrand (xrng_state_t *rng) // [0.,1.) { if (rng->n == NR_RAND) irngen(rng); return SCL_RAND * rng->s[rng->n++]; } u32_t mad_num_xrandi (xrng_state_t *rng) // [0,UINT_MAX] { if (rng->n == NR_RAND) irngen(rng); return rng->s[rng->n++]; } // -- OPENMP TEST -------------------------------------------------------------o num_t mad_num_suminv(u64_t n) { num_t s=0; #ifdef _OPENMP #pragma omp parallel for reduction(+ : s) #endif for (u64_t i=1; i <= n; ++i) s += 1.0/i; return s; }