-
-
Notifications
You must be signed in to change notification settings - Fork 10.9k
Rewrite complex log1p to improve precision. #26798
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#include <Python.h> | ||
#define NPY_NO_DEPRECATED_API NPY_API_VERSION | ||
#include "numpy/npy_math.h" | ||
#include "numpy/ndarraytypes.h" | ||
#include <cmath> | ||
#include <complex> | ||
#include "log1p_complex.h" | ||
|
||
extern "C" { | ||
|
||
/**begin repeat | ||
* #c_fp_type = float, double, long double# | ||
* #np_cplx_type = npy_cfloat,npy_cdouble,npy_clongdouble# | ||
* #c = f, , l# | ||
*/ | ||
|
||
/* | ||
* C wrapper for log1p_complex(z). This function is to be used only | ||
* when the input is close to the unit circle centered at -1+0j. | ||
*/ | ||
NPY_NO_EXPORT void | ||
clog1p@c@(@np_cplx_type@ *x, @np_cplx_type@ *r) | ||
{ | ||
std::complex<@c_fp_type@> z{npy_creal@c@(*x), npy_cimag@c@(*x)}; | ||
auto w = log1p_complex::log1p_complex(z); | ||
npy_csetreal@c@(r, w.real()); | ||
npy_csetimag@c@(r, w.imag()); | ||
} | ||
|
||
/**end repeat**/ | ||
|
||
} // extern "C" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#ifndef CLOG1P_WRAPPERS_H | ||
#define CLOG1P_WRAPPERS_H | ||
|
||
// This header is to be included in umathmodule.c, | ||
// so it will be processed by a C compiler. | ||
// | ||
// This file assumes that the numpy header files have | ||
// already been included. | ||
|
||
NPY_NO_EXPORT void | ||
clog1pf(npy_cfloat *z, npy_cfloat *r); | ||
|
||
NPY_NO_EXPORT void | ||
clog1p(npy_cdouble *z, npy_cdouble *r); | ||
|
||
NPY_NO_EXPORT void | ||
clog1pl(npy_clongdouble *z, npy_clongdouble *r); | ||
|
||
#endif // CLOG1P_WRAPPERS_H |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,260 @@ | ||
#ifndef LOG1P_COMPLEX_H | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is C++ only, I think. So let's rename it to Also please adjust the include guard to be the google-style full file path (see other files). |
||
#define LOG1P_COMPLEX_H | ||
|
||
#include <Python.h> | ||
#include "numpy/ndarraytypes.h" | ||
#include "numpy/npy_math.h" | ||
|
||
#include <cmath> | ||
#include <complex> | ||
#include <limits> | ||
|
||
// For memcpy | ||
#include <cstring> | ||
|
||
|
||
// | ||
// Trivial C++ wrappers for several npy_* functions. | ||
// | ||
|
||
#define CPP_WRAP1(name) \ | ||
inline float name(const float x) \ | ||
{ \ | ||
return ::npy_ ## name ## f(x); \ | ||
} \ | ||
inline double name(const double x) \ | ||
{ \ | ||
return ::npy_ ## name(x); \ | ||
} \ | ||
inline long double name(const long double x) \ | ||
{ \ | ||
return ::npy_ ## name ## l(x); \ | ||
} \ | ||
|
||
#define CPP_WRAP2(name) \ | ||
inline float name(const float x, \ | ||
const float y) \ | ||
{ \ | ||
return ::npy_ ## name ## f(x, y); \ | ||
} \ | ||
inline double name(const double x, \ | ||
const double y) \ | ||
{ \ | ||
return ::npy_ ## name(x, y); \ | ||
} \ | ||
inline long double name(const long double x, \ | ||
const long double y) \ | ||
{ \ | ||
return ::npy_ ## name ## l(x, y); \ | ||
} \ | ||
|
||
|
||
namespace npy { | ||
|
||
CPP_WRAP1(fabs) | ||
CPP_WRAP1(log) | ||
CPP_WRAP1(log1p) | ||
CPP_WRAP2(atan2) | ||
CPP_WRAP2(hypot) | ||
|
||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess we don't have a nicer C++ pattern/common spot for these yet, so this looks fine. |
||
|
||
namespace log1p_complex | ||
{ | ||
|
||
template<typename T> | ||
struct doubled_t { | ||
T upper; | ||
T lower; | ||
}; | ||
|
||
// | ||
// There are three functions below where it is crucial that the | ||
// expressions are not optimized. E.g. `t - (t - x)` must not be | ||
// simplified by the compiler to just `x`. The NO_OPT macro defines | ||
// an attribute that should turn off optimization for the function. | ||
// | ||
// The inclusion of `gnu::target("fpmath=sse")` when __GNUC__ and | ||
// __i386 are defined also turns off the use of the floating-point | ||
// unit '387'. It is important that when the type is, for example, | ||
// `double`, these functions compute their results with 64 bit | ||
// precision, and not with 80 bit extended precision. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this makes me worried that this may break e.g. on MSVC (depending on optimization levels)!? Are there tests that would fail if it does? I suspect you should rather use something like (may require 2 volatiles):
I suppose that may fail with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was trying to find a failure mode for either (also the sse forcing), but beyond the intel compiler effectively defaulting to fast-math I guess, I couldn't. |
||
// | ||
|
||
#if defined(__clang__) | ||
#define NO_OPT [[clang::optnone]] | ||
#elif defined(__GNUC__) | ||
#if defined(__i386) | ||
#define NO_OPT [[gnu::optimize(0),gnu::target("fpmath=sse")]] | ||
#else | ||
#define NO_OPT [[gnu::optimize(0)]] | ||
#endif | ||
#else | ||
#define NO_OPT | ||
#endif | ||
|
||
// | ||
// Dekker splitting. See, for example, Theorem 1 of | ||
// | ||
// Seppa Linnainmaa, Software for Double-Precision Floating-Point | ||
// Computations, ACM Transactions on Mathematical Software, Vol 7, No 3, | ||
// September 1981, pages 272-283. | ||
// | ||
// or Theorem 17 of | ||
// | ||
// J. R. Shewchuk, Adaptive Precision Floating-Point Arithmetic and | ||
// Fast Robust Geometric Predicates, CMU-CS-96-140R, from Discrete & | ||
// Computational Geometry 18(3):305-363, October 1997. | ||
// | ||
template<typename T> | ||
NO_OPT inline void | ||
split(T x, doubled_t<T>& out) | ||
{ | ||
if (std::numeric_limits<T>::digits == 106) { | ||
// Special case: IBM double-double format. The value is already | ||
// split in memory, so there is no need for any calculations. | ||
std::memcpy(&out, &x, sizeof(out)); | ||
} | ||
else { | ||
constexpr int halfprec = (std::numeric_limits<T>::digits + 1)/2; | ||
T t = ((1ull << halfprec) + 1)*x; | ||
// The compiler must not be allowed to simplify this expression: | ||
out.upper = t - (t - x); | ||
out.lower = x - out.upper; | ||
} | ||
} | ||
|
||
template<typename T> | ||
NO_OPT inline void | ||
two_sum_quick(T x, T y, doubled_t<T>& out) | ||
{ | ||
T r = x + y; | ||
T e = y - (r - x); | ||
out.upper = r; | ||
out.lower = e; | ||
} | ||
|
||
template<typename T> | ||
NO_OPT inline void | ||
two_sum(T x, T y, doubled_t<T>& out) | ||
{ | ||
T s = x + y; | ||
T v = s - x; | ||
T e = (x - (s - v)) + (y - v); | ||
out.upper = s; | ||
out.lower = e; | ||
} | ||
|
||
template<typename T> | ||
inline void | ||
double_sum(const doubled_t<T>& x, const doubled_t<T>& y, | ||
doubled_t<T>& out) | ||
{ | ||
two_sum<T>(x.upper, y.upper, out); | ||
out.lower += x.lower + y.lower; | ||
two_sum_quick<T>(out.upper, out.lower, out); | ||
} | ||
|
||
template<typename T> | ||
inline void | ||
square(T x, doubled_t<T>& out) | ||
{ | ||
doubled_t<T> xsplit; | ||
out.upper = x*x; | ||
split(x, xsplit); | ||
out.lower = xsplit.lower*xsplit.lower | ||
- ((out.upper - xsplit.upper*xsplit.upper) | ||
- 2*xsplit.lower*xsplit.upper); | ||
} | ||
|
||
// | ||
// As the name makes clear, this function computes x**2 + 2*x + y**2. | ||
// It uses doubled_t<T> for the intermediate calculations. | ||
// (That is, we give the floating point type T an upgrayedd, spelled with | ||
// two d's for a double dose of precision.) | ||
// | ||
// The function is used in log1p_complex() to avoid the loss of | ||
// precision that can occur in the expression when x**2 + y**2 ≈ -2*x. | ||
// | ||
template<typename T> | ||
inline T | ||
xsquared_plus_2x_plus_ysquared_dd(T x, T y) | ||
{ | ||
doubled_t<T> x2, y2, twox, sum1, sum2; | ||
|
||
square<T>(x, x2); // x2 = x**2 | ||
square<T>(y, y2); // y2 = y**2 | ||
twox.upper = 2*x; // twox = 2*x | ||
twox.lower = 0.0; | ||
double_sum<T>(x2, twox, sum1); // sum1 = x**2 + 2*x | ||
double_sum<T>(sum1, y2, sum2); // sum2 = x**2 + 2*x + y**2 | ||
return sum2.upper; | ||
} | ||
|
||
// | ||
// For the float type, the intermediate calculation is done | ||
// with the double type. We don't need to use doubled_t<float>. | ||
// | ||
inline float | ||
xsquared_plus_2x_plus_ysquared(float x, float y) | ||
{ | ||
double xd = x; | ||
double yd = y; | ||
return xd*(2.0 + xd) + yd*yd; | ||
} | ||
|
||
// | ||
// For double, we used doubled_t<double> if long double doesn't have | ||
// at least 106 bits of precision. | ||
// | ||
inline double | ||
xsquared_plus_2x_plus_ysquared(double x, double y) | ||
{ | ||
if (std::numeric_limits<long double>::digits >= 106) { | ||
// Cast to long double for the calculation. | ||
long double xd = x; | ||
long double yd = y; | ||
return xd*(2.0L + xd) + yd*yd; | ||
} | ||
else { | ||
// Use doubled_t<double> for the calculation. | ||
return xsquared_plus_2x_plus_ysquared_dd<double>(x, y); | ||
} | ||
} | ||
|
||
// | ||
// For long double, we always use doubled_t<long double> for the | ||
// calculation. | ||
// | ||
inline long double | ||
xsquared_plus_2x_plus_ysquared(long double x, long double y) | ||
{ | ||
return xsquared_plus_2x_plus_ysquared_dd<long double>(x, y); | ||
} | ||
|
||
// | ||
// Implement log1p(z) for complex inputs that are near the unit circle | ||
// centered at -1+0j. | ||
// | ||
// The function assumes that neither component of z is nan. | ||
// | ||
template<typename T> | ||
inline std::complex<T> | ||
log1p_complex(std::complex<T> z) | ||
{ | ||
T x = z.real(); | ||
T y = z.imag(); | ||
// The input is close to the unit circle centered at -1+0j. | ||
// Compute x**2 + 2*x + y**2 with higher precision than T. | ||
// The calculation here is equivalent to log(hypot(x+1, y)), | ||
// since | ||
// log(hypot(x+1, y)) = 0.5*log(x**2 + 2*x + 1 + y**2) | ||
// = 0.5*log1p(x**2 + 2*x + y**2) | ||
T t = xsquared_plus_2x_plus_ysquared(x, y); | ||
T lnr = 0.5*npy::log1p(t); | ||
return std::complex<T>(lnr, npy::atan2(y, x + static_cast<T>(1))); | ||
} | ||
|
||
} // namespace log1p_complex | ||
|
||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,6 +30,7 @@ | |
#include "string_ufuncs.h" | ||
#include "stringdtype_ufuncs.h" | ||
#include "special_integer_comparisons.h" | ||
#include "clog1p_wrappers.h" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe nicer to include this in the |
||
#include "extobj.h" /* for _extobject_contextvar exposure */ | ||
#include "ufunc_type_resolution.h" | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would have a mild preference to giving this a different name.
clog1p
isn't clear that it is only relevant in parts of the domain.Might be nice to avoid the
.src
style templating, I think that would mean making this astatic (inline)
templated function and then creating three explicit stubs that call it below.Since each stub has a single line, I would hope it isn't significantly longer.