Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Rewrite complex log1p to improve precision. #26798

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
Loading
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions 1 numpy/_core/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -1153,6 +1153,7 @@ src_umath = umath_gen_headers + [
src_file.process('src/umath/scalarmath.c.src'),
'src/umath/ufunc_object.c',
'src/umath/umathmodule.c',
src_file.process('src/umath/clog1p_wrappers.cpp.src'),
'src/umath/special_integer_comparisons.cpp',
'src/umath/string_ufuncs.cpp',
'src/umath/stringdtype_ufuncs.cpp',
Expand Down
32 changes: 32 additions & 0 deletions 32 numpy/_core/src/umath/clog1p_wrappers.cpp.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#include <Python.h>
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#include "numpy/npy_math.h"
#include "numpy/ndarraytypes.h"
#include <cmath>
#include <complex>
#include "log1p_complex.h"

extern "C" {

/**begin repeat
* #c_fp_type = float, double, long double#
* #np_cplx_type = npy_cfloat,npy_cdouble,npy_clongdouble#
* #c = f, , l#
*/

/*
* C wrapper for log1p_complex(z). This function is to be used only
* when the input is close to the unit circle centered at -1+0j.
*/
NPY_NO_EXPORT void
clog1p@c@(@np_cplx_type@ *x, @np_cplx_type@ *r)
{
    // Repack the npy complex value as std::complex, evaluate the
    // high-precision complex log1p, and unpack the result into *r.
    const std::complex<@c_fp_type@> z{npy_creal@c@(*x), npy_cimag@c@(*x)};
    const std::complex<@c_fp_type@> result = log1p_complex::log1p_complex(z);
    npy_csetreal@c@(r, result.real());
    npy_csetimag@c@(r, result.imag());
}

/**end repeat**/

} // extern "C"
19 changes: 19 additions & 0 deletions 19 numpy/_core/src/umath/clog1p_wrappers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#ifndef CLOG1P_WRAPPERS_H
#define CLOG1P_WRAPPERS_H

// This header is to be included in umathmodule.c,
// so it will be processed by a C compiler.
//
// This file assumes that the numpy header files have
// already been included.
//
// These are C-callable wrappers around the C++ implementation of
// log1p_complex(z).  They are intended to be used only when z is
// close to the unit circle centered at -1+0j, where the naive
// clog(1 + z) loses precision.

// float precision: *r = log1p(*z)
NPY_NO_EXPORT void
clog1pf(npy_cfloat *z, npy_cfloat *r);

// double precision: *r = log1p(*z)
NPY_NO_EXPORT void
clog1p(npy_cdouble *z, npy_cdouble *r);

// long double precision: *r = log1p(*z)
NPY_NO_EXPORT void
clog1pl(npy_clongdouble *z, npy_clongdouble *r);

#endif // CLOG1P_WRAPPERS_H
29 changes: 25 additions & 4 deletions 29 numpy/_core/src/umath/funcs.inc.src
Original file line number Diff line number Diff line change
Expand Up @@ -310,10 +310,31 @@ nc_log@c@(@ctype@ *x, @ctype@ *r)
/*
 * Complex log1p: r = log(1 + x).
 *
 * Inputs near the unit circle centered at -1+0j are dispatched to the
 * high-precision clog1p@c@ wrapper; everything else uses clog(1 + x).
 */
static void
nc_log1p@c@(@ctype@ *x, @ctype@ *r)
{
    @ftype@ re = npy_creal@c@(*x);
    @ftype@ im = npy_cimag@c@(*x);
    @ctype@ zp1 = npy_cpack@c@(re + 1, im);
    @ftype@ delta = 0.001;
    /*
     * Decide whether the high precision calculation is needed.  First
     * check that x lies inside a bounding box around the circle, and
     * only then evaluate npy_cabs@c@(zp1).  The box test comes first
     * to avoid the overflow that npy_cabs@c@(zp1) could hit when re
     * or im is very close to the maximum value of @ftype@.
     */
    if (re > -2 - delta && re < delta && im > -1 - delta && im < 1 + delta) {
        @ftype@ dist = npy_cabs@c@(zp1);
        if (dist > 1 - delta && dist < 1 + delta) {
            /* x is close to the circle: use the high precision path. */
            clog1p@c@(x, r);
            return;
        }
    }
    /*
     * Otherwise compute clog(1 + x) directly.  This branch is also
     * taken when re or im is nan, because the comparisons above all
     * evaluate to false for nan.
     */
    nc_log@c@(&zp1, r);
}

static void
Expand Down
260 changes: 260 additions & 0 deletions 260 numpy/_core/src/umath/log1p_complex.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
#ifndef LOG1P_COMPLEX_H
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is C++ only, I think. So let's rename it to .hpp?

Also please adjust the include guard to be the google-style full file path (see other files).

#define LOG1P_COMPLEX_H

#include <Python.h>
#include "numpy/ndarraytypes.h"
#include "numpy/npy_math.h"

#include <cmath>
#include <complex>
#include <limits>

// For memcpy
#include <cstring>


//
// Trivial C++ wrappers for several npy_* functions.
//

// CPP_WRAP1(name) defines float, double and long double overloads of
// `name` that forward to the matching one-argument npy_<name>{f,,l}
// C function, so templated code can call `name(x)` and pick the
// correct precision via overload resolution.
#define CPP_WRAP1(name) \
inline float name(const float x) \
{ \
    return ::npy_ ## name ## f(x); \
} \
inline double name(const double x) \
{ \
    return ::npy_ ## name(x); \
} \
inline long double name(const long double x) \
{ \
    return ::npy_ ## name ## l(x); \
} \

// CPP_WRAP2(name) is the same idea for two-argument npy_* functions.
#define CPP_WRAP2(name) \
inline float name(const float x, \
                  const float y) \
{ \
    return ::npy_ ## name ## f(x, y); \
} \
inline double name(const double x, \
                   const double y) \
{ \
    return ::npy_ ## name(x, y); \
} \
inline long double name(const long double x, \
                        const long double y) \
{ \
    return ::npy_ ## name ## l(x, y); \
} \


// Overload sets used by the log1p_complex implementation below.
namespace npy {

CPP_WRAP1(fabs)
CPP_WRAP1(log)
CPP_WRAP1(log1p)
CPP_WRAP2(atan2)
CPP_WRAP2(hypot)

}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we don't have a nicer C++ pattern/common spot for these yet, so this looks fine.


namespace log1p_complex
{

// A value stored as the unevaluated sum upper + lower, where |lower|
// is much smaller than |upper|.  The pair carries roughly twice the
// precision of T ("double-double" style arithmetic).
template<typename T>
struct doubled_t {
    T upper;  // leading (high-order) part
    T lower;  // trailing rounding-error term
};

//
// There are three functions below where it is crucial that the
// expressions are not optimized. E.g. `t - (t - x)` must not be
// simplified by the compiler to just `x`. The NO_OPT macro defines
// an attribute that should turn off optimization for the function.
//
// The inclusion of `gnu::target("fpmath=sse")` when __GNUC__ and
// __i386 are defined also turns off the use of the floating-point
// unit '387'. It is important that when the type is, for example,
// `double`, these functions compute their results with 64 bit
// precision, and not with 80 bit extended precision.
Copy link
Member

@seberg seberg Jul 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this makes me worried that this may break e.g. on MSVC (depending on optimization levels)!? Are there tests that would fail if it does?

I suspect you should rather use something like (may require 2 volatiles):

volatile T tmp = t - x;
t = t - tmp;

I suppose that may fail with fast-math enabled. If that failure is "graceful" as in: you get bad precision, I would ignore it. If you compile NumPy with fast-math you should get test failures.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was trying to find a failure mode for either (also the sse forcing), but beyond the intel compiler effectively defaulting to fast-math I guess, I couldn't.
The volatile always adds some store/load instructions, which I maybe avoids the use of the 80bit FPE registers as well (but maybe bad for speed?).

//

// Define NO_OPT, an attribute that disables optimization of the
// function it is applied to (see the comment block above for why the
// error-term expressions must not be simplified by the compiler).
#if defined(__clang__)
#define NO_OPT [[clang::optnone]]
#elif defined(__GNUC__)
#if defined(__i386)
// On 32-bit x86 also force SSE math, so intermediates are not held
// in the 80-bit x87 registers.
#define NO_OPT [[gnu::optimize(0),gnu::target("fpmath=sse")]]
#else
#define NO_OPT [[gnu::optimize(0)]]
#endif
#else
// Unknown compiler: no known attribute; expands to nothing.
#define NO_OPT
#endif

//
// Dekker splitting. See, for example, Theorem 1 of
//
//   Seppo Linnainmaa, Software for Double-Precision Floating-Point
//   Computations, ACM Transactions on Mathematical Software, Vol 7, No 3,
//   September 1981, pages 272-283.
//
// or Theorem 17 of
//
//   J. R. Shewchuk, Adaptive Precision Floating-Point Arithmetic and
//   Fast Robust Geometric Predicates, CMU-CS-96-140R, from Discrete &
//   Computational Geometry 18(3):305-363, October 1997.
//
// Splits x into out.upper + out.lower such that each part fits in
// about half of T's significand, so products of parts are exact.
//
template<typename T>
NO_OPT inline void
split(T x, doubled_t<T>& out)
{
    if (std::numeric_limits<T>::digits == 106) {
        // Special case: IBM double-double format. The value is already
        // split in memory, so there is no need for any calculations.
        std::memcpy(&out, &x, sizeof(out));
    }
    else {
        constexpr int halfprec = (std::numeric_limits<T>::digits + 1)/2;
        // Multiplier (2**halfprec + 1) implements the Dekker split.
        T t = ((1ull << halfprec) + 1)*x;
        // The compiler must not be allowed to simplify this expression:
        out.upper = t - (t - x);
        out.lower = x - out.upper;
    }
}

// Fast2Sum: computes x + y exactly as out.upper + out.lower.
// NOTE(review): this form is only exact when |x| >= |y| (or x == 0);
// callers here pass the high-order part first — confirm for new uses.
template<typename T>
NO_OPT inline void
two_sum_quick(T x, T y, doubled_t<T>& out)
{
    T r = x + y;
    // Rounding error of r; must not be optimized to 0.
    T e = y - (r - x);
    out.upper = r;
    out.lower = e;
}

// 2Sum: computes x + y exactly as out.upper + out.lower, with no
// precondition on the relative magnitudes of x and y.
template<typename T>
NO_OPT inline void
two_sum(T x, T y, doubled_t<T>& out)
{
    T s = x + y;
    T v = s - x;
    // Exact rounding error of s; the grouping must be preserved.
    T e = (x - (s - v)) + (y - v);
    out.upper = s;
    out.lower = e;
}

// Adds two double-length values: out = x + y, renormalized so that
// out.upper holds the leading bits and out.lower the residual error.
template<typename T>
inline void
double_sum(const doubled_t<T>& x, const doubled_t<T>& y,
           doubled_t<T>& out)
{
    two_sum<T>(x.upper, y.upper, out);
    out.lower += x.lower + y.lower;
    // Renormalize; out.upper dominates out.lower here, so the quick
    // (Fast2Sum) variant suffices.
    two_sum_quick<T>(out.upper, out.lower, out);
}

// Computes x*x exactly as out.upper + out.lower, recovering the
// rounding error of the product via Dekker splitting.
template<typename T>
inline void
square(T x, doubled_t<T>& out)
{
    doubled_t<T> xsplit;
    out.upper = x*x;
    split(x, xsplit);
    // Reconstruct the exact low-order part of x*x from the split
    // halves; the grouping of the terms is significant.
    out.lower = xsplit.lower*xsplit.lower
                - ((out.upper - xsplit.upper*xsplit.upper)
                   - 2*xsplit.lower*xsplit.upper);
}

//
// As the name makes clear, this function computes x**2 + 2*x + y**2.
// It uses doubled_t<T> for the intermediate calculations.
// (That is, we give the floating point type T an upgrayedd, spelled with
// two d's for a double dose of precision.)
//
// The function is used in log1p_complex() to avoid the loss of
// precision that can occur in the expression when x**2 + y**2 ≈ -2*x.
//
template<typename T>
inline T
xsquared_plus_2x_plus_ysquared_dd(T x, T y)
{
    doubled_t<T> x2, y2, twox, sum1, sum2;

    square<T>(x, x2);               // x2 = x**2
    square<T>(y, y2);               // y2 = y**2
    twox.upper = 2*x;               // twox = 2*x (exact, so lower is 0)
    twox.lower = 0.0;
    double_sum<T>(x2, twox, sum1);  // sum1 = x**2 + 2*x
    double_sum<T>(sum1, y2, sum2);  // sum2 = x**2 + 2*x + y**2
    // Only the leading part is needed; the cancellation has already
    // been absorbed by the double-length arithmetic.
    return sum2.upper;
}

//
// Overload for float: promote to double for the intermediate
// arithmetic.  double carries more than twice float's precision, so
// the doubled_t<float> machinery is unnecessary here.
//
inline float
xsquared_plus_2x_plus_ysquared(float x, float y)
{
    const double xw = static_cast<double>(x);
    const double yw = static_cast<double>(y);
    return yw*yw + xw*(xw + 2.0);
}

//
// Overload for double: when long double offers at least 106 bits of
// precision (more than twice double's 53), promote to long double;
// otherwise fall back to the doubled_t<double> computation.
//
inline double
xsquared_plus_2x_plus_ysquared(double x, double y)
{
    if (std::numeric_limits<long double>::digits >= 106) {
        const long double xw = x;
        const long double yw = y;
        return yw*yw + xw*(xw + 2.0L);
    }
    return xsquared_plus_2x_plus_ysquared_dd<double>(x, y);
}

//
// For long double, we always use doubled_t<long double> for the
// calculation.  (There is no standard floating point type wider than
// long double to promote to.)
//
inline long double
xsquared_plus_2x_plus_ysquared(long double x, long double y)
{
    return xsquared_plus_2x_plus_ysquared_dd<long double>(x, y);
}

//
// Implement log1p(z) for complex inputs that are near the unit circle
// centered at -1+0j.
//
// The function assumes that neither component of z is nan.
//
// Returns log(1 + z).  The real part is computed from a higher
// precision expansion of x**2 + 2*x + y**2 to avoid the catastrophic
// cancellation that occurs there when |1 + z| is close to 1.
//
template<typename T>
inline std::complex<T>
log1p_complex(std::complex<T> z)
{
    T x = z.real();
    T y = z.imag();
    // The input is close to the unit circle centered at -1+0j.
    // Compute x**2 + 2*x + y**2 with higher precision than T.
    // The calculation here is equivalent to log(hypot(x+1, y)),
    // since
    //    log(hypot(x+1, y)) = 0.5*log(x**2 + 2*x + 1 + y**2)
    //                       = 0.5*log1p(x**2 + 2*x + y**2)
    T t = xsquared_plus_2x_plus_ysquared(x, y);
    T lnr = 0.5*npy::log1p(t);
    // Imaginary part: ordinary atan2 of (y, x + 1); the angle does
    // not suffer the cancellation, so no extra precision is needed.
    return std::complex<T>(lnr, npy::atan2(y, x + static_cast<T>(1)));
}

} // namespace log1p_complex

#endif
1 change: 1 addition & 0 deletions 1 numpy/_core/src/umath/umathmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "string_ufuncs.h"
#include "stringdtype_ufuncs.h"
#include "special_integer_comparisons.h"
#include "clog1p_wrappers.h"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe nicer to include this in the funcs.inc.src code? Since it is used here only indirectly?
OTOH, I guess it may be that a few other imports should maybe be also be moved by that logic.

#include "extobj.h" /* for _extobject_contextvar exposure */
#include "ufunc_type_resolution.h"

Expand Down
Loading
Morty Proxy This is a proxified and sanitized view of the page, visit original site.