Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

std::poisson_distribution and std::negative_binomial_distribution are terrible for small types like int8_t #56656

Copy link
Copy link
Open
@ldionne

Description

@ldionne
Issue body actions

The following tests fail miserably for std::negative_binomial_distribution on int8_t and uint8_t. We have similar problems for std::poisson_distribution.

Basically, I think we need to re-think the algorithm we use for std::poisson_distribution (which is what negative_binomial_distribution uses too). That might require an ABI break, however breaking the ABI of random distributions might be acceptable.

#include <random>
#include <numeric>
#include <vector>
#include <cassert>

template <class T>
T sqr(T x) {
    return x * x;
}

template <class T>
void test2() {
    typedef std::negative_binomial_distribution<T> D;
    typedef std::mt19937 G;
    G g;
    D d(30, .03125);
    const int N = 1000000;
    std::vector<typename D::result_type> u;
    for (int i = 0; i < N; ++i)
    {
        typename D::result_type v = d(g);
        assert(d.min() <= v && v <= d.max());
        u.push_back(v);
    }
    double mean = std::accumulate(u.begin(), u.end(),
                                          double(0)) / u.size();
    double var = 0;
    double skew = 0;
    double kurtosis = 0;
    for (unsigned i = 0; i < u.size(); ++i)
    {
        double dbl = (u[i] - mean);
        double d2 = sqr(dbl);
        var += d2;
        skew += dbl * d2;
        kurtosis += d2 * d2;
    }
    var /= u.size();
    double dev = std::sqrt(var);
    skew /= u.size() * dev * var;
    kurtosis /= u.size() * var * var;
    kurtosis -= 3;
    double x_mean = d.k() * (1 - d.p()) / d.p();
    double x_var = x_mean / d.p();
    double x_skew = (2 - d.p()) / std::sqrt(d.k() * (1 - d.p()));
    double x_kurtosis = 6. / d.k() + sqr(d.p()) / (d.k() * (1 - d.p()));
    assert(std::abs((mean - x_mean) / x_mean) < 0.01);
    assert(std::abs((var - x_var) / x_var) < 0.01);
    assert(std::abs((skew - x_skew) / x_skew) < 0.01);
    assert(std::abs((kurtosis - x_kurtosis) / x_kurtosis) < 0.01);
}

template <class T>
void test3() {
    typedef std::negative_binomial_distribution<T> D;
    typedef std::mt19937 G;
    G g;
    D d(40, .25);
    const int N = 1000000;
    std::vector<typename D::result_type> u;
    for (int i = 0; i < N; ++i)
    {
        typename D::result_type v = d(g);
        assert(d.min() <= v && v <= d.max());
        u.push_back(v);
    }
    double mean = std::accumulate(u.begin(), u.end(),
                                          double(0)) / u.size();
    double var = 0;
    double skew = 0;
    double kurtosis = 0;
    for (unsigned i = 0; i < u.size(); ++i)
    {
        double dbl = (u[i] - mean);
        double d2 = sqr(dbl);
        var += d2;
        skew += dbl * d2;
        kurtosis += d2 * d2;
    }
    var /= u.size();
    double dev = std::sqrt(var);
    skew /= u.size() * dev * var;
    kurtosis /= u.size() * var * var;
    kurtosis -= 3;
    double x_mean = d.k() * (1 - d.p()) / d.p();
    double x_var = x_mean / d.p();
    double x_skew = (2 - d.p()) / std::sqrt(d.k() * (1 - d.p()));
    double x_kurtosis = 6. / d.k() + sqr(d.p()) / (d.k() * (1 - d.p()));
    assert(std::abs((mean - x_mean) / x_mean) < 0.01);
    assert(std::abs((var - x_var) / x_var) < 0.01);
    assert(std::abs((skew - x_skew) / x_skew) < 0.01);
    assert(std::abs((kurtosis - x_kurtosis) / x_kurtosis) < 0.03);
}

template <class T>
void test5() {
    typedef std::negative_binomial_distribution<T> D;
    typedef std::mt19937 G;
    G g;
    D d(127, 0.5);
    const int N = 1000000;
    std::vector<typename D::result_type> u;
    for (int i = 0; i < N; ++i)
    {
        typename D::result_type v = d(g);
        assert(d.min() <= v && v <= d.max());
        u.push_back(v);
    }
    double mean = std::accumulate(u.begin(), u.end(),
                                          double(0)) / u.size();
    double var = 0;
    double skew = 0;
    double kurtosis = 0;
    for (unsigned i = 0; i < u.size(); ++i)
    {
        double dbl = (u[i] - mean);
        double d2 = sqr(dbl);
        var += d2;
        skew += dbl * d2;
        kurtosis += d2 * d2;
    }
    var /= u.size();
    double dev = std::sqrt(var);
    skew /= u.size() * dev * var;
    kurtosis /= u.size() * var * var;
    kurtosis -= 3;
    double x_mean = d.k() * (1 - d.p()) / d.p();
    double x_var = x_mean / d.p();
    double x_skew = (2 - d.p()) / std::sqrt(d.k() * (1 - d.p()));
    double x_kurtosis = 6. / d.k() + sqr(d.p()) / (d.k() * (1 - d.p()));
    assert(std::abs((mean - x_mean) / x_mean) < 0.01);
    assert(std::abs((var - x_var) / x_var) < 0.01);
    assert(std::abs((skew - x_skew) / x_skew) < 0.04);
    assert(std::abs((kurtosis - x_kurtosis) / x_kurtosis) < 0.05);
}

template <class T>
void test6() {
    typedef std::negative_binomial_distribution<T> D;
    typedef std::mt19937 G;
    G g;
    D d(1, 0.05);
    const int N = 1000000;
    std::vector<typename D::result_type> u;
    for (int i = 0; i < N; ++i)
    {
        typename D::result_type v = d(g);
        assert(d.min() <= v && v <= d.max());
        u.push_back(v);
    }
    double mean = std::accumulate(u.begin(), u.end(),
                                          double(0)) / u.size();
    double var = 0;
    double skew = 0;
    double kurtosis = 0;
    for (unsigned i = 0; i < u.size(); ++i)
    {
        double dbl = (u[i] - mean);
        double d2 = sqr(dbl);
        var += d2;
        skew += dbl * d2;
        kurtosis += d2 * d2;
    }
    var /= u.size();
    double dev = std::sqrt(var);
    skew /= u.size() * dev * var;
    kurtosis /= u.size() * var * var;
    kurtosis -= 3;
    double x_mean = d.k() * (1 - d.p()) / d.p();
    double x_var = x_mean / d.p();
    double x_skew = (2 - d.p()) / std::sqrt(d.k() * (1 - d.p()));
    double x_kurtosis = 6. / d.k() + sqr(d.p()) / (d.k() * (1 - d.p()));
    assert(std::abs((mean - x_mean) / x_mean) < 0.01);
    assert(std::abs((var - x_var) / x_var) < 0.01);
    assert(std::abs((skew - x_skew) / x_skew) < 0.01);
    assert(std::abs((kurtosis - x_kurtosis) / x_kurtosis) < 0.03);
}

template <class T>
void tests() {
    test2<T>();
    test3<T>();
    test5<T>();
    test6<T>();
}

int main(int, char**) {
    tests<int8_t>();
    tests<uint8_t>();
    return 0;
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancementImproving things as opposed to bug fixing, e.g. new or missing featureImproving things as opposed to bug fixing, e.g. new or missing featurelibc++libc++ C++ Standard Library. Not GNU libstdc++. Not libc++abi.libc++ C++ Standard Library. Not GNU libstdc++. Not libc++abi.random

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions

      Morty Proxy This is a proxified and sanitized view of the page, visit original site.