Open
Description
The following tests fail miserably for `std::negative_binomial_distribution` on `int8_t` and `uint8_t`. We have similar problems for `std::poisson_distribution`.
Basically, I think we need to rethink the algorithm we use for `std::poisson_distribution` (which is what `std::negative_binomial_distribution` uses too). That might require an ABI break; however, breaking the ABI of random distributions might be acceptable.
#include <cassert>
#include <cmath>
#include <cstdint>
#include <numeric>
#include <random>
#include <vector>
// Returns `x` multiplied by itself, converted back to T.
template <class T>
T sqr(T x) {
    const T squared = x * x;
    return squared;
}
// Statistical check of std::negative_binomial_distribution<T> with k = 30,
// p = 0.03125.
//
// Draws 1,000,000 values from a default-constructed std::mt19937, asserts that
// every value lies in [d.min(), d.max()], and then asserts that the sample
// mean, variance, skewness and kurtosis-minus-3 each differ from the values
// computed from d.k() and d.p() by less than 1% (relative error).
template <class T>
void test2() {
    using D = std::negative_binomial_distribution<T>;
    using G = std::mt19937;
    using Sample = typename D::result_type;

    G g;
    D d(30, .03125);
    const int N = 1000000;

    std::vector<Sample> u;
    u.reserve(N);  // sample count is known up front: allocate once, never regrow
    for (int i = 0; i < N; ++i) {
        const Sample v = d(g);
        assert(d.min() <= v && v <= d.max());
        u.push_back(v);
    }

    double mean = std::accumulate(u.begin(), u.end(), double(0)) / u.size();

    // Sum the 2nd, 3rd and 4th powers of each sample's deviation from the mean.
    double var = 0;
    double skew = 0;
    double kurtosis = 0;
    for (const Sample sample : u) {
        const double dbl = sample - mean;
        const double d2 = sqr(dbl);
        var += d2;
        skew += dbl * d2;
        kurtosis += d2 * d2;
    }
    var /= u.size();
    double dev = std::sqrt(var);
    skew /= u.size() * dev * var;
    kurtosis /= u.size() * var * var;
    kurtosis -= 3;  // compared below against x_kurtosis, which has no +3 term

    // Reference values derived from the distribution's own parameters.
    double x_mean = d.k() * (1 - d.p()) / d.p();
    double x_var = x_mean / d.p();
    double x_skew = (2 - d.p()) / std::sqrt(d.k() * (1 - d.p()));
    double x_kurtosis = 6. / d.k() + sqr(d.p()) / (d.k() * (1 - d.p()));

    assert(std::abs((mean - x_mean) / x_mean) < 0.01);
    assert(std::abs((var - x_var) / x_var) < 0.01);
    assert(std::abs((skew - x_skew) / x_skew) < 0.01);
    assert(std::abs((kurtosis - x_kurtosis) / x_kurtosis) < 0.01);
}
// Statistical check of std::negative_binomial_distribution<T> with k = 40,
// p = 0.25.
//
// Draws 1,000,000 values from a default-constructed std::mt19937, asserts that
// every value lies in [d.min(), d.max()], and then asserts that the sample
// mean, variance and skewness are within 1% (relative error) of the values
// computed from d.k() and d.p(), and kurtosis-minus-3 within 3%.
template <class T>
void test3() {
    using D = std::negative_binomial_distribution<T>;
    using G = std::mt19937;
    using Sample = typename D::result_type;

    G g;
    D d(40, .25);
    const int N = 1000000;

    std::vector<Sample> u;
    u.reserve(N);  // sample count is known up front: allocate once, never regrow
    for (int i = 0; i < N; ++i) {
        const Sample v = d(g);
        assert(d.min() <= v && v <= d.max());
        u.push_back(v);
    }

    double mean = std::accumulate(u.begin(), u.end(), double(0)) / u.size();

    // Sum the 2nd, 3rd and 4th powers of each sample's deviation from the mean.
    double var = 0;
    double skew = 0;
    double kurtosis = 0;
    for (const Sample sample : u) {
        const double dbl = sample - mean;
        const double d2 = sqr(dbl);
        var += d2;
        skew += dbl * d2;
        kurtosis += d2 * d2;
    }
    var /= u.size();
    double dev = std::sqrt(var);
    skew /= u.size() * dev * var;
    kurtosis /= u.size() * var * var;
    kurtosis -= 3;  // compared below against x_kurtosis, which has no +3 term

    // Reference values derived from the distribution's own parameters.
    double x_mean = d.k() * (1 - d.p()) / d.p();
    double x_var = x_mean / d.p();
    double x_skew = (2 - d.p()) / std::sqrt(d.k() * (1 - d.p()));
    double x_kurtosis = 6. / d.k() + sqr(d.p()) / (d.k() * (1 - d.p()));

    assert(std::abs((mean - x_mean) / x_mean) < 0.01);
    assert(std::abs((var - x_var) / x_var) < 0.01);
    assert(std::abs((skew - x_skew) / x_skew) < 0.01);
    assert(std::abs((kurtosis - x_kurtosis) / x_kurtosis) < 0.03);
}
// Statistical check of std::negative_binomial_distribution<T> with k = 127,
// p = 0.5.
//
// Draws 1,000,000 values from a default-constructed std::mt19937, asserts that
// every value lies in [d.min(), d.max()], and then asserts that the sample
// mean and variance are within 1% (relative error) of the values computed from
// d.k() and d.p(), skewness within 4%, and kurtosis-minus-3 within 5%.
template <class T>
void test5() {
    using D = std::negative_binomial_distribution<T>;
    using G = std::mt19937;
    using Sample = typename D::result_type;

    G g;
    D d(127, 0.5);
    const int N = 1000000;

    std::vector<Sample> u;
    u.reserve(N);  // sample count is known up front: allocate once, never regrow
    for (int i = 0; i < N; ++i) {
        const Sample v = d(g);
        assert(d.min() <= v && v <= d.max());
        u.push_back(v);
    }

    double mean = std::accumulate(u.begin(), u.end(), double(0)) / u.size();

    // Sum the 2nd, 3rd and 4th powers of each sample's deviation from the mean.
    double var = 0;
    double skew = 0;
    double kurtosis = 0;
    for (const Sample sample : u) {
        const double dbl = sample - mean;
        const double d2 = sqr(dbl);
        var += d2;
        skew += dbl * d2;
        kurtosis += d2 * d2;
    }
    var /= u.size();
    double dev = std::sqrt(var);
    skew /= u.size() * dev * var;
    kurtosis /= u.size() * var * var;
    kurtosis -= 3;  // compared below against x_kurtosis, which has no +3 term

    // Reference values derived from the distribution's own parameters.
    double x_mean = d.k() * (1 - d.p()) / d.p();
    double x_var = x_mean / d.p();
    double x_skew = (2 - d.p()) / std::sqrt(d.k() * (1 - d.p()));
    double x_kurtosis = 6. / d.k() + sqr(d.p()) / (d.k() * (1 - d.p()));

    assert(std::abs((mean - x_mean) / x_mean) < 0.01);
    assert(std::abs((var - x_var) / x_var) < 0.01);
    assert(std::abs((skew - x_skew) / x_skew) < 0.04);
    assert(std::abs((kurtosis - x_kurtosis) / x_kurtosis) < 0.05);
}
// Statistical check of std::negative_binomial_distribution<T> with k = 1,
// p = 0.05.
//
// Draws 1,000,000 values from a default-constructed std::mt19937, asserts that
// every value lies in [d.min(), d.max()], and then asserts that the sample
// mean, variance and skewness are within 1% (relative error) of the values
// computed from d.k() and d.p(), and kurtosis-minus-3 within 3%.
template <class T>
void test6() {
    using D = std::negative_binomial_distribution<T>;
    using G = std::mt19937;
    using Sample = typename D::result_type;

    G g;
    D d(1, 0.05);
    const int N = 1000000;

    std::vector<Sample> u;
    u.reserve(N);  // sample count is known up front: allocate once, never regrow
    for (int i = 0; i < N; ++i) {
        const Sample v = d(g);
        assert(d.min() <= v && v <= d.max());
        u.push_back(v);
    }

    double mean = std::accumulate(u.begin(), u.end(), double(0)) / u.size();

    // Sum the 2nd, 3rd and 4th powers of each sample's deviation from the mean.
    double var = 0;
    double skew = 0;
    double kurtosis = 0;
    for (const Sample sample : u) {
        const double dbl = sample - mean;
        const double d2 = sqr(dbl);
        var += d2;
        skew += dbl * d2;
        kurtosis += d2 * d2;
    }
    var /= u.size();
    double dev = std::sqrt(var);
    skew /= u.size() * dev * var;
    kurtosis /= u.size() * var * var;
    kurtosis -= 3;  // compared below against x_kurtosis, which has no +3 term

    // Reference values derived from the distribution's own parameters.
    double x_mean = d.k() * (1 - d.p()) / d.p();
    double x_var = x_mean / d.p();
    double x_skew = (2 - d.p()) / std::sqrt(d.k() * (1 - d.p()));
    double x_kurtosis = 6. / d.k() + sqr(d.p()) / (d.k() * (1 - d.p()));

    assert(std::abs((mean - x_mean) / x_mean) < 0.01);
    assert(std::abs((var - x_var) / x_var) < 0.01);
    assert(std::abs((skew - x_skew) / x_skew) < 0.01);
    assert(std::abs((kurtosis - x_kurtosis) / x_kurtosis) < 0.03);
}
// Runs test2, test3, test5 and test6, in that order, all instantiated with
// the same type T.
template <class T>
void tests() {
test2<T>();
test3<T>();
test5<T>();
test6<T>();
}
// Entry point: runs the full set of checks with std::int8_t and then with
// std::uint8_t as the template argument. Returns 0 if control reaches the end.
int main(int, char**) {
    // Qualified names: <cstdint> only guarantees these typedefs in namespace std.
    tests<std::int8_t>();
    tests<std::uint8_t>();
    return 0;
}
Metadata
Metadata
Assignees
Labels
Improving things as opposed to bug fixing, e.g. new or missing feature; libc++ C++ Standard Library. Not GNU libstdc++. Not libc++abi.