-
-
Notifications
You must be signed in to change notification settings - Fork 10.9k
BUG: quantile should error when weights are all zeros #28595
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
293ea24
ae8cd54
565fc75
8309d16
04d05f6
c76b5ad
bfcec09
f06a1f8
1303b3c
ad95df2
dc14e6b
8eeed6a
4fe3444
ba2c398
2e8e2ea
3a20796
40075b3
a83887b
3d1c7b0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4535,21 +4535,26 @@ def quantile(a, | |
if axis is not None: | ||
axis = _nx.normalize_axis_tuple(axis, a.ndim, argname="axis") | ||
weights = _weights_are_valid(weights=weights, a=a, axis=axis) | ||
if np.any(weights < 0): | ||
raise ValueError("Weights must be non-negative.") | ||
elif np.all(weights == 0): | ||
raise ValueError("At least one weight must be non-zero") | ||
if weights.dtype != object: | ||
if np.any(np.isinf(weights)): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another general comment: as written, the common case has to go through a lot of checks. I think it would be better to optimize for the common case, and not worry too much about distinguishing failure cases. E.g., you can do just one evaluation with:
|
||
raise ValueError("Weights must be non-infinite") | ||
raise ValueError("Weights must be non-infinite.") | ||
elif np.any(np.isnan(weights)): | ||
raise ValueError("At least one weight is nan") | ||
raise ValueError("At least one weight is nan.") | ||
# Since np.isinf and np.isnan do not work in dtype object arrays | ||
# Also, dtype object arrays with np.nan in them break <, > and == operators | ||
# This specific handling had to be done (Can be improved) | ||
elif weights.dtype == object: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that this loop can still give unexpected errors, because you are here counting on object arrays to be turned into their values as scalars. E.g.,
This will be an uninformative error! I think we have two choices: just not check for |
||
for w in weights: | ||
if np.isnan(w): | ||
raise ValueError("At least one weight is nan") | ||
raise ValueError("At least one weight is nan.") | ||
if np.isinf(w): | ||
raise ValueError("Weights must be non-infinite") | ||
raise ValueError("Weights must be non-infinite.") | ||
|
||
if np.any(weights < 0): | ||
raise ValueError("Weights must be non-negative.") | ||
elif np.all(weights == 0): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here again we could ensure the common case remains fast by doing:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Trying to keep this inline:
You're right, I was too sloppy in writing this, the p.s. Given this, I'd probably swap the order, i.e.,
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @Tontonio3 I don't see how you responded to this suggestion. Please make sure all reviewer feedback is addressed before requesting re-review. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @ngoldbaum You're right, I forgot to implement this |
||
raise ValueError("At least one weight must be non-zero.") | ||
|
||
|
||
return _quantile_unchecked( | ||
a, q, axis, out, overwrite_input, method, keepdims, weights) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4142,58 +4142,25 @@ def test_closest_observation(self): | |
assert_equal(4, np.quantile(arr[0:9], q, method=m)) | ||
assert_equal(5, np.quantile(arr, q, method=m)) | ||
|
||
def test_inf_err(self): | ||
|
||
m = "inverted_cdf" | ||
q = 0.5 | ||
arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] | ||
wgt = np.ones(10) | ||
|
||
for i in range(len(arr)): | ||
wgt[i] = np.inf | ||
with pytest.raises(ValueError) as ex: | ||
a = np.quantile(arr, q, weights=wgt, method=m) | ||
assert "Weights must be non-infinite" in str(ex) | ||
wgt[i] = 1 | ||
|
||
for i in range(len(arr)): | ||
wgt[i] = np.inf | ||
with pytest.raises(ValueError) as ex: | ||
a = np.quantile(arr, q, weights=wgt, method=m) | ||
assert "Weights must be non-infinite" in str(ex) | ||
|
||
def test_nan_err(self): | ||
@pytest.mark.parametrize(["err_msg", "weight"], | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd parametrize over There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, if you pass in a list rather than an array, you could parametrize over There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
[("Weights must be non-infinite.", np.array([1,np.inf, 1, 1])), | ||
("Weights must be non-infinite.", np.array([1,np.inf, 1, 1], dtype=object)), | ||
("Weights must be non-infinite.", np.array([1,-np.inf, 1, 1])), | ||
("Weights must be non-infinite.", np.array([1,-np.inf, 1, 1], dtype=object)), | ||
("Weights must be non-infinite.", np.array([1,np.inf, 1, np.inf])), | ||
("At least one weight is nan.", np.array([1,np.nan, 1, 1])), | ||
("At least one weight is nan.", np.array([1,np.nan, 1, 1], dtype=object)), | ||
("At least one weight is nan.", np.array([1,np.nan, np.nan, 1])), | ||
("At least one weight must be non-zero.", np.zeros(4))]) | ||
def test_inf_nan_err(self, err_msg, weight): | ||
|
||
m = "inverted_cdf" | ||
q = 0.5 | ||
arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] | ||
wgt = np.ones(10) | ||
|
||
for i in range(len(arr)): | ||
wgt[i] = np.nan | ||
with pytest.raises(ValueError) as ex: | ||
a = np.quantile(arr, q, weights=wgt, method=m) | ||
assert "At least one weight is nan" in str(ex) | ||
wgt[i] = 1 | ||
|
||
for i in range(len(arr)): | ||
wgt[i] = np.nan | ||
with pytest.raises(ValueError) as ex: | ||
a = np.quantile(arr, q, weights=wgt, method=m) | ||
assert "At least one weight is nan" in str(ex) | ||
|
||
def test_all_zeroes_err(self): | ||
|
||
m = "inverted_cdf" | ||
q = 0.5 | ||
arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] | ||
wgt = np.zeros(10) | ||
with pytest.raises(ValueError) as ex: | ||
a = np.quantile(arr, q, weights=wgt, method=m) | ||
|
||
assert "At least one weight must be non-zero" in str(ex) | ||
|
||
|
||
arr = [1, 2, 3, 4] | ||
with pytest.raises(ValueError, match=err_msg): | ||
a = np.quantile(arr, q, weights=weight, method=m) | ||
|
||
class TestLerp: | ||
@hypothesis.given(t0=st.floats(allow_nan=False, allow_infinity=False, | ||
min_value=0, max_value=1), | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A general comment: I think these checks should happen inside `_weights_are_valid` - this will ensure they are used for `percentile` as well.