Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit fd3a80f

Browse files
committed
Merge pull request scikit-learn#2067 from jnothman/test_binarizer
TST additional tests for preprocessing.Binarizer
2 parents 7bc5d1a + ebff9a3 commit fd3a80f
Copy full SHA for fd3a80f

File tree

Expand file tree / Collapse file tree

2 files changed

+22
-4
lines changed
Filter options
Expand file tree / Collapse file tree

2 files changed

+22
-4
lines changed

‎sklearn/preprocessing.py

Copy file name to clipboard | Expand all lines: sklearn/preprocessing.py
+5 | Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -535,6 +535,7 @@ def binarize(X, threshold=0.0, copy=True):
535535
536536
threshold : float, optional (0.0 by default)
537537
The lower bound that triggers feature values to be replaced by 1.0.
538+
The threshold cannot be less than 0 for operations on sparse matrices.
538539
539540
copy : boolean, optional, default is True
540541
set to False to perform inplace binarization and avoid a copy
@@ -554,6 +555,9 @@ def binarize(X, threshold=0.0, copy=True):
554555

555556
X = check_arrays(X, sparse_format=sparse_format, copy=copy)[0]
556557
if sp.issparse(X):
558+
if threshold < 0:
559+
raise ValueError('Cannot binarize a sparse matrix with threshold '
560+
'< 0')
557561
cond = X.data > threshold
558562
not_cond = np.logical_not(cond)
559563
X.data[cond] = 1
@@ -585,6 +589,7 @@ class Binarizer(BaseEstimator, TransformerMixin):
585589
----------
586590
threshold : float, optional (0.0 by default)
587591
The lower bound that triggers feature values to be replaced by 1.0.
592+
The threshold cannot be less than 0 for operations on sparse matrices.
588593
589594
copy : boolean, optional, default is True
590595
set to False to perform inplace binarization and avoid a copy (if

‎sklearn/tests/test_preprocessing.py

Copy file name to clipboard | Expand all lines: sklearn/tests/test_preprocessing.py
+17 -4 | Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -421,9 +421,9 @@ def test_normalize_errors():
421421

422422

423423
def test_binarizer():
424-
X_ = np.array([[1, 0, 5], [2, 3, 0]])
424+
X_ = np.array([[1, 0, 5], [2, 3, -1]])
425425

426-
for init in (np.array, sp.csr_matrix, sp.csc_matrix):
426+
for init in (np.array, list, sp.csr_matrix, sp.csc_matrix):
427427

428428
X = init(X_.copy())
429429

@@ -432,7 +432,7 @@ def test_binarizer():
432432
assert_equal(np.sum(X_bin == 0), 4)
433433
assert_equal(np.sum(X_bin == 1), 2)
434434
X_bin = binarizer.transform(X)
435-
assert_equal(type(X), type(X_bin))
435+
assert_equal(sp.issparse(X), sp.issparse(X_bin))
436436

437437
binarizer = Binarizer(copy=True).fit(X)
438438
X_bin = toarray(binarizer.transform(X))
@@ -449,11 +449,24 @@ def test_binarizer():
449449

450450
binarizer = Binarizer(copy=False)
451451
X_bin = binarizer.transform(X)
452-
assert_true(X_bin is X)
452+
if init is not list:
453+
assert_true(X_bin is X)
453454
X_bin = toarray(X_bin)
454455
assert_equal(np.sum(X_bin == 0), 2)
455456
assert_equal(np.sum(X_bin == 1), 4)
456457

458+
binarizer = Binarizer(threshold=-0.5, copy=True)
459+
for init in (np.array, list):
460+
X = init(X_.copy())
461+
462+
X_bin = toarray(binarizer.transform(X))
463+
assert_equal(np.sum(X_bin == 0), 1)
464+
assert_equal(np.sum(X_bin == 1), 5)
465+
X_bin = binarizer.transform(X)
466+
467+
# Cannot use threshold < 0 for sparse
468+
assert_raises(ValueError, binarizer.transform, sp.csc_matrix(X))
469+
457470

458471
def test_label_binarizer():
459472
lb = LabelBinarizer()

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.