Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit bc7cd31

Browse filesBrowse files
authored
ENH more efficient _num_combinations calculation in PolynomialFeatures (#19734)
1 parent 108dd7b commit bc7cd31
Copy full SHA for bc7cd31

File tree

3 files changed

+54
-6
lines changed
Filter options

3 files changed

+54
-6
lines changed

‎doc/whats_new/v1.0.rst

Copy file name to clipboardExpand all lines: doc/whats_new/v1.0.rst
+4Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,10 @@ Changelog
200200
:pr:`19426` by :user:`Alexandre Gramfort <agramfort>` and
201201
:user:`Maria Telenczuk <maikia>`.
202202

203+
- |Efficiency| The implementation of `fit` for `PolynomialFeatures` transformer
204+
is now faster. This is especially noticeable on large sparse input.
205+
:pr:`19734` by :user:`Fred Robinson <frrad>`.
206+
203207
:mod:`sklearn.manifold`
204208
.......................
205209

‎sklearn/preprocessing/_polynomial.py

Copy file name to clipboardExpand all lines: sklearn/preprocessing/_polynomial.py
+29-6Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import numpy as np
99
from scipy import sparse
1010
from scipy.interpolate import BSpline
11+
from scipy.special import comb
1112

1213
from ..base import BaseEstimator, TransformerMixin
1314
from ..utils import check_array
@@ -113,6 +114,29 @@ def _combinations(n_features, degree, interaction_only, include_bias):
113114
return chain.from_iterable(comb(range(n_features), i)
114115
for i in range(start, degree + 1))
115116

117+
@staticmethod
118+
def _num_combinations(n_features, degree, interaction_only, include_bias):
119+
"""Calculate number of terms in polynomial expansion
120+
121+
This should be equivalent to counting the number of terms returned by
122+
_combinations(...) but much faster.
123+
"""
124+
125+
if interaction_only:
126+
combinations = sum(
127+
[
128+
comb(n_features, i, exact=True)
129+
for i in range(1, min(degree + 1, n_features + 1))
130+
]
131+
)
132+
else:
133+
combinations = comb(n_features + degree, degree, exact=True) - 1
134+
135+
if include_bias:
136+
combinations += 1
137+
138+
return combinations
139+
116140
@property
117141
def powers_(self):
118142
check_is_fitted(self)
@@ -170,13 +194,12 @@ def fit(self, X, y=None):
170194
self : object
171195
Fitted transformer.
172196
"""
173-
n_samples, n_features = self._validate_data(
174-
X, accept_sparse=True).shape
175-
combinations = self._combinations(n_features, self.degree,
176-
self.interaction_only,
177-
self.include_bias)
197+
_, n_features = self._validate_data(X, accept_sparse=True).shape
178198
self.n_input_features_ = n_features
179-
self.n_output_features_ = sum(1 for _ in combinations)
199+
self.n_output_features_ = self._num_combinations(
200+
n_features, self.degree, self.interaction_only, self.include_bias
201+
)
202+
180203
return self
181204

182205
def transform(self, X):

‎sklearn/preprocessing/tests/test_polynomial.py

Copy file name to clipboardExpand all lines: sklearn/preprocessing/tests/test_polynomial.py
+21Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,27 @@ def test_polynomial_features_csr_X(deg, include_bias, interaction_only, dtype):
552552
assert_array_almost_equal(Xt_csr.A, Xt_dense)
553553

554554

555+
@pytest.mark.parametrize("n_features", [1, 4, 5])
556+
@pytest.mark.parametrize("degree", range(1, 5))
557+
@pytest.mark.parametrize("interaction_only", [True, False])
558+
@pytest.mark.parametrize("include_bias", [True, False])
559+
def test_num_combinations(n_features, degree, interaction_only, include_bias):
560+
"""
561+
Test that n_output_features_ is calculated correctly.
562+
"""
563+
x = sparse.csr_matrix(([1], ([0], [n_features - 1])))
564+
est = PolynomialFeatures(
565+
degree, interaction_only=interaction_only, include_bias=include_bias
566+
)
567+
est.fit(x)
568+
num_combos = est.n_output_features_
569+
570+
combos = PolynomialFeatures._combinations(
571+
n_features, degree, interaction_only, include_bias
572+
)
573+
assert num_combos == sum([1 for _ in combos])
574+
575+
555576
@pytest.mark.parametrize(['deg', 'include_bias', 'interaction_only', 'dtype'],
556577
[(2, True, False, np.float32),
557578
(2, True, False, np.float64),

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.