Commit cc63e1c

ENH slight improvement of common tests.

1 parent 5b822a2 commit cc63e1c
4 files changed: 105 additions, 51 deletions
doc/developers/utilities.rst (3 additions, 0 deletions)

@@ -256,6 +256,9 @@ Testing Functions
 - :class:`mock_urllib2`: Object which mocks the urllib2 module to fake
   requests of mldata. Used in tests of :mod:`sklearn.datasets`.

+- :func:`testing.all_estimators` : returns a list of all estimators in
+  sklearn to test for consistent behavior and interfaces.
+

 Helper Functions
 ================
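
A minimal sketch of the helper the new bullet documents (the names printed are illustrative; the estimator inventory depends on the installed version):

    from sklearn.utils.testing import all_estimators

    # Returns sorted (class name, class object) pairs for every
    # non-abstract BaseEstimator subclass found in sklearn.
    for name, Est in all_estimators():
        print(name)  # e.g. 'ARDRegression', 'AffinityPropagation', ...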

sklearn/ensemble/base.py (4 additions, 0 deletions)

@@ -5,6 +5,8 @@
 # Authors: Gilles Louppe
 # License: BSD 3

+from abc import ABCMeta, abstractmethod
+
 from ..base import clone
 from ..base import BaseEstimator
 from ..base import MetaEstimatorMixin
@@ -28,7 +30,9 @@ class BaseEnsemble(BaseEstimator, MetaEstimatorMixin):
     The list of attributes to use as parameters when instantiating a
     new base estimator. If none are given, default parameters are used.
     """
+    __metaclass__ = ABCMeta

+    @abstractmethod
     def __init__(self, base_estimator, n_estimators=10,
                  estimator_params=tuple()):

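
The two added lines make BaseEnsemble abstract, so the estimator crawler can skip it via its `__abstractmethods__` attribute instead of a hand-maintained exclusion list. A self-contained sketch of the Python 2 idiom used here (illustrative class names, not scikit-learn's):

    from abc import ABCMeta, abstractmethod

    class Base(object):
        # Python 2 spelling; Python 3 would use `class Base(metaclass=ABCMeta)`
        __metaclass__ = ABCMeta

        @abstractmethod
        def __init__(self, base_estimator):
            self.base_estimator = base_estimator

    class Concrete(Base):
        def __init__(self, base_estimator):
            super(Concrete, self).__init__(base_estimator)

    # Base('clf')     -> TypeError: can't instantiate abstract class Base
    # Concrete('clf') -> fine, the abstract __init__ is overridden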

sklearn/tests/test_common.py (27 additions, 49 deletions)

@@ -20,6 +20,7 @@
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import all_estimators
+from sklearn.utils.testing import meta_estimators
 from sklearn.utils.testing import set_random_state
 from sklearn.utils.testing import assert_greater

@@ -34,14 +35,10 @@
 from sklearn.svm.base import BaseLibSVM

 # import "special" estimators
-from sklearn.grid_search import GridSearchCV
 from sklearn.decomposition import SparseCoder
-from sklearn.pipeline import Pipeline, FeatureUnion
 from sklearn.pls import _PLS, PLSCanonical, PLSRegression, CCA, PLSSVD
-from sklearn.ensemble import BaseEnsemble, RandomTreesEmbedding
-from sklearn.multiclass import (OneVsOneClassifier, OneVsRestClassifier,
-                                OutputCodeClassifier)
-from sklearn.feature_selection import RFE, RFECV, SelectKBest
+from sklearn.ensemble import RandomTreesEmbedding
+from sklearn.feature_selection import SelectKBest
 from sklearn.dummy import DummyClassifier, DummyRegressor
 from sklearn.naive_bayes import MultinomialNB, BernoulliNB
 from sklearn.covariance import EllipticEnvelope, EllipticEnvelop
@@ -56,19 +53,16 @@
 from sklearn.random_projection import (GaussianRandomProjection,
                                        SparseRandomProjection)

-dont_test = [Pipeline, FeatureUnion, GridSearchCV, SparseCoder,
-             EllipticEnvelope, EllipticEnvelop, DictVectorizer, LabelBinarizer,
-             LabelEncoder, TfidfTransformer, IsotonicRegression, OneHotEncoder,
-             RandomTreesEmbedding, FeatureHasher, DummyClassifier,
-             DummyRegressor]
-meta_estimators = [BaseEnsemble, OneVsOneClassifier, OutputCodeClassifier,
-                   OneVsRestClassifier, RFE, RFECV]
+dont_test = [SparseCoder, EllipticEnvelope, EllipticEnvelop, DictVectorizer,
+             LabelBinarizer, LabelEncoder, TfidfTransformer,
+             IsotonicRegression, OneHotEncoder, RandomTreesEmbedding,
+             FeatureHasher, DummyClassifier, DummyRegressor]


 def test_all_estimators():
     # Test that estimators are default-constructible, clonable
     # and have working repr.
-    estimators = all_estimators()
+    estimators = all_estimators(include_meta_estimators=True)
     clf = LDA()

     for name, E in estimators:
@@ -78,7 +72,7 @@ def test_all_estimators():
         # test default-constructibility
         # get rid of deprecation warnings
         with warnings.catch_warnings(record=True):
-            if E in meta_estimators:
+            if name in meta_estimators:
                 e = E(clf)
             else:
                 e = E()
@@ -101,7 +95,7 @@ def test_all_estimators():
            # true for mixins
            continue
        params = e.get_params()
-        if E in meta_estimators:
+        if name in meta_estimators:
            # they need a non-default argument
            args = args[2:]
        else:
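
meta_estimators now lives in sklearn.utils.testing and holds class names (strings) rather than class objects, so the test module no longer imports RFE, OneVsRestClassifier and friends just to recognize them. A condensed sketch of the construction loop above (a sketch, not the verbatim test; sklearn.lda was the LDA module path at the time):

    import warnings
    from sklearn.lda import LDA
    from sklearn.utils.testing import all_estimators, meta_estimators

    clf = LDA()
    with warnings.catch_warnings(record=True):  # silence deprecation warnings
        for name, E in all_estimators(include_meta_estimators=True):
            # meta-estimators need a base estimator as their first argument
            e = E(clf) if name in meta_estimators else E()
            repr(e)  # every estimator must have a working repr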
@@ -130,7 +124,7 @@ def test_estimators_sparse_data():
     estimators = [(name, E) for name, E in estimators
                   if issubclass(E, (ClassifierMixin, RegressorMixin))]
     for name, Clf in estimators:
-        if Clf in dont_test or Clf in meta_estimators:
+        if Clf in dont_test:
             continue
         # catch deprecation warnings
         with warnings.catch_warnings(record=True):
@@ -154,9 +148,7 @@ def test_estimators_sparse_data():
 def test_transformers():
     # test if transformers do something sensible on training set
     # also test all shapes / shape errors
-    estimators = all_estimators()
-    transformers = [(name, E) for name, E in estimators
-                    if issubclass(E, TransformerMixin)]
+    transformers = all_estimators(type_filter='transformer')
     X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                       random_state=0, n_features=2, cluster_std=0.1)
     n_samples, n_features = X.shape
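
type_filter replaces the two-line mixin filter that each test previously spelled out by hand. The two forms below are intended to be equivalent under the defaults introduced in this commit (a sketch for illustration):

    from sklearn.base import TransformerMixin
    from sklearn.utils.testing import all_estimators

    # old style: crawl everything, then filter by mixin by hand
    by_hand = [(name, E) for name, E in all_estimators()
               if issubclass(E, TransformerMixin)]

    # new style: let all_estimators apply the filter
    transformers = all_estimators(type_filter='transformer')

    assert transformers == by_hand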
@@ -168,7 +160,7 @@ def test_transformers():
     for name, Trans in transformers:
         trans = None

-        if Trans in dont_test or Trans in meta_estimators:
+        if Trans in dont_test:
             continue
         # these don't actually fit the data:
         if Trans in [AdditiveChi2Sampler, Binarizer, Normalizer]:
@@ -244,11 +236,9 @@ def test_transformers_sparse_data():
     X[X < .8] = 0
     X = sparse.csr_matrix(X)
     y = (4 * rng.rand(40)).astype(np.int)
-    estimators = all_estimators()
-    estimators = [(name, E) for name, E in estimators
-                  if issubclass(E, TransformerMixin)]
+    estimators = all_estimators(type_filter='transformer')
     for name, Trans in estimators:
-        if Trans in dont_test or Trans in meta_estimators:
+        if Trans in dont_test:
             continue
         # catch deprecation warnings
         with warnings.catch_warnings(record=True):
@@ -302,7 +292,7 @@ def test_estimators_nan_inf():
                         " transform.")
     for X_train in [X_train_nan, X_train_inf]:
         for name, Est in estimators:
-            if Est in dont_test or Est in meta_estimators:
+            if Est in dont_test:
                 continue
             if Est in (_PLS, PLSCanonical, PLSRegression, CCA, PLSSVD):
                 continue
@@ -383,14 +373,12 @@ def test_classifiers_one_label():
     X_train = rnd.uniform(size=(10, 3))
     X_test = rnd.uniform(size=(10, 3))
     y = np.ones(10)
-    estimators = all_estimators()
-    classifiers = [(name, E) for name, E in estimators
-                   if issubclass(E, ClassifierMixin)]
+    classifiers = all_estimators(type_filter='classifier')
     error_string_fit = "Classifier can't train when only one class is present."
     error_string_predict = ("Classifier can't predict when only one class is "
                             "present.")
     for name, Clf in classifiers:
-        if Clf in dont_test or Clf in meta_estimators:
+        if Clf in dont_test:
             continue
         # catch deprecation warnings
         with warnings.catch_warnings(record=True):
@@ -420,9 +408,7 @@ def test_classifiers_one_label():
 def test_clustering():
     # test if clustering algorithms do something sensible
     # also test all shapes / shape errors
-    estimators = all_estimators()
-    clustering = [(name, E) for name, E in estimators
-                  if issubclass(E, ClusterMixin)]
+    clustering = all_estimators(type_filter='cluster')
     iris = load_iris()
     X, y = iris.data, iris.target
     X, y = shuffle(X, y, random_state=7)
@@ -460,9 +446,7 @@ def test_clustering():
 def test_classifiers_train():
     # test if classifiers do something sensible on training set
     # also test all shapes / shape errors
-    estimators = all_estimators()
-    classifiers = [(name, E) for name, E in estimators
-                   if issubclass(E, ClassifierMixin)]
+    classifiers = all_estimators(type_filter='classifier')
     X_m, y_m = make_blobs(random_state=0)
     X_m, y_m = shuffle(X_m, y_m, random_state=7)
     X_m = StandardScaler().fit_transform(X_m)
@@ -475,7 +459,7 @@ def test_classifiers_train():
     n_classes = len(classes)
     n_samples, n_features = X.shape
     for name, Clf in classifiers:
-        if Clf in dont_test or Clf in meta_estimators:
+        if Clf in dont_test:
             continue
         if Clf in [MultinomialNB, BernoulliNB]:
             # TODO also test these!
@@ -538,17 +522,15 @@ def test_classifiers_train():

 def test_classifiers_classes():
     # test if classifiers can cope with non-consecutive classes
-    estimators = all_estimators()
-    classifiers = [(name, E) for name, E in estimators
-                   if issubclass(E, ClassifierMixin)]
+    classifiers = all_estimators(type_filter='classifier')
     X, y = make_blobs(random_state=12345)
     X, y = shuffle(X, y, random_state=7)
     X = StandardScaler().fit_transform(X)
     y = 2 * y + 1
     # TODO: make work with next line :)
     #y = y.astype(np.str)
     for name, Clf in classifiers:
-        if Clf in dont_test or Clf in meta_estimators:
+        if Clf in dont_test:
             continue
         if Clf in [MultinomialNB, BernoulliNB]:
             # TODO also test these!
@@ -569,16 +551,14 @@ def test_classifiers_classes():
 def test_regressors_int():
     # test if regressors can cope with integer labels (by converting them to
     # float)
-    estimators = all_estimators()
-    regressors = [(name, E) for name, E in estimators
-                  if issubclass(E, RegressorMixin)]
+    regressors = all_estimators(type_filter='regressor')
     boston = load_boston()
     X, y = boston.data, boston.target
     X, y = shuffle(X, y, random_state=0)
     X = StandardScaler().fit_transform(X)
     y = np.random.randint(2, size=X.shape[0])
     for name, Reg in regressors:
-        if Reg in dont_test or Reg in meta_estimators or Reg in (CCA,):
+        if Reg in dont_test or Reg in (CCA,):
             continue
         # catch deprecation warnings
         with warnings.catch_warnings(record=True):
@@ -603,9 +583,7 @@ def test_regressors_int():


 def test_regressors_train():
-    estimators = all_estimators()
-    regressors = [(name, E) for name, E in estimators
-                  if issubclass(E, RegressorMixin)]
+    regressors = all_estimators(type_filter='regressor')
     boston = load_boston()
     X, y = boston.data, boston.target
     X, y = shuffle(X, y, random_state=0)
@@ -615,7 +593,7 @@ def test_regressors_train():
     y = StandardScaler().fit_transform(y)
     succeeded = True
     for name, Reg in regressors:
-        if Reg in dont_test or Reg in meta_estimators:
+        if Reg in dont_test:
             continue
         # catch deprecation warnings
         with warnings.catch_warnings(record=True):

sklearn/utils/testing.py (71 additions, 2 deletions)

@@ -34,6 +34,9 @@
 from numpy.testing import assert_array_almost_equal
 from numpy.testing import assert_array_less

+from sklearn.base import (ClassifierMixin, RegressorMixin, TransformerMixin,
+                          ClusterMixin)
+
 __all__ = ["assert_equal", "assert_not_equal", "assert_raises", "raises",
            "with_setup", "assert_true", "assert_false", "assert_almost_equal",
            "assert_array_equal", "assert_array_almost_equal",
@@ -160,8 +163,48 @@ def urlopen(self, urlname):
     def quote(self, string, safe='/'):
         return urllib2.quote(string, safe)

+# Meta estimators need another estimator to be instantiated.
+meta_estimators = ["OneVsOneClassifier",
+                   "OutputCodeClassifier", "OneVsRestClassifier", "RFE",
+                   "RFECV"]
+# estimators that there is no way to default-construct sensibly
+other = ["Pipeline", "FeatureUnion", "GridSearchCV"]
+
+
+def all_estimators(include_meta_estimators=False, include_other=False,
+                   type_filter=None):
+    """Get a list of all estimators from sklearn.

-def all_estimators():
+    This function crawls the module and gets all classes that inherit
+    from BaseEstimator. Classes that are defined in test-modules are not
+    included.
+    By default meta_estimators such as GridSearchCV are also not included.
+
+    Parameters
+    ----------
+    include_meta_estimators : boolean, default=False
+        Whether to include meta-estimators that can be constructed using
+        an estimator as their first argument. These are currently
+        OneVsOneClassifier, OutputCodeClassifier, OneVsRestClassifier, RFE,
+        RFECV.
+
+    include_other : boolean, default=False
+        Whether to include meta-estimators that are somehow special and can
+        not be default-constructed sensibly. These are currently
+        Pipeline, FeatureUnion and GridSearchCV.
+
+    type_filter : string or None, default=None
+        Which kind of estimators should be returned. If None, no filter is
+        applied and all estimators are returned. Possible values are
+        'classifier', 'regressor', 'cluster' and 'transformer' to get
+        estimators only of these specific types.
+
+    Returns
+    -------
+    estimators : list of tuples
+        List of (name, class), where ``name`` is the class name as string
+        and ``class`` is the actual type of the class.
+    """
     def is_abstract(c):
         if not(hasattr(c, '__abstractmethods__')):
             return False
@@ -182,9 +225,35 @@ def is_abstract(c):

     all_classes = set(all_classes)

-    estimators = [c for c in all_classes if issubclass(c[1], BaseEstimator)]
+    estimators = [c for c in all_classes
+                  if (issubclass(c[1], BaseEstimator)
+                      and c[0] != 'BaseEstimator')]
     # get rid of abstract base classes
     estimators = [c for c in estimators if not is_abstract(c[1])]
+
+    if not include_other:
+        estimators = [c for c in estimators if not c[0] in other]
+    # possibly get rid of meta estimators
+    if not include_meta_estimators:
+        estimators = [c for c in estimators if not c[0] in meta_estimators]
+
+    if type_filter == 'classifier':
+        estimators = [est for est in estimators
+                      if issubclass(est[1], ClassifierMixin)]
+    elif type_filter == 'regressor':
+        estimators = [est for est in estimators
+                      if issubclass(est[1], RegressorMixin)]
+    elif type_filter == 'transformer':
+        estimators = [est for est in estimators
+                      if issubclass(est[1], TransformerMixin)]
+    elif type_filter == 'cluster':
+        estimators = [est for est in estimators
+                      if issubclass(est[1], ClusterMixin)]
+    elif type_filter is not None:
+        raise ValueError("Parameter type_filter must be 'classifier', "
+                         "'regressor', 'transformer', 'cluster' or None, got"
+                         " %s." % repr(type_filter))
+
     # We sort in order to have reproducible test failures
     return sorted(estimators)

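A usage sketch of the full signature defined above (estimator counts and names vary by sklearn version; illustrative only):

    from sklearn.utils.testing import all_estimators

    # Default: meta-estimators (RFE, OneVsRestClassifier, ...) and special
    # cases (Pipeline, FeatureUnion, GridSearchCV) are excluded.
    default = all_estimators()
    with_meta = all_estimators(include_meta_estimators=True)
    with_other = all_estimators(include_other=True)
    assert len(with_meta) >= len(default)

    # Keep only classes inheriting from RegressorMixin:
    for name, Reg in all_estimators(type_filter='regressor'):
        print(name)

    # Any other filter string hits the ValueError branch above:
    try:
        all_estimators(type_filter='density')
    except ValueError as e:
        print(e)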