|
| 1 | +"""Testing for Logistic regression """ |
| 2 | + |
| 3 | +# Authors: Daniel Emaasit <daniel.emaasit@gmail.com> |
| 4 | +# |
| 5 | +# License: BSD 3 clause |
| 6 | + |
| 7 | +import pytest |
| 8 | +import numpy.testing as npt |
| 9 | +import pandas.testing as pdt |
1 | 10 | import shutil
|
2 | 11 | import tempfile
|
3 |
| -import unittest |
4 | 12 |
|
5 | 13 | import numpy as np
|
6 |
| -import pandas as pd |
7 | 14 | from pymc3 import summary
|
8 | 15 | from sklearn.model_selection import train_test_split
|
9 | 16 |
|
10 | 17 | from pmlearn.exceptions import NotFittedError
|
11 | 18 | from pmlearn.linear_model import HierarchicalLogisticRegression
|
12 | 19 |
|
13 | 20 |
|
14 |
class TestHierarchicalLogisticRegression(object):
    """Shared fixture for HierarchicalLogisticRegression tests.

    Simulates three categories of logistic-regression data with known
    ground-truth intercepts (``alphas``) and slopes (``betas``), splits
    it into train/test sets, and creates a fresh model plus a temporary
    directory for the save/load tests.
    """

    def setup_method(self):
        def numpy_invlogit(x):
            # Inverse logit (sigmoid): maps reals to (0, 1) probabilities.
            return 1 / (1 + np.exp(-x))

        self.num_cats = 3
        self.num_pred = 1
        self.num_samples_per_cat = 1000

        # Ground-truth per-category parameters the model should recover.
        self.alphas = np.random.randn(self.num_cats)
        self.betas = np.random.randn(self.num_cats, self.num_pred)
        # TODO: make this more efficient; right now, it's very explicit
        # so I understand it.
        x_a = np.random.randn(self.num_samples_per_cat, self.num_pred)
        y_a = np.random.binomial(1,
                                 numpy_invlogit(self.alphas[0] +
                                                np.sum(self.betas[0] * x_a, 1)
                                                ))
        x_b = np.random.randn(self.num_samples_per_cat, self.num_pred)
        y_b = np.random.binomial(1,
                                 numpy_invlogit(self.alphas[1] +
                                                np.sum(self.betas[1] * x_b, 1)
                                                ))
        x_c = np.random.randn(self.num_samples_per_cat, self.num_pred)
        y_c = np.random.binomial(1,
                                 numpy_invlogit(self.alphas[2] +
                                                np.sum(self.betas[2] * x_c, 1)
                                                ))

        X = np.concatenate([x_a, x_b, x_c])
        y = np.concatenate([y_a, y_b, y_c])
        # Category index (0, 1, 2) for each row.  NOTE: `np.int` was
        # deprecated in NumPy 1.20 and removed in 1.24; the builtin
        # `int` is the exact equivalent dtype.
        cats = np.concatenate([
            np.zeros(self.num_samples_per_cat, dtype=int),
            np.ones(self.num_samples_per_cat, dtype=int),
            2 * np.ones(self.num_samples_per_cat, dtype=int)
        ])

        self.X_train, self.X_test, self.cat_train, self.cat_test, \
            self.y_train, self.y_test = train_test_split(
                X, cats, y, test_size=0.4
            )

        self.advi_hlr = HierarchicalLogisticRegression()

        # Scratch directory for the save/load round-trip tests.
        self.test_dir = tempfile.mkdtemp()

    def teardown_method(self):
        # Remove the temp directory so repeated runs don't accumulate state.
        shutil.rmtree(self.test_dir)
|
51 | 69 |
|
52 | 70 |
|
53 |
class TestHierarchicalLogisticRegressionFit(TestHierarchicalLogisticRegression):
    """Check that ADVI fitting recovers sensible model parameters."""

    def test_advi_fit_returns_correct_model(self):
        # Note: print is here so PyMC3 output won't overwrite the test name
        print('')

        self.advi_hlr.fit(self.X_train, self.y_train, self.cat_train,
                          minibatch_size=500, inference_args={"n": 50000})

        npt.assert_equal(self.num_cats, self.advi_hlr.num_cats)
        npt.assert_equal(self.num_pred, self.advi_hlr.num_pred)

        # TODO: Figure out best way to test
        # np.testing.assert_almost_equal(self.alphas,
        #     self.advi_hlr.trace['alphas'].mean(), decimal=1)
        # np.testing.assert_almost_equal(self.betas,
        #     self.advi_hlr.trace['betas'].mean(), decimal=1)

        # For now, just check that the estimated parameters
        # have the correct signs.
        fitted_alpha_signs = np.sign(self.advi_hlr.trace['alpha'].mean(axis=0))
        fitted_beta_signs = np.sign(self.advi_hlr.trace['beta'].mean(axis=0))
        npt.assert_equal(np.sign(self.alphas), fitted_alpha_signs)
        npt.assert_equal(np.sign(self.betas), fitted_beta_signs)
|
75 | 97 |
|
76 | 98 |
|
77 |
class TestHierarchicalLogisticRegressionPredictProba(
        TestHierarchicalLogisticRegression):
    """Tests for HierarchicalLogisticRegression.predict_proba."""

    def test_predict_proba_returns_probabilities(self):
        # Note: print is here so PyMC3 output won't overwrite the test name
        print('')
        self.advi_hlr.fit(self.X_train, self.y_train, self.cat_train,
                          minibatch_size=500, inference_args={"n": 50000})
        predicted = self.advi_hlr.predict_proba(self.X_test, self.cat_test)
        # Expect one probability per test sample.
        npt.assert_equal(predicted.shape, self.y_test.shape)

    def test_predict_proba_returns_probabilities_and_std(self):
        # Note: print is here so PyMC3 output won't overwrite the test name
        print('')
        self.advi_hlr.fit(self.X_train, self.y_train, self.cat_train,
                          minibatch_size=500, inference_args={"n": 50000})
        predicted, uncertainty = self.advi_hlr.predict_proba(
            self.X_test, self.cat_test, return_std=True)
        # Both the probabilities and their standard deviations are
        # per-sample arrays.
        npt.assert_equal(predicted.shape, self.y_test.shape)
        npt.assert_equal(uncertainty.shape, self.y_test.shape)

    def test_predict_proba_raises_error_if_not_fit(self):
        # A freshly constructed (never fitted) model must refuse to predict.
        with pytest.raises(NotFittedError):
            unfitted_hlr = HierarchicalLogisticRegression()
            unfitted_hlr.predict_proba(self.X_train, self.cat_train)
98 | 123 |
|
class TestHierarchicalLogisticRegressionPredict(
        TestHierarchicalLogisticRegression):
    """Tests for HierarchicalLogisticRegression.predict."""

    def test_predict_returns_predictions(self):
        # Note: print is here so PyMC3 output won't overwrite the test name
        print('')
        self.advi_hlr.fit(self.X_train, self.y_train, self.cat_train,
                          minibatch_size=500, inference_args={"n": 50000})
        predictions = self.advi_hlr.predict(self.X_test, self.cat_test)
        # Expect one predicted label per test sample.
        npt.assert_equal(predictions.shape, self.y_test.shape)
106 | 134 |
|
class TestHierarchicalLogisticRegressionScore(
        TestHierarchicalLogisticRegression):
    """Tests for HierarchicalLogisticRegression.score."""

    def test_score_scores(self):
        # Note: print is here so PyMC3 output won't overwrite the test name
        print('')
        self.advi_hlr.fit(self.X_train, self.y_train, self.cat_train,
                          minibatch_size=500, inference_args={"n": 50000})
        score = self.advi_hlr.score(self.X_test, self.y_test, self.cat_test)
        # Baseline: the empirical positive rate of the test labels.  The
        # fitted model should do at least as well.  Use `>=` rather than
        # npt.assert_array_less, which enforces a *strict* inequality and
        # would spuriously fail when the model exactly ties the baseline.
        naive_score = np.mean(self.y_test)
        assert score >= naive_score
115 | 146 |
|
class TestHierarchicalLogisticRegressionSaveandLoad(
        TestHierarchicalLogisticRegression):
    """Round-trip save/load tests for HierarchicalLogisticRegression."""

    def test_save_and_load_work_correctly(self):
        # Note: print is here so PyMC3 output won't overwrite the test name
        print('')
        self.advi_hlr.fit(self.X_train, self.y_train, self.cat_train,
                          minibatch_size=500, inference_args={"n": 50000})
        probs_before = self.advi_hlr.predict_proba(self.X_test, self.cat_test)
        self.advi_hlr.save(self.test_dir)

        loaded_hlr = HierarchicalLogisticRegression()
        loaded_hlr.load(self.test_dir)

        # The reloaded model must carry over the fitted metadata...
        npt.assert_equal(self.advi_hlr.num_cats, loaded_hlr.num_cats)
        npt.assert_equal(self.advi_hlr.num_pred, loaded_hlr.num_pred)
        npt.assert_equal(self.advi_hlr.num_training_samples,
                         loaded_hlr.num_training_samples)
        # ...and an equivalent posterior trace.
        pdt.assert_frame_equal(summary(self.advi_hlr.trace),
                               summary(loaded_hlr.trace))

        # Predictions from the reloaded model should closely match the
        # pre-save predictions.
        probs_after = loaded_hlr.predict_proba(self.X_test, self.cat_test)
        npt.assert_almost_equal(probs_after, probs_before, decimal=1)
0 commit comments