From 6509213d0e9d0f0c1fffb38ec422eac6dfa2488b Mon Sep 17 00:00:00 2001 From: Daniel Emaasit Date: Mon, 22 Oct 2018 21:10:30 -0400 Subject: [PATCH 01/11] fixed badges and added tests for gp reg (#7) * added tests for gp reg * fixed badges --- .travis.yml | 12 +- README.rst | 8 +- pmlearn/gaussian_process/tests/test_gpr.py | 222 ++++++++++----------- 3 files changed, 116 insertions(+), 126 deletions(-) diff --git a/.travis.yml b/.travis.yml index b64ab8f..de6da6c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,12 +22,12 @@ install: - pip install coveralls travis-sphinx==2.0.0 env: - - PYTHON_VERSION=2.7 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=10 --cov-append pmlearn/tests/test_base.py pmlearn/linear_model/tests/test_base.py pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" - - PYTHON_VERSION=2.7 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=10 --cov-append --ignore=pmlearn/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" - - PYTHON_VERSION=3.6 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=10 --cov-append pmlearn/tests/test_base.py pmlearn/linear_model/tests/test_base.py pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py 
--ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" - - PYTHON_VERSION=3.6 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=10 --cov-append --ignore=pmlearn/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" - - PYTHON_VERSION=3.6 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=10 --cov-append pmlearn/tests/test_base.py pmlearn/linear_model/tests/test_base.py pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" - - PYTHON_VERSION=3.6 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=10 --cov-append --ignore=pmlearn/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" + - PYTHON_VERSION=2.7 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py pmlearn/linear_model/tests/test_base.py pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py 
--ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" + - PYTHON_VERSION=2.7 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_logistic.py pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" + - PYTHON_VERSION=3.6 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py pmlearn/linear_model/tests/test_base.py pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" + - PYTHON_VERSION=3.6 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_logistic.py pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" + - PYTHON_VERSION=3.6 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py pmlearn/linear_model/tests/test_base.py pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py 
--ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" + - PYTHON_VERSION=3.6 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_logistic.py pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" script: - . ./scripts/test.sh $TESTCMD diff --git a/README.rst b/README.rst index 9184d4b..821f666 100644 --- a/README.rst +++ b/README.rst @@ -259,11 +259,11 @@ project: https://github.com/parsing-science/pymc3_models. .. |Binder| image:: https://mybinder.org/badge.svg :target: https://mybinder.org/v2/gh/pymc-learn/pymc-learn/master?filepath=%2Fdocs%2Fnotebooks?urlpath=lab -.. |Travis| image:: https://api.travis-ci.org/pymc-learn/pymc-learn.svg?branch=master - :target: https://travis-ci.org/pymc-learn/pymc-learn +.. |Travis| image:: https://travis-ci.com/pymc-learn/pymc-learn.svg?branch=master + :target: https://travis-ci.com/pymc-learn/pymc-learn -.. |Coverage| image:: https://coveralls.io/repos/github/pymc-learn/pymc-learn/badge.svg?branch=master - :target: https://coveralls.io/github/pymc-learn/pymc-learn?branch=master +.. |Coverage| image:: https://coveralls.io/repos/github/pymc-learn/pymc-learn/badge.svg + :target: https://coveralls.io/github/pymc-learn/pymc-learn .. 
|Python27| image:: https://img.shields.io/badge/python-2.7-blue.svg :target: https://badge.fury.io/py/pymc-learn diff --git a/pmlearn/gaussian_process/tests/test_gpr.py b/pmlearn/gaussian_process/tests/test_gpr.py index a211fbf..255516b 100644 --- a/pmlearn/gaussian_process/tests/test_gpr.py +++ b/pmlearn/gaussian_process/tests/test_gpr.py @@ -4,21 +4,21 @@ # # License: BSD 3 clause -# import pytest +import pytest import numpy.testing as npt -# import pandas.testing as pdt +import pandas.testing as pdt import shutil import tempfile import numpy as np import pymc3 as pm -# from pymc3 import summary -# from sklearn.gaussian_process import \ -# GaussianProcessRegressor as skGaussianProcessRegressor -# from sklearn.model_selection import train_test_split -# -# -# from pmlearn.exceptions import NotFittedError +from pymc3 import summary +from sklearn.gaussian_process import \ + GaussianProcessRegressor as skGaussianProcessRegressor +from sklearn.model_selection import train_test_split + + +from pmlearn.exceptions import NotFittedError from pmlearn.gaussian_process import (GaussianProcessRegressor) # , # SparseGaussianProcessRegressor, @@ -26,34 +26,34 @@ class TestGaussianProcessRegressor(object): - """ - Compare the logp of GPR models in pmlearn to sklearn - """ def setup_method(self): - """Setup the data for testing - """ self.num_pred = 1 - self.num_training_samples = 20 - self.length_scale = 0.1 - self.signal_variance = 0.01 - self.noise_variance = 0.01 - self.X = np.random.randn(self.num_training_samples, self.num_pred) - self.y = np.random.randn(self.num_training_samples) * \ - self.noise_variance - self.Xnew = np.random.randn(50, self.num_pred) - self.pnew = np.random.randn(50) * self.noise_variance - with pm.Model() as model: - cov_func = self.signal_variance**2 * \ - pm.gp.cov.ExpQuad(self.num_pred, self.length_scale) - gp = pm.gp.Latent(cov_func=cov_func) - f = gp.prior("f", self.X, reparameterize=False) - p = gp.conditional("p", self.Xnew) - - self.latent_logp 
= model.logp({"f": self.y, "p": self.pnew}) - self.plogp = p.logp({"f": self.y, "p": self.pnew}) - - self.test_gpr = GaussianProcessRegressor(kernel=cov_func) + self.num_training_samples = 300 + + self.length_scale = 1.0 + self.signal_variance = 0.1 + self.noise_variance = 0.1 + + X = np.linspace(start=0, stop=10, + num=self.num_training_samples)[:, None] + + cov_func = self.signal_variance ** 2 * pm.gp.cov.ExpQuad( + 1, self.length_scale) + mean_func = pm.gp.mean.Zero() + + f_true = np.random.multivariate_normal( + mean_func(X).eval(), + cov_func(X).eval() + 1e-8 * np.eye(self.num_training_samples), + 1).flatten() + y = f_true + \ + self.noise_variance * np.random.randn(self.num_training_samples) + + self.X_train, self.X_test, self.y_train, self.y_test = \ + train_test_split(X, y, test_size=0.3) + + self.advi_gpr = GaussianProcessRegressor() + self.test_dir = tempfile.mkdtemp() def teardown_method(self): @@ -67,93 +67,83 @@ def test_advi_fit_returns_correct_model(self): # This print statement ensures PyMC3 output won't overwrite # the test name print('') - self.test_gpr.fit(self.X, self.y) - - npt.assert_equal(self.num_pred, self.test_gpr.num_pred) - npt.assert_almost_equal(self.signal_variance, - int(self.test_GPR.summary['mean']['signal_variance__0']), - 0) - self.assertAlmostEqual(self.length_scale, - int(self.test_GPR.summary['mean']['length_scale__0_0']), - 0) - self.assertAlmostEqual(self.noise_variance, - int(self.test_GPR.summary['mean']['noise_variance__0']), - 0) + self.advi_gpr.fit(self.X_train, self.y_train, + inference_args={"n": 25000}) - # def test_nuts_fit_returns_correct_model(self): - # # This print statement ensures PyMC3 output won't overwrite the test name - # print('') - # self.test_nuts_GPR.fit(self.X_train, self.y_train, inference_type='nuts') - # - # self.assertEqual(self.num_pred, self.test_nuts_GPR.num_pred) - # self.assertAlmostEqual(self.signal_variance, - # int(self.test_nuts_GPR.summary['mean']['signal_variance__0']), - # 0) - # 
self.assertAlmostEqual(self.length_scale, - # int(self.test_nuts_GPR.summary['mean']['length_scale__0_0']), - # 0) - # self.assertAlmostEqual(self.noise_variance, - # int(self.test_nuts_GPR.summary['mean']['noise_variance__0']), - # 0) + npt.assert_equal(self.num_pred, self.advi_gpr.num_pred) + npt.assert_almost_equal( + self.signal_variance, + self.advi_gpr.summary['mean']['signal_variance__0'], + 0) + npt.assert_almost_equal( + self.length_scale, + self.advi_gpr.summary['mean']['length_scale__0_0'], + 0) + npt.assert_almost_equal( + self.noise_variance, + self.advi_gpr.summary['mean']['noise_variance__0'], + 0) -# class GaussianProcessRegressorPredictTestCase(GaussianProcessRegressorTestCase): -# def test_predict_returns_predictions(self): -# print('') -# self.test_GPR.fit(self.X_train, self.y_train) -# preds = self.test_GPR.predict(self.X_test) -# self.assertEqual(self.y_test.shape, preds.shape) -# -# def test_predict_returns_mean_predictions_and_std(self): -# print('') -# self.test_GPR.fit(self.X_train, self.y_train) -# preds, stds = self.test_GPR.predict(self.X_test, return_std=True) -# self.assertEqual(self.y_test.shape, preds.shape) -# self.assertEqual(self.y_test.shape, stds.shape) -# -# def test_predict_raises_error_if_not_fit(self): -# print('') -# with self.assertRaises(NotFittedError) as no_fit_error: -# test_GPR = GaussianProcessRegressor() -# test_GPR.predict(self.X_train) -# -# expected = 'Run fit on the model before predict.' 
-# self.assertEqual(str(no_fit_error.exception), expected) +class TestGaussianProcessRegressorPredict(TestGaussianProcessRegressor): + def test_predict_returns_predictions(self): + print('') + self.advi_gpr.fit(self.X_train, self.y_train, + inference_args={"n": 25000}) + preds = self.advi_gpr.predict(self.X_test) + npt.assert_equal(self.y_test.shape, preds.shape) + + def test_predict_returns_mean_predictions_and_std(self): + print('') + self.advi_gpr.fit(self.X_train, self.y_train, + inference_args={"n": 25000}) + preds, stds = self.advi_gpr.predict(self.X_test, return_std=True) + npt.assert_equal(self.y_test.shape, preds.shape) + npt.assert_equal(self.y_test.shape, stds.shape) + + def test_predict_raises_error_if_not_fit(self): + print('') + with pytest.raises(NotFittedError): + advi_gpr = GaussianProcessRegressor() + advi_gpr.predict(self.X_train) + + +class TestGaussianProcessRegressorScore(TestGaussianProcessRegressor): + def test_score_matches_sklearn_performance(self): + print('') + sk_gpr = skGaussianProcessRegressor() + sk_gpr.fit(self.X_train, self.y_train) + sk_gpr_score = sk_gpr.score(self.X_test, self.y_test) + + self.advi_gpr.fit(self.X_train, self.y_train, + inference_args={"n": 25000}) + advi_gpr_score = self.advi_gpr.score(self.X_test, self.y_test) + + npt.assert_almost_equal(sk_gpr_score, advi_gpr_score, 1) + + +class TestGaussianProcessRegressorSaveAndLoad(TestGaussianProcessRegressor): + def test_save_and_load_work_correctly(self): + print('') + self.advi_gpr.fit(self.X_train, self.y_train, + inference_args={"n": 25000}) + score1 = self.advi_gpr.score(self.X_test, self.y_test) + self.advi_gpr.save(self.test_dir) + + gpr2 = GaussianProcessRegressor() + gpr2.load(self.test_dir) + + npt.assert_equal(self.advi_gpr.inference_type, gpr2.inference_type) + npt.assert_equal(self.advi_gpr.num_pred, gpr2.num_pred) + npt.assert_equal(self.advi_gpr.num_training_samples, + gpr2.num_training_samples) + pdt.assert_frame_equal(summary(self.advi_gpr.trace), + 
summary(gpr2.trace)) + + score2 = gpr2.score(self.X_test, self.y_test) + npt.assert_almost_equal(score1, score2, 0) -# class GaussianProcessRegressorScoreTestCase(GaussianProcessRegressorTestCase): -# def test_score_matches_sklearn_performance(self): -# print('') -# skGPR = skGaussianProcessRegressor() -# skGPR.fit(self.X_train, self.y_train) -# skGPR_score = skGPR.score(self.X_test, self.y_test) -# -# self.test_GPR.fit(self.X_train, self.y_train) -# test_GPR_score = self.test_GPR.score(self.X_test, self.y_test) -# -# self.assertAlmostEqual(skGPR_score, test_GPR_score, 1) -# -# -# class GaussianProcessRegressorSaveAndLoadTestCase(GaussianProcessRegressorTestCase): -# def test_save_and_load_work_correctly(self): -# print('') -# self.test_GPR.fit(self.X_train, self.y_train) -# score1 = self.test_GPR.score(self.X_test, self.y_test) -# self.test_GPR.save(self.test_dir) -# -# GPR2 = GaussianProcessRegressor() -# GPR2.load(self.test_dir) -# -# self.assertEqual(self.test_GPR.inference_type, GPR2.inference_type) -# self.assertEqual(self.test_GPR.num_pred, GPR2.num_pred) -# self.assertEqual(self.test_GPR.num_training_samples, GPR2.num_training_samples) -# pd.testing.assert_frame_equal(summary(self.test_GPR.trace), -# summary(GPR2.trace)) -# -# score2 = GPR2.score(self.X_test, self.y_test) -# self.assertAlmostEqual(score1, score2, 1) -# -# # class StudentsTProcessRegressorTestCase(unittest.TestCase): # # def setUp(self): From 9f929b66eb158e3e461f8bec273e6a71a2030041 Mon Sep 17 00:00:00 2001 From: Daniel Emaasit Date: Tue, 23 Oct 2018 00:45:09 -0400 Subject: [PATCH 02/11] finished testing gps (#10) * added tests for gp reg * fixed badges * added tests for students tp, sparse gp * cleaned redundant imports (#9) --- README.rst | 2 +- pmlearn/gaussian_process/gpr.py | 61 ++- pmlearn/gaussian_process/tests/test_gpr.py | 495 ++++++++++----------- 3 files changed, 286 insertions(+), 272 deletions(-) diff --git a/README.rst b/README.rst index 821f666..c952fcf 100644 --- a/README.rst
+++ b/README.rst @@ -280,4 +280,4 @@ project: https://github.com/parsing-science/pymc3_models. :target: https://github.com/pymc-learn/pymc-learn/blob/master/LICENSE .. |Pypi| image:: https://badge.fury.io/py/pymc-learn.svg - :target: https://badge.fury.io/py/pymc-learn \ No newline at end of file + :target: https://badge.fury.io/py/pymc-learn diff --git a/pmlearn/gaussian_process/gpr.py b/pmlearn/gaussian_process/gpr.py index 9ba1794..4f41373 100644 --- a/pmlearn/gaussian_process/gpr.py +++ b/pmlearn/gaussian_process/gpr.py @@ -173,7 +173,8 @@ def load(self, file_prefix): self.num_training_samples = params['num_training_samples'] -class StudentsTProcessRegressor(GaussianProcessRegressor): +class StudentsTProcessRegressor(BayesianModel, + GaussianProcessRegressorMixin): """ StudentsT Process Regression built using PyMC3. Fit a StudentsT process model and estimate model parameters using @@ -204,8 +205,15 @@ class StudentsTProcessRegressor(GaussianProcessRegressor): Rasmussen and Williams (2006). Gaussian Processes for Machine Learning. """ - def __init__(self, prior_mean=0.0): - super(StudentsTProcessRegressor, self).__init__(prior_mean=prior_mean) + def __init__(self, prior_mean=None, kernel=None): + self.ppc = None + self.gp = None + self.num_training_samples = None + self.num_pred = None + self.prior_mean = prior_mean + self.kernel = kernel + + super(StudentsTProcessRegressor, self).__init__() def create_model(self): """ Creates and returns the PyMC3 model. 
@@ -241,13 +249,17 @@ def create_model(self): degrees_of_freedom = pm.Gamma('degrees_of_freedom', alpha=2, beta=0.1, shape=1) - # cov_function = signal_variance**2 * pm.gp.cov.ExpQuad( - # 1, length_scale) - cov_function = signal_variance ** 2 * pm.gp.cov.Matern52( - 1, length_scale) + if self.kernel is None: + cov_function = signal_variance ** 2 * RBF( + input_dim=self.num_pred, + ls=length_scale) + else: + cov_function = self.kernel - # mean_function = pm.gp.mean.Zero() - mean_function = pm.gp.mean.Constant(self.prior_mean) + if self.prior_mean is None: + mean_function = pm.gp.mean.Zero() + else: + mean_function = self.prior_mean self.gp = pm.gp.Latent(mean_func=mean_function, cov_func=cov_function) @@ -277,7 +289,8 @@ def load(self, file_prefix): self.num_training_samples = params['num_training_samples'] -class SparseGaussianProcessRegressor(GaussianProcessRegressor): +class SparseGaussianProcessRegressor(BayesianModel, + GaussianProcessRegressorMixin): """ Sparse Gaussian Process Regression built using PyMC3. Fit a Sparse Gaussian process model and estimate model parameters using @@ -308,9 +321,15 @@ class SparseGaussianProcessRegressor(GaussianProcessRegressor): Rasmussen and Williams (2006). Gaussian Processes for Machine Learning. """ - def __init__(self, prior_mean=0.0): - super(SparseGaussianProcessRegressor, self).__init__( - prior_mean=prior_mean) + def __init__(self, prior_mean=None, kernel=None): + self.ppc = None + self.gp = None + self.num_training_samples = None + self.num_pred = None + self.prior_mean = prior_mean + self.kernel = kernel + + super(SparseGaussianProcessRegressor, self).__init__() def create_model(self): """ Creates and returns the PyMC3 model. 
@@ -344,13 +363,17 @@ def create_model(self): noise_variance = pm.HalfCauchy('noise_variance', beta=5, shape=1) - # cov_function = signal_variance**2 * pm.gp.cov.ExpQuad( - # 1, length_scale) - cov_function = signal_variance ** 2 * pm.gp.cov.Matern52( - 1, length_scale) + if self.kernel is None: + cov_function = signal_variance ** 2 * RBF( + input_dim=self.num_pred, + ls=length_scale) + else: + cov_function = self.kernel - # mean_function = pm.gp.mean.Zero() - mean_function = pm.gp.mean.Constant(self.prior_mean) + if self.prior_mean is None: + mean_function = pm.gp.mean.Zero() + else: + mean_function = self.prior_mean self.gp = pm.gp.MarginalSparse(mean_func=mean_function, cov_func=cov_function, diff --git a/pmlearn/gaussian_process/tests/test_gpr.py b/pmlearn/gaussian_process/tests/test_gpr.py index 255516b..afc947b 100644 --- a/pmlearn/gaussian_process/tests/test_gpr.py +++ b/pmlearn/gaussian_process/tests/test_gpr.py @@ -19,10 +19,9 @@ from pmlearn.exceptions import NotFittedError -from pmlearn.gaussian_process import (GaussianProcessRegressor) -# , -# SparseGaussianProcessRegressor, -# StudentsTProcessRegressor) +from pmlearn.gaussian_process import (GaussianProcessRegressor, + StudentsTProcessRegressor, + SparseGaussianProcessRegressor) class TestGaussianProcessRegressor(object): @@ -144,251 +143,243 @@ def test_save_and_load_work_correctly(self): npt.assert_almost_equal(score1, score2, 0) -# class StudentsTProcessRegressorTestCase(unittest.TestCase): -# -# def setUp(self): -# self.num_training_samples = 150 -# self.num_pred = 1 -# -# self.length_scale = 2.0 -# self.noise_variance = 1.0 -# self.signal_variance = 1.0 -# self.degrees_of_freedom = 3.0 -# -# X = np.linspace(start=0, stop=10, num=self.num_training_samples)[:, None] -# cov_func = self.signal_variance**2 * pm.gp.cov.ExpQuad(self.num_pred, -# self.length_scale) -# -# mean_func = pm.gp.mean.Zero() -# f_ = np.random.multivariate_normal(mean_func(X).eval(), -# cov_func(X).eval() + 1e-8 * 
np.eye(self.num_training_samples), -# self.num_pred -# ).flatten() -# -# y = f_ + self.noise_variance * np.random.standard_t(self.degrees_of_freedom, -# size=self.num_training_samples) -# self.X_train, self.X_test, self.y_train, self.y_test = train_test_split( -# X, y, test_size=0.3 -# ) -# -# self.test_STPR = StudentsTProcessRegressor() -# # self.test_nuts_STPR = StudentsTProcessRegressor() -# self.test_dir = tempfile.mkdtemp() -# -# def tearDown(self): -# shutil.rmtree(self.test_dir) -# -# -# class StudentsTProcessRegressorFitTestCase(StudentsTProcessRegressorTestCase): -# def test_advi_fit_returns_correct_model(self): -# # This print statement ensures PyMC3 output won't overwrite the test name -# print('') -# self.test_STPR.fit(self.X_train, self.y_train) -# -# self.assertEqual(self.num_pred, self.test_STPR.num_pred) -# self.assertAlmostEqual(self.signal_variance, -# int(self.test_STPR.summary['mean']['signal_variance__0']), -# 0) -# self.assertAlmostEqual(self.length_scale, -# int(self.test_STPR.summary['mean']['length_scale__0_0']), -# 0) -# self.assertAlmostEqual(self.noise_variance, -# int(self.test_STPR.summary['mean']['noise_variance__0']), -# 0) - - # def test_nuts_fit_returns_correct_model(self): - # # This print statement ensures PyMC3 output won't overwrite the test name - # print('') - # self.test_nuts_STPR.fit(self.X_train, self.y_train, inference_type='nuts') - # - # self.assertEqual(self.num_pred, self.test_nuts_STPR.num_pred) - # self.assertAlmostEqual(self.signal_variance, - # int(self.test_nuts_STPR.summary['mean']['signal_variance__0']), - # 0) - # self.assertAlmostEqual(self.length_scale, - # int(self.test_nuts_STPR.summary['mean']['length_scale__0_0']), - # 0) - # self.assertAlmostEqual(self.noise_variance, - # int(self.test_nuts_STPR.summary['mean']['noise_variance__0']), - # 0) - - -# class StudentsTProcessRegressorPredictTestCase(StudentsTProcessRegressorTestCase): -# def test_predict_returns_predictions(self): -# print('') -# 
self.test_STPR.fit(self.X_train, self.y_train) -# preds = self.test_STPR.predict(self.X_test) -# self.assertEqual(self.y_test.shape, preds.shape) -# -# def test_predict_returns_mean_predictions_and_std(self): -# print('') -# self.test_STPR.fit(self.X_train, self.y_train) -# preds, stds = self.test_STPR.predict(self.X_test, return_std=True) -# self.assertEqual(self.y_test.shape, preds.shape) -# self.assertEqual(self.y_test.shape, stds.shape) -# -# def test_predict_raises_error_if_not_fit(self): -# print('') -# with self.assertRaises(NotFittedError) as no_fit_error: -# test_STPR = StudentsTProcessRegressor() -# test_STPR.predict(self.X_train) -# -# expected = 'Run fit on the model before predict.' -# self.assertEqual(str(no_fit_error.exception), expected) -# -# -# class StudentsTProcessRegressorScoreTestCase(StudentsTProcessRegressorTestCase): -# def test_score_matches_sklearn_performance(self): -# print('') -# skGPR = skGaussianProcessRegressor() -# skGPR.fit(self.X_train, self.y_train) -# skGPR_score = skGPR.score(self.X_test, self.y_test) -# -# self.test_STPR.fit(self.X_train, self.y_train) -# test_STPR_score = self.test_STPR.score(self.X_test, self.y_test) -# -# self.assertAlmostEqual(skGPR_score, test_STPR_score, 1) -# -# -# class StudentsTProcessRegressorSaveAndLoadTestCase(StudentsTProcessRegressorTestCase): -# def test_save_and_load_work_correctly(self): -# print('') -# self.test_STPR.fit(self.X_train, self.y_train) -# score1 = self.test_STPR.score(self.X_test, self.y_test) -# self.test_STPR.save(self.test_dir) -# -# STPR2 = StudentsTProcessRegressor() -# STPR2.load(self.test_dir) -# -# self.assertEqual(self.test_STPR.inference_type, STPR2.inference_type) -# self.assertEqual(self.test_STPR.num_pred, STPR2.num_pred) -# self.assertEqual(self.test_STPR.num_training_samples, STPR2.num_training_samples) -# pd.testing.assert_frame_equal(summary(self.test_STPR.trace), -# summary(STPR2.trace)) -# -# score2 = STPR2.score(self.X_test, self.y_test) -# 
self.assertAlmostEqual(score1, score2, 1) -# -# -# class SparseGaussianProcessRegressorTestCase(unittest.TestCase): -# -# def setUp(self): -# self.num_training_samples = 150 -# self.num_pred = 1 -# -# self.length_scale = 1.0 -# self.noise_variance = 2.0 -# self.signal_variance = 3.0 -# -# X = np.linspace(start=0, stop=10, num=self.num_training_samples)[:, None] -# cov_func = self.signal_variance**2 * pm.gp.cov.ExpQuad(self.num_pred, -# self.length_scale) -# -# mean_func = pm.gp.mean.Zero() -# f_ = np.random.multivariate_normal(mean_func(X).eval(), -# cov_func(X).eval() + 1e-8 * np.eye(self.num_training_samples), -# self.num_pred -# ).flatten() -# -# y = f_ + self.noise_variance * np.random.randn(self.num_training_samples) -# self.X_train, self.X_test, self.y_train, self.y_test = train_test_split( -# X, y, test_size=0.3 -# ) -# -# self.test_SGPR = SparseGaussianProcessRegressor() -# # self.test_nuts_SGPR = SparseGaussianProcessRegressor() -# self.test_dir = tempfile.mkdtemp() -# -# def tearDown(self): -# shutil.rmtree(self.test_dir) -# -# -# class SparseGaussianProcessRegressorFitTestCase(SparseGaussianProcessRegressorTestCase): -# def test_advi_fit_returns_correct_model(self): -# # This print statement ensures PyMC3 output won't overwrite the test name -# print('') -# self.test_SGPR.fit(self.X_train, self.y_train) -# -# self.assertEqual(self.num_pred, self.test_SGPR.num_pred) -# self.assertAlmostEqual(self.signal_variance, -# int(self.test_SGPR.summary['mean']['signal_variance__0']), -# 0) -# self.assertAlmostEqual(self.length_scale, -# int(self.test_SGPR.summary['mean']['length_scale__0_0']), -# 0) -# self.assertAlmostEqual(self.noise_variance, -# int(self.test_SGPR.summary['mean']['noise_variance__0']), -# 0) - - # def test_nuts_fit_returns_correct_model(self): - # # This print statement ensures PyMC3 output won't overwrite the test name - # print('') - # self.test_nuts_SGPR.fit(self.X_train, self.y_train, inference_type='nuts') - # - # 
self.assertEqual(self.num_pred, self.test_nuts_SGPR.num_pred) - # self.assertAlmostEqual(self.signal_variance, - # int(self.test_nuts_SGPR.summary['mean']['signal_variance__0']), - # 0) - # self.assertAlmostEqual(self.length_scale, - # int(self.test_nuts_SGPR.summary['mean']['length_scale__0_0']), - # 0) - # self.assertAlmostEqual(self.noise_variance, - # int(self.test_nuts_SGPR.summary['mean']['noise_variance__0']), - # 0) - - -# class SparseGaussianProcessRegressorPredictTestCase(SparseGaussianProcessRegressorTestCase): -# def test_predict_returns_predictions(self): -# print('') -# self.test_SGPR.fit(self.X_train, self.y_train) -# preds = self.test_SGPR.predict(self.X_test) -# self.assertEqual(self.y_test.shape, preds.shape) -# -# def test_predict_returns_mean_predictions_and_std(self): -# print('') -# self.test_SGPR.fit(self.X_train, self.y_train) -# preds, stds = self.test_SGPR.predict(self.X_test, return_std=True) -# self.assertEqual(self.y_test.shape, preds.shape) -# self.assertEqual(self.y_test.shape, stds.shape) -# -# def test_predict_raises_error_if_not_fit(self): -# print('') -# with self.assertRaises(NotFittedError) as no_fit_error: -# test_SGPR = SparseGaussianProcessRegressor() -# test_SGPR.predict(self.X_train) -# -# expected = 'Run fit on the model before predict.' 
-# self.assertEqual(str(no_fit_error.exception), expected) -# -# -# class SparseGaussianProcessRegressorScoreTestCase(SparseGaussianProcessRegressorTestCase): -# def test_score_matches_sklearn_performance(self): -# print('') -# skGPR = skGaussianProcessRegressor() -# skGPR.fit(self.X_train, self.y_train) -# skGPR_score = skGPR.score(self.X_test, self.y_test) -# -# self.test_SGPR.fit(self.X_train, self.y_train) -# test_SGPR_score = self.test_SGPR.score(self.X_test, self.y_test) -# -# self.assertAlmostEqual(skGPR_score, test_SGPR_score, 1) -# -# -# class SparseGaussianProcessRegressorSaveAndLoadTestCase(SparseGaussianProcessRegressorTestCase): -# def test_save_and_load_work_correctly(self): -# print('') -# self.test_SGPR.fit(self.X_train, self.y_train) -# score1 = self.test_SGPR.score(self.X_test, self.y_test) -# self.test_SGPR.save(self.test_dir) -# -# SGPR2 = SparseGaussianProcessRegressor() -# SGPR2.load(self.test_dir) -# -# self.assertEqual(self.test_SGPR.inference_type, SGPR2.inference_type) -# self.assertEqual(self.test_SGPR.num_pred, SGPR2.num_pred) -# self.assertEqual(self.test_SGPR.num_training_samples, SGPR2.num_training_samples) -# pd.testing.assert_frame_equal(summary(self.test_SGPR.trace), -# summary(SGPR2.trace)) -# -# score2 = SGPR2.score(self.X_test, self.y_test) -# self.assertAlmostEqual(score1, score2, 1) \ No newline at end of file +class TestStudentsTProcessRegressor(object): + + def setup_method(self): + self.num_pred = 1 + self.num_training_samples = 500 + + self.length_scale = 1.0 + self.signal_variance = 0.1 + self.noise_variance = 0.1 + self.degrees_of_freedom = 1.0 + + X = np.linspace(start=0, stop=10, + num=self.num_training_samples)[:, None] + + cov_func = self.signal_variance ** 2 * pm.gp.cov.ExpQuad( + 1, self.length_scale) + mean_func = pm.gp.mean.Zero() + + f_true = np.random.multivariate_normal( + mean_func(X).eval(), + cov_func(X).eval() + 1e-8 * np.eye(self.num_training_samples), + 1).flatten() + y = f_true + \ + self.noise_variance 
* \ + np.random.standard_t(self.degrees_of_freedom, + size=self.num_training_samples) + + self.X_train, self.X_test, self.y_train, self.y_test = \ + train_test_split(X, y, test_size=0.3) + + self.advi_stpr = StudentsTProcessRegressor() + + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.test_dir) + + +class TestStudentsTProcessRegressorFit(TestStudentsTProcessRegressor): + def test_advi_fit_returns_correct_model(self): + # This print statement ensures PyMC3 output won't overwrite + # the test name + print('') + self.advi_stpr.fit(self.X_train, self.y_train, + inference_args={"n": 25000}) + + npt.assert_equal(self.num_pred, self.advi_stpr.num_pred) + npt.assert_almost_equal( + self.signal_variance, + self.advi_stpr.summary['mean']['signal_variance__0'], + 0) + npt.assert_almost_equal( + self.length_scale, + self.advi_stpr.summary['mean']['length_scale__0_0'], + 0) + npt.assert_almost_equal( + self.noise_variance, + self.advi_stpr.summary['mean']['noise_variance__0'], + 0) + + +class TestStudentsTProcessRegressorPredict(TestStudentsTProcessRegressor): + def test_predict_returns_predictions(self): + print('') + self.advi_stpr.fit(self.X_train, self.y_train, + inference_args={"n": 25000}) + preds = self.advi_stpr.predict(self.X_test) + npt.assert_equal(self.y_test.shape, preds.shape) + + def test_predict_returns_mean_predictions_and_std(self): + print('') + self.advi_stpr.fit(self.X_train, self.y_train, + inference_args={"n": 25000}) + preds, stds = self.advi_stpr.predict(self.X_test, return_std=True) + npt.assert_equal(self.y_test.shape, preds.shape) + npt.assert_equal(self.y_test.shape, stds.shape) + + def test_predict_raises_error_if_not_fit(self): + print('') + with pytest.raises(NotFittedError): + advi_stpr = StudentsTProcessRegressor() + advi_stpr.predict(self.X_train) + + +class TestStudentsTProcessRegressorScore(TestStudentsTProcessRegressor): + def test_score_matches_sklearn_performance(self): + print('') + sk_gpr = 
skGaussianProcessRegressor() + sk_gpr.fit(self.X_train, self.y_train) + sk_gpr_score = sk_gpr.score(self.X_test, self.y_test) + + self.advi_stpr.fit(self.X_train, self.y_train, + inference_args={"n": 25000}) + advi_stpr_score = self.advi_stpr.score(self.X_test, self.y_test) + + npt.assert_almost_equal(sk_gpr_score, advi_stpr_score, 0) + + +class TestStudentsTProcessRegressorSaveAndLoad(TestStudentsTProcessRegressor): + def test_save_and_load_work_correctly(self): + print('') + self.advi_stpr.fit(self.X_train, self.y_train, + inference_args={"n": 25000}) + score1 = self.advi_stpr.score(self.X_test, self.y_test) + self.advi_stpr.save(self.test_dir) + + stpr2 = StudentsTProcessRegressor() + stpr2.load(self.test_dir) + + npt.assert_equal(self.advi_stpr.inference_type, stpr2.inference_type) + npt.assert_equal(self.advi_stpr.num_pred, stpr2.num_pred) + npt.assert_equal(self.advi_stpr.num_training_samples, + stpr2.num_training_samples) + pdt.assert_frame_equal(summary(self.advi_stpr.trace), + summary(stpr2.trace)) + + score2 = stpr2.score(self.X_test, self.y_test) + npt.assert_almost_equal(score1, score2, 0) + + +class TestSparseGaussianProcessRegressor(object): + + def setup_method(self): + self.num_pred = 1 + self.num_training_samples = 1000 + + self.length_scale = 1.0 + self.signal_variance = 0.1 + self.noise_variance = 0.1 + + X = np.linspace(start=0, stop=10, + num=self.num_training_samples)[:, None] + + cov_func = self.signal_variance ** 2 * pm.gp.cov.ExpQuad( + 1, self.length_scale) + mean_func = pm.gp.mean.Zero() + + f_true = np.random.multivariate_normal( + mean_func(X).eval(), + cov_func(X).eval() + 1e-8 * np.eye(self.num_training_samples), + 1).flatten() + y = f_true + \ + self.noise_variance * np.random.randn(self.num_training_samples) + + self.X_train, self.X_test, self.y_train, self.y_test = \ + train_test_split(X, y, test_size=0.3) + + self.advi_sgpr = SparseGaussianProcessRegressor() + + self.test_dir = tempfile.mkdtemp() + + def teardown_method(self): + 
"""Tear down + """ + shutil.rmtree(self.test_dir) + + +class TestSparseGaussianProcessRegressorFit(TestSparseGaussianProcessRegressor): + def test_advi_fit_returns_correct_model(self): + # This print statement ensures PyMC3 output won't overwrite + # the test name + print('') + self.advi_sgpr.fit(self.X_train, self.y_train) + + npt.assert_equal(self.num_pred, self.advi_sgpr.num_pred) + npt.assert_almost_equal( + self.signal_variance, + self.advi_sgpr.summary['mean']['signal_variance__0'], + 0) + npt.assert_almost_equal( + self.length_scale, + self.advi_sgpr.summary['mean']['length_scale__0_0'], + 0) + npt.assert_almost_equal( + self.noise_variance, + self.advi_sgpr.summary['mean']['noise_variance__0'], + 0) + + +class TestSparseGaussianProcessRegressorPredict( + TestSparseGaussianProcessRegressor): + + def test_predict_returns_predictions(self): + print('') + self.advi_sgpr.fit(self.X_train, self.y_train, + inference_args={"n": 25000}) + preds = self.advi_sgpr.predict(self.X_test) + npt.assert_equal(self.y_test.shape, preds.shape) + + def test_predict_returns_mean_predictions_and_std(self): + print('') + self.advi_sgpr.fit(self.X_train, self.y_train, + inference_args={"n": 25000}) + preds, stds = self.advi_sgpr.predict(self.X_test, return_std=True) + npt.assert_equal(self.y_test.shape, preds.shape) + npt.assert_equal(self.y_test.shape, stds.shape) + + def test_predict_raises_error_if_not_fit(self): + print('') + with pytest.raises(NotFittedError): + advi_sgpr = SparseGaussianProcessRegressor() + advi_sgpr.predict(self.X_train) + + +class TestSparseGaussianProcessRegressorScore( + TestSparseGaussianProcessRegressor): + + def test_score_matches_sklearn_performance(self): + print('') + sk_gpr = skGaussianProcessRegressor() + sk_gpr.fit(self.X_train, self.y_train) + sk_gpr_score = sk_gpr.score(self.X_test, self.y_test) + + self.advi_sgpr.fit(self.X_train, self.y_train) + advi_sgpr_score = self.advi_sgpr.score(self.X_test, self.y_test) + + 
npt.assert_almost_equal(sk_gpr_score, advi_sgpr_score, 0) + + +class TestSparseGaussianProcessRegressorSaveAndLoad( + TestSparseGaussianProcessRegressor): + + def test_save_and_load_work_correctly(self): + print('') + self.advi_sgpr.fit(self.X_train, self.y_train) + score1 = self.advi_sgpr.score(self.X_test, self.y_test) + self.advi_sgpr.save(self.test_dir) + + sgpr2 = SparseGaussianProcessRegressor() + sgpr2.load(self.test_dir) + + npt.assert_equal(self.advi_sgpr.inference_type, sgpr2.inference_type) + npt.assert_equal(self.advi_sgpr.num_pred, sgpr2.num_pred) + npt.assert_equal(self.advi_sgpr.num_training_samples, + sgpr2.num_training_samples) + pdt.assert_frame_equal(summary(self.advi_sgpr.trace), + summary(sgpr2.trace)) + + score2 = sgpr2.score(self.X_test, self.y_test) + npt.assert_almost_equal(score1, score2, 0) From 386b985586328630e7f19ead90c4691e448c0c1d Mon Sep 17 00:00:00 2001 From: Daniel Emaasit Date: Tue, 23 Oct 2018 02:17:54 -0400 Subject: [PATCH 03/11] able to merge (#11) * added tests for gp reg * fixed badges * added tests for students tp, sparse gp * cleaned redandunt imports (#9) From 5c74a2f817d7329fe1700753c72de3148e27dafc Mon Sep 17 00:00:00 2001 From: Daniel Emaasit Date: Tue, 23 Oct 2018 02:30:07 -0400 Subject: [PATCH 04/11] coveralls badge --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index c952fcf..55420eb 100644 --- a/README.rst +++ b/README.rst @@ -262,8 +262,8 @@ project: https://github.com/parsing-science/pymc3_models. .. |Travis| image:: https://travis-ci.com/pymc-learn/pymc-learn.svg?branch=master :target: https://travis-ci.com/pymc-learn/pymc-learn -.. |Coverage| image:: https://coveralls.io/repos/github/pymc-learn/pymc-learn/badge.svg - :target: https://coveralls.io/github/pymc-learn/pymc-learn +.. 
|Coverage| image:: https://coveralls.io/repos/github/pymc-learn/pymc-learn/badge.svg?branch=master + :target: https://coveralls.io/github/pymc-learn/pymc-learn?branch=master .. |Python27| image:: https://img.shields.io/badge/python-2.7-blue.svg :target: https://badge.fury.io/py/pymc-learn From 2c1e9a55ee5971cd8cda83518cbcaafa3fb473fb Mon Sep 17 00:00:00 2001 From: Daniel Emaasit Date: Wed, 31 Oct 2018 23:15:21 -0400 Subject: [PATCH 05/11] updated docs with pypi install --- .travis.yml | 4 ++++ CONTRIBUTING.rst | 2 +- README.rst | 52 ++++++++++++++++++++++++++--------------- docs/why.rst | 60 ++++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 94 insertions(+), 24 deletions(-) diff --git a/.travis.yml b/.travis.yml index de6da6c..a6c913c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,6 +24,10 @@ install: env: - PYTHON_VERSION=2.7 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py pmlearn/linear_model/tests/test_base.py pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" - PYTHON_VERSION=2.7 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_logistic.py pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" + - PYTHON_VERSION=3.5 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py pmlearn/linear_model/tests/test_base.py 
pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" + - PYTHON_VERSION=3.5 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_logistic.py pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" + - PYTHON_VERSION=3.5 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py pmlearn/linear_model/tests/test_base.py pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" + - PYTHON_VERSION=3.5 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_logistic.py pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" - PYTHON_VERSION=3.6 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py pmlearn/linear_model/tests/test_base.py 
pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" - PYTHON_VERSION=3.6 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_logistic.py pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" - PYTHON_VERSION=3.6 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py pmlearn/linear_model/tests/test_base.py pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py" diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 21c15a0..c4faaa7 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -104,7 +104,7 @@ For the most part, this library follows PEP8 with a couple of exceptions. Notes: - Indent with 4 spaces -- Lines can be 120 characters long +- Lines should be a maximum of 80 characters long - Docstrings should be written as numpy docstrings - Your code should be Python 3 compatible - When in doubt, follow the style of the existing code \ No newline at end of file diff --git a/README.rst b/README.rst index 55420eb..b1d0806 100644 --- a/README.rst +++ b/README.rst @@ -27,8 +27,15 @@ What is pymc-learn? 
*pymc-learn is a library for practical probabilistic machine learning in Python*. -It provides probabilistic models in a syntax that mimics -`scikit-learn `_. +It provides a variety of state-of-the-art probabilistic models for supervised +and unsupervised machine learning. **It is inspired by** +`scikit-learn `_ **and focuses on bringing probabilistic +machine learning to non-specialists**. It uses a syntax that mimics scikit-learn. +Emphasis is put on ease of use, productivity, flexibility, performance, +documentation, and an API consistent with scikit-learn. It depends on scikit-learn +and `PyMC3 `_ and is distributed under the new BSD-3 license, +encouraging its use in both academia and industry. + Users can now have calibrated quantities of uncertainty in their models using powerful inference algorithms -- such as MCMC or Variational inference -- provided by `PyMC3 `_. @@ -63,19 +70,29 @@ parameters and predictions. Quick Install ----------------- -You can install ``pymc-learn`` from source as follows: +You can install ``pymc-learn`` from PyPi using pip as follows: + +.. code-block:: bash + + pip install pymc-learn + + +Or from source as follows: .. code-block:: bash pip install git+https://github.com/pymc-learn/pymc-learn +.. CAUTION:: + ``pymc-learn`` is under heavy development. Dependencies ................ ``pymc-learn`` is tested on Python 2.7, 3.5 & 3.6 and depends on Theano, -PyMC3, NumPy, SciPy, and Matplotlib (see ``requirements.txt`` for version -information). +PyMC3, Scikit-learn, NumPy, SciPy, and Matplotlib (see ``requirements.txt`` +for version information). ---- @@ -92,9 +109,9 @@ Quick Start >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0) >>> kernel = DotProduct() + WhiteKernel() >>> gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y) - >>> gpr.score(X, y) # doctest: +ELLIPSIS + >>> gpr.score(X, y) 0.3680... 
- >>> gpr.predict(X[:2,:], return_std=True) # doctest: +ELLIPSIS + >>> gpr.predict(X[:2,:], return_std=True) (array([653.0..., 592.1...]), array([316.6..., 316.6...])) ---- @@ -124,18 +141,18 @@ Citing pymc-learn To cite ``pymc-learn`` in publications, please use the following:: - Pymc-learn Developers Team (2019). pymc-learn: Practical probabilistic machine - learning in Python. arXiv preprint arXiv:xxxx.xxxxx. Forthcoming. + Emaasit, Daniel (2018). Pymc-learn: Practical probabilistic machine + learning in Python. arXiv preprint arXiv:1810.xxxxx. Or using BibTex as follows: .. code-block:: latex - @article{Pymc-learn, - title={pymc-learn: Practical probabilistic machine learning in {P}ython}, - author={Pymc-learn Developers Team}, - journal={arXiv preprint arXiv:xxxx.xxxxx}, - year={2019} + @article{emaasit2018pymc, + title={Pymc-learn: Practical probabilistic machine learning in {P}ython}, + author={Emaasit, Daniel and others}, + journal={arXiv preprint arXiv:1810.xxxxx}, + year={2018} } If you want to cite ``pymc-learn`` for its API, you may also want to consider @@ -186,8 +203,7 @@ Index **User Guide** The main documentation. This contains an in-depth description of all models -and how to apply them. ``pymc-learn`` leverages the Base template provided by the PyMC3 Models -project: https://github.com/parsing-science/pymc3_models. +and how to apply them. * :doc:`user_guide` @@ -225,8 +241,8 @@ in a familiar scikit-learn syntax. **API Reference** -``pymc-learn`` leverages the Base template provided by the PyMC3 Models -project: https://github.com/parsing-science/pymc3_models. +``pymc-learn`` leverages and extends the Base template provided by the PyMC3 +Models project: https://github.com/parsing-science/pymc3_models. * :doc:`api` diff --git a/docs/why.rst b/docs/why.rst index c7b640c..d6136b7 100644 --- a/docs/why.rst +++ b/docs/why.rst @@ -14,11 +14,11 @@ you may be compelled to use ``pymc-learn``. 
pymc-learn prioritizes user experience --------------------------------------- -- ``pymc-learn`` mimics the syntax of `scikit-learn `_ -- a popular Python library for machine learning -- which has a consistent & simple API, and is very user friendly. +- *Familiarity*: ``pymc-learn`` mimics the syntax of `scikit-learn `_ -- a popular Python library for machine learning -- which has a consistent & simple API, and is very user friendly. -- This makes ``pymc-learn`` easy to learn and use for first-time users. +- *Ease of use*: This makes ``pymc-learn`` easy to learn and use for first-time users. -- For scikit-learn users, you don't have to completely rewrite your code. Your code looks almost the same. You are more productive, allowing you to try more ideas faster. +- *Productivity*: For scikit-learn users, you don't have to completely rewrite your code. Your code looks almost the same. You are more productive, allowing you to try more ideas faster. .. code-block:: python @@ -27,12 +27,56 @@ pymc-learn prioritizes user experience lr = LinearRegression() lr = LinearRegression() lr.fit(X, y) lr.fit(X, y) -- This ease of use does not come at the cost of reduced flexibility: because ``pymc-learn`` integrates with `PyMC3 `_, it enables you to implement anything you could have built in the base language. +- *Flexibility*: This ease of use does not come at the cost of reduced flexibility. Given that ``pymc-learn`` integrates with `PyMC3 `_, it enables you to implement anything you could have built in the base language. +- *Performance*. The primary inference algorithm is gradient-based automatic differentiation variational inference (ADVI) (Kucukelbir et al., 2017), which estimates a divergence measure between approximate and true posterior distributions. 
Pymc-learn scales to complex, high-dimensional models thanks to GPU-accelerated tensor math and reverse-mode automatic differentiation via Theano (Theano Development Team, 2016), and it scales to large datasets thanks to estimates computed over mini-batches of data in ADVI. ---- +Why do we need pymc-learn? +-------------------------- +Currently, there is a growing need for principled machine learning approaches by +non-specialists in many fields including the pure sciences (e.g. biology, physics, +chemistry), the applied sciences (e.g. political science, biostatistics), +engineering (e.g. transportation, mechanical), medicine (e.g. medical imaging), +the arts (e.g. visual art), and software industries. + +This has led to increased adoption of probabilistic modeling. This trend is +attributed in part to three major factors: + +(1) the need for transparent models with calibrated quantities of uncertainty, i.e. "models should know when they don't know", + +(2) the ever-increasing number of promising results achieved on a variety of fundamental problems in AI (Ghahramani, 2015), and + +(3) the emergence of probabilistic programming languages (PPLs) that provide a flexible framework to build richly structured probabilistic models that incorporate domain knowledge. + +However, usage of PPLs requires a specialized understanding of probability +theory, probabilistic graphical modeling, and probabilistic inference. Some PPLs +also require a good command of software coding. These requirements make it +difficult for non-specialists to adopt and apply probabilistic machine learning +to their domain problems. + +``Pymc-learn`` seeks to address these challenges by providing state-of-the-art +implementations of several popular probabilistic machine learning models. +**It is inspired by scikit-learn** (Pedregosa et al., 2011) **and focuses on +bringing probabilistic machine learning to non-specialists**. 
It puts emphasis +on: + +(1) ease of use, + +(2) productivity, + +(3) flexibility, + +(4) performance, + +(5) documentation, and + +(6) an API consistent with scikit-learn. + +The underlying probabilistic models are built using pymc3 (Salvatier et al., 2016). + Python is the lingua franca of Data Science -------------------------------------------- @@ -137,4 +181,10 @@ References 4. Barber, D. (2012). Bayesian reasoning and machine learning. Cambridge University Press. -5. Salvatier, J., Wiecki, T. V., & Fonnesbeck, C. (2016). Probabilistic programming in Python using PyMC3. PeerJ Computer Science, 2, e55. \ No newline at end of file +5. Salvatier, J., Wiecki, T. V., & Fonnesbeck, C. (2016). Probabilistic programming in Python using PyMC3. PeerJ Computer Science, 2, e55. + +6. Alp Kucukelbir, Dustin Tran, Rajesh Ranganath, Andrew Gelman, and David M Blei. Automatic differentiation variational inference. The Journal of Machine Learning Research, 18(1):430-474, 2017. + +7. Fabian Pedregosa, Gael Varoquaux, Alexandre Gramfort, Vincent Michel, Bertrand Thirion, Olivier Grisel, Mathieu Blondel, Peter Prettenhofer, Ron Weiss, Vincent Dubourg, et al. Scikit-learn: Machine learning in python. Journal of machine learning research, 12(Oct): 2825-2830, 2011. + +8. Theano Development Team. Theano: A Python framework for fast computation of mathematical expressions. arXiv e-prints, abs/1605.02688, May 2016. URL http://arxiv.org/abs/1605.02688. 
\ No newline at end of file From 78efa95871fce1509b64c4233946b7ef4b58edc0 Mon Sep 17 00:00:00 2001 From: Daniel Emaasit Date: Mon, 5 Nov 2018 13:04:07 -0500 Subject: [PATCH 06/11] updated docs with arxiv reference and pymc4 note --- CONTRIBUTING.rst | 11 ++++++++++- README.rst | 14 ++++++++++++-- docs/cite.rst | 14 +++++++------- docs/develop.rst | 9 +++++++++ docs/install.rst | 30 +++++++++++++++++++++++++++--- docs/why.rst | 11 ++++++++++- 6 files changed, 75 insertions(+), 14 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index c4faaa7..54337c2 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -107,4 +107,13 @@ Notes: - Lines should be a maximum of 80 characters long - Docstrings should be written as numpy docstrings - Your code should be Python 3 compatible -- When in doubt, follow the style of the existing code \ No newline at end of file +- When in doubt, follow the style of the existing code + +Transitioning from PyMC3 to PyMC4 +----------------------------------- + +.. raw:: html + + + + \ No newline at end of file diff --git a/README.rst b/README.rst index b1d0806..6965abf 100644 --- a/README.rst +++ b/README.rst @@ -46,6 +46,16 @@ created. ``pymc-learn`` leverages and extends the Base template provided by the PyMC3 Models project: https://github.com/parsing-science/pymc3_models + +Transitioning from PyMC3 to PyMC4 +.................................. + +.. raw:: html + + + + + ---- Familiar user interface @@ -142,7 +152,7 @@ Citing pymc-learn To cite ``pymc-learn`` in publications, please use the following:: Emaasit, Daniel (2018). Pymc-learn: Practical probabilistic machine - learning in Python. arXiv preprint arXiv:1810.xxxxx. + learning in Python. arXiv preprint arXiv:1811.00542. 
Or using BibTex as follows: @@ -151,7 +161,7 @@ Or using BibTex as follows: @article{emaasit2018pymc, title={Pymc-learn: Practical probabilistic machine learning in {P}ython}, author={Emaasit, Daniel and others}, - journal={arXiv preprint arXiv:1810.xxxxx}, + journal={arXiv preprint arXiv:1811.00542}, year={2018} } diff --git a/docs/cite.rst b/docs/cite.rst index 413794f..45367b9 100644 --- a/docs/cite.rst +++ b/docs/cite.rst @@ -3,18 +3,18 @@ Citations To cite ``pymc-learn`` in publications, please use the following:: - Pymc-learn Developers Team (2019). pymc-learn: Practical probabilistic machine - learning in Python. arXiv preprint arXiv:xxxx.xxxxx. Forthcoming. + Emaasit, Daniel (2018). Pymc-learn: Practical probabilistic machine + learning in Python. arXiv preprint arXiv:1811.00542. Or using BibTex as follows: .. code-block:: latex - @article{Pymc-learn, - title={pymc-learn: Practical probabilistic machine learning in {P}ython}, - author={Pymc-learn Developers Team}, - journal={arXiv preprint arXiv:xxxx.xxxxx}, - year={2019} + @article{emaasit2018pymc, + title={Pymc-learn: Practical probabilistic machine learning in {P}ython}, + author={Emaasit, Daniel and others}, + journal={arXiv preprint arXiv:1811.00542}, + year={2018} } If you want to cite ``pymc-learn`` for its API, you may also want to consider diff --git a/docs/develop.rst b/docs/develop.rst index 302fa93..8202cc4 100644 --- a/docs/develop.rst +++ b/docs/develop.rst @@ -118,3 +118,12 @@ Contact To report an issue with ``pymc-learn`` please use the `issue tracker `__. Finally, if you need to get in touch for information about the project, `send us an e-mail `__. + +Transitioning from PyMC3 to PyMC4 +----------------------------------- + +.. 
raw:: html + + + + \ No newline at end of file diff --git a/docs/install.rst b/docs/install.rst index 538f4b7..68a3fa5 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -5,12 +5,36 @@ Install pymc-learn It is recommend installing Python and key numerical libraries using the `Anaconda Distribution `_, which has one-click installers available on all major platforms. -Assuming a standard Python environment is installed on your machine (including pip), ``pymc-learn`` itself can be installed in one line using pip: +Assuming a standard Python environment is installed on your machine +(including pip), ``pymc-learn`` itself can be installed in one line using pip: -.. code-block:: python +You can install ``pymc-learn`` from PyPi using pip as follows: + +.. code-block:: bash + + pip install pymc-learn + + +Or from source as follows: + +.. code-block:: bash + + pip install git+https://github.com/pymc-learn/pymc-learn + + +.. CAUTION:: + ``pymc-learn`` is under heavy development. - pip install git+https://github.com/pymc-learn/pymc-learn This also installs required dependencies including Theano. For alternative Theano installations (e.g., gpu), please see the instructions on the main `Theano webpage `_. + +Transitioning from PyMC3 to PyMC4 +.................................. + +.. raw:: html + + + + \ No newline at end of file diff --git a/docs/why.rst b/docs/why.rst index d6136b7..4723631 100644 --- a/docs/why.rst +++ b/docs/why.rst @@ -78,6 +78,15 @@ on: The underlying probabilistic models are built using pymc3 (Salvatier et al., 2016). +Transitioning from PyMC3 to PyMC4 +.................................. + +.. raw:: html + + + + + Python is the lingua franca of Data Science -------------------------------------------- @@ -171,7 +180,7 @@ such as `NIPS `_, `UAI `_, ---- References -........... +------------ 1. Ghahramani, Z. (2015). Probabilistic machine learning and artificial intelligence. Nature, 521(7553), 452. 
From af9875c8721b6200bb45ff3dd2ee4ab9d91ef29f Mon Sep 17 00:00:00 2001 From: Daniel Emaasit Date: Mon, 5 Nov 2018 14:13:15 -0500 Subject: [PATCH 07/11] write tests using pytest --- CONTRIBUTING.rst | 20 +++++++++++++------- docs/develop.rst | 15 +++++++-------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 54337c2..486b324 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -1,6 +1,5 @@ -Thank you for considering contributing to ``pymc-learn``! This project is intended to be a space where anyone can share models they've built. - -Please read these guidelines before submitting anything to the project. As of the first release, I'm the only person working on this project so respecting these guidelines will help me get back to you more quickly. +Thank you for considering contributing to ``pymc-learn``! Please read these +guidelines before submitting anything to the project. Some ways to contribute: @@ -62,7 +61,7 @@ in case there have been any changes: git fetch upstream git rebase upstream/master -Then push the changes to your Gitlab account with: +Then push the changes to your Github account with: .. code-block:: bash @@ -75,8 +74,8 @@ Pull Request Checklist ................................ - Ensure your code has followed the Style Guidelines below -- Make sure you have written unittests where appropriate -- Make sure the unittests pass +- Make sure you have written tests where appropriate +- Make sure the tests pass .. code-block:: bash @@ -104,11 +103,18 @@ For the most part, this library follows PEP8 with a couple of exceptions. Notes: - Indent with 4 spaces -- Lines should be a maximum of 80 characters long +- Lines can be 80 characters long - Docstrings should be written as numpy docstrings - Your code should be Python 3 compatible - When in doubt, follow the style of the existing code +Contact +............. + +To report an issue with ``pymc-learn`` please use the `issue tracker `__. 
+ +Finally, if you need to get in touch for information about the project, `send us an e-mail `__. + Transitioning from PyMC3 to PyMC4 ----------------------------------- diff --git a/docs/develop.rst b/docs/develop.rst index 8202cc4..b1ed321 100644 --- a/docs/develop.rst +++ b/docs/develop.rst @@ -1,9 +1,8 @@ Contributing ============= -Thank you for considering contributing to ``pymc-learn``! This project is intended to be a space where anyone can share models they've built. - -Please read these guidelines before submitting anything to the project. As of the first release, I'm the only person working on this project so respecting these guidelines will help me get back to you more quickly. +Thank you for considering contributing to ``pymc-learn``! Please read these +guidelines before submitting anything to the project. Some ways to contribute: @@ -65,7 +64,7 @@ in case there have been any changes: git fetch upstream git rebase upstream/master -Then push the changes to your Gitlab account with: +Then push the changes to your Github account with: .. code-block:: bash @@ -78,13 +77,13 @@ Pull Request Checklist ................................ - Ensure your code has followed the Style Guidelines below -- Make sure you have written unittests where appropriate -- Make sure the unittests pass +- Make sure you have written tests where appropriate +- Make sure the tests pass .. code-block:: bash conda activate myenv - python -m unittest discover -cv + python -m pytest NOTE: On Windows, in your Anaconda Prompt, run ``activate myenv``. @@ -107,7 +106,7 @@ For the most part, this library follows PEP8 with a couple of exceptions. 
Notes: - Indent with 4 spaces -- Lines can be 120 characters long +- Lines can be 80 characters long - Docstrings should be written as numpy docstrings - Your code should be Python 3 compatible - When in doubt, follow the style of the existing code From 48f00006d7129df7718305d59fdbd30aa41a0708 Mon Sep 17 00:00:00 2001 From: Daniel Emaasit Date: Mon, 26 Nov 2018 09:13:13 -0500 Subject: [PATCH 08/11] dependent packages were pinned too specifically --- pmlearn/__init__.py | 2 +- requirements-dev.txt | 6 +++--- requirements.txt | 18 +++++++++--------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pmlearn/__init__.py b/pmlearn/__init__.py index 3e3d1ac..41a115e 100644 --- a/pmlearn/__init__.py +++ b/pmlearn/__init__.py @@ -10,7 +10,7 @@ See http://pymc-learn.org for complete documentation. """ -__version__ = '0.0.1.rc0' +__version__ = '0.0.1.rc2' __all__ = ['gaussian_process', 'linear_model', diff --git a/requirements-dev.txt b/requirements-dev.txt index 1fc7ced..4db3e1a 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,4 @@ -CommonMark==0.5.4 +CommonMark>=0.5.4 flake8>=3.5.0 # gpflowopt>=1.1 jupyter-sphinx>=0.1.3 @@ -11,6 +11,6 @@ pytest-cov>=2.5.1 pytest>=3.0.7 recommonmark>=0.4.0 sphinx>=1.5.5 -sphinx-autobuild==0.7.1 -sphinx-rtd-theme==0.4.2 +sphinx-autobuild>=0.7.1 +sphinx-rtd-theme>=0.4.2 pymc_learn_sphinx_theme>=0.1.5 diff --git a/requirements.txt b/requirements.txt index 8efdd10..8b22fe9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,13 @@ future>=0.16.0 -joblib==0.11 -matplotlib==2.1.1 -numpy==1.13.1 -numpydoc==0.7.0 -pandas==0.21.1 -pymc3==3.4.1 -scikit-learn==0.19.1 -scipy==1.0.0 -seaborn==0.8.1 +joblib>=0.11 +matplotlib>=2.1.1 +numpy>=1.13.1 +numpydoc>=0.7.0 +pandas>=0.21.1 +pymc3>=3.4.1 +scikit-learn>=0.19.1 +scipy>=1.0.0 +seaborn>=0.8.1 six>=1.10.0 theano>=1.0.0 tqdm>=4.8.4 From 3b309bc8f7f5ab200e72d000fc2d2286bb4d8a5c Mon Sep 17 00:00:00 2001 From: Daniel Emaasit Date: Mon, 26 Nov 2018 10:48:54 
-0500 Subject: [PATCH 09/11] install in conda env --- README.rst | 21 +++++++++++++++++++-- docs/install.rst | 10 ++++++++-- docs/modules/neural_networks.rst | 11 ++++++----- setup.cfg | 5 ++++- 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index 6965abf..2a9e343 100644 --- a/README.rst +++ b/README.rst @@ -6,7 +6,7 @@ pymc-learn: Practical Probabilistic Machine Learning in Python :alt: Pymc-Learn logo :align: center -|Travis| |Coverage| |Docs| |License| |Pypi| |Binder| +|status| |Travis| |Coverage| |Docs| |License| |Pypi| |Binder| **Contents:** @@ -80,6 +80,13 @@ parameters and predictions. Quick Install ----------------- +``pymc-learn`` requires a working Python interpreter (2.7 or 3.5+). +It is recommended to install Python and key numerical libraries using the `Anaconda Distribution `_, +which has one-click installers available on all major platforms. + +Assuming a standard Python environment is installed on your machine +(including pip), ``pymc-learn`` itself can be installed in one line using pip: + You can install ``pymc-learn`` from PyPi using pip as follows: .. code-block:: bash @@ -97,6 +104,14 @@ Or from source as follows: .. CAUTION:: ``pymc-learn`` is under heavy development. + It is recommended to install ``pymc-learn`` in a Conda environment because it + provides `Math Kernel Library `_ (MKL) + routines to accelerate math functions. If you are having trouble, try using + a distribution of Python that includes these packages like + `Anaconda `_. + + + Dependencies ................ @@ -282,7 +297,7 @@ Models project: https://github.com/parsing-science/pymc3_models. changelog.rst cite.rst -.. |Binder| image:: https://mybinder.org/badge.svg +.. |Binder| image:: https://img.shields.io/badge/try-online-579ACA.svg?logo= :target: https://mybinder.org/v2/gh/pymc-learn/pymc-learn/master?filepath=%2Fdocs%2Fnotebooks?urlpath=lab .. 
|Travis| image:: https://travis-ci.com/pymc-learn/pymc-learn.svg?branch=master @@ -307,3 +322,5 @@ Models project: https://github.com/parsing-science/pymc3_models. .. |Pypi| image:: https://badge.fury.io/py/pymc-learn.svg :target: https://badge.fury.io/py/pymc-learn + +.. |status| image:: https://img.shields.io/badge/Status-Beta-blue.svg \ No newline at end of file diff --git a/docs/install.rst b/docs/install.rst index 68a3fa5..9050891 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -1,8 +1,8 @@ Install pymc-learn =================== -``pymc-learn`` requires a working Python interpreter (2.7 or 3.3+). -It is recommend installing Python and key numerical libraries using the `Anaconda Distribution `_, +``pymc-learn`` requires a working Python interpreter (2.7 or 3.5+). +It is recommended to install Python and key numerical libraries using the `Anaconda Distribution `_, which has one-click installers available on all major platforms. Assuming a standard Python environment is installed on your machine @@ -25,6 +25,12 @@ Or from source as follows: .. CAUTION:: ``pymc-learn`` is under heavy development. + It is recommended to install ``pymc-learn`` in a Conda environment because it + provides `Math Kernel Library `_ (MKL) + routines to accelerate math functions. If you are having trouble, try using + a distribution of Python that includes these packages like + `Anaconda `_. + This also installs required dependencies including Theano. For alternative Theano installations (e.g., gpu), please see the diff --git a/docs/modules/neural_networks.rst b/docs/modules/neural_networks.rst index 965ecf8..67082ae 100644 --- a/docs/modules/neural_networks.rst +++ b/docs/modules/neural_networks.rst @@ -7,12 +7,13 @@ Neural network models (supervised) .. currentmodule:: pmlearn.neural_network -.. warning:: +.. NOTE:: - This implementation is not intended for large-scale applications. In particular, - scikit-learn offers no GPU support.
For much faster, GPU-based implementations, - as well as frameworks offering much more flexibility to build deep learning - architectures, see :ref:`related_projects`. + Unlike scikit-learn, this implementation of neural networks in pymc-learn is + intended for large-scale applications. Pymc-learn relies on Theano for GPU + support. + + scikit-learn offers no GPU support. .. _multilayer_perceptron: diff --git a/setup.cfg b/setup.cfg index 38b775c..c291257 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,4 +13,7 @@ python_files = test_*.py [pydocstyle] add-ignore = D100,D104 -convention = numpy \ No newline at end of file +convention = numpy + +[bdist_wheel] +universal=1 \ No newline at end of file From abde49e7e84fbe52849fe6ccebcedc9d85564d1c Mon Sep 17 00:00:00 2001 From: Daniel Emaasit Date: Mon, 3 Dec 2018 11:20:29 -0500 Subject: [PATCH 10/11] add constant mean function for gpr --- pmlearn/__init__.py | 2 +- pmlearn/gaussian_process/gpr.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pmlearn/__init__.py b/pmlearn/__init__.py index 41a115e..98e3329 100644 --- a/pmlearn/__init__.py +++ b/pmlearn/__init__.py @@ -10,7 +10,7 @@ See http://pymc-learn.org for complete documentation. 
""" -__version__ = '0.0.1.rc2' +__version__ = '0.0.1.rc3' __all__ = ['gaussian_process', 'linear_model', diff --git a/pmlearn/gaussian_process/gpr.py b/pmlearn/gaussian_process/gpr.py index 4f41373..0c06a03 100644 --- a/pmlearn/gaussian_process/gpr.py +++ b/pmlearn/gaussian_process/gpr.py @@ -144,7 +144,7 @@ def create_model(self): if self.prior_mean is None: mean_function = pm.gp.mean.Zero() else: - mean_function = self.prior_mean + mean_function = pm.gp.mean.Constant(c=self.prior_mean) self.gp = pm.gp.Latent(mean_func=mean_function, cov_func=cov_function) @@ -259,7 +259,7 @@ def create_model(self): if self.prior_mean is None: mean_function = pm.gp.mean.Zero() else: - mean_function = self.prior_mean + mean_function = pm.gp.mean.Constant(c=self.prior_mean) self.gp = pm.gp.Latent(mean_func=mean_function, cov_func=cov_function) @@ -373,7 +373,7 @@ def create_model(self): if self.prior_mean is None: mean_function = pm.gp.mean.Zero() else: - mean_function = self.prior_mean + mean_function = pm.gp.mean.Constant(c=self.prior_mean) self.gp = pm.gp.MarginalSparse(mean_func=mean_function, cov_func=cov_function, From 24c8534f2e4ae815e4e7d2aa23a1ebf003b191ac Mon Sep 17 00:00:00 2001 From: Daniel Emaasit Date: Thu, 20 Jun 2019 11:50:54 -0400 Subject: [PATCH 11/11] fixed pymc3 and theano versions fixed pymc3 to version 3.4.1 and theano to version 1.0.4 --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8b22fe9..4390a4b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,10 +4,10 @@ matplotlib>=2.1.1 numpy>=1.13.1 numpydoc>=0.7.0 pandas>=0.21.1 -pymc3>=3.4.1 +pymc3==3.4.1 scikit-learn>=0.19.1 scipy>=1.0.0 seaborn>=0.8.1 six>=1.10.0 -theano>=1.0.0 +theano>=1.0.4 tqdm>=4.8.4