Commit 0f000a2

Merge pull request #15 from pymc-learn/dev (Dev)

2 parents: 17a7f6e + c2fc524

4 files changed: +94, -24 lines


.travis.yml  (+4, -0 lines)
@@ -24,6 +24,10 @@ install:
 env:
 - PYTHON_VERSION=2.7 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py pmlearn/linear_model/tests/test_base.py pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py"
 - PYTHON_VERSION=2.7 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_logistic.py pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py"
+- PYTHON_VERSION=3.5 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py pmlearn/linear_model/tests/test_base.py pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py"
+- PYTHON_VERSION=3.5 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_logistic.py pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py"
+- PYTHON_VERSION=3.5 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py pmlearn/linear_model/tests/test_base.py pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py"
+- PYTHON_VERSION=3.5 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_logistic.py pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py"
 - PYTHON_VERSION=3.6 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py pmlearn/linear_model/tests/test_base.py pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py"
 - PYTHON_VERSION=3.6 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_base.py --ignore=pmlearn/linear_model/tests/test_logistic.py pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py"
 - PYTHON_VERSION=3.6 FLOATX='float64' RUN_PYLINT="true" TESTCMD="--durations=50 --cov-append pmlearn/tests/test_base.py pmlearn/linear_model/tests/test_base.py pmlearn/linear_model/tests/test_logistic.py --ignore=pmlearn/gaussian_process/tests/test_gpr.py --ignore=pmlearn/mixture/tests/test_gaussian_mixture.py --ignore=pmlearn/mixture/tests/test_dirichlet_process.py --ignore=pmlearn/naive_bayes/tests/test_naive_bayes.py --ignore=pmlearn/neural_network/test_multilayer_perceptron.py"


CONTRIBUTING.rst  (+1, -1 lines)
@@ -104,7 +104,7 @@ For the most part, this library follows PEP8 with a couple of exceptions.
 Notes:

 - Indent with 4 spaces
-- Lines can be 120 characters long
+- Lines should be a maximum of 80 characters long
 - Docstrings should be written as numpy docstrings
 - Your code should be Python 3 compatible
 - When in doubt, follow the style of the existing code


README.rst  (+34, -18 lines)
@@ -27,8 +27,15 @@ What is pymc-learn?
 *pymc-learn is a library for practical probabilistic
 machine learning in Python*.

-It provides probabilistic models in a syntax that mimics
-`scikit-learn <http://scikit-learn.org>`_.
+It provides a variety of state-of-the-art probabilistic models for supervised
+and unsupervised machine learning. **It is inspired by**
+`scikit-learn <http://scikit-learn.org>`_ **and focuses on bringing probabilistic
+machine learning to non-specialists**. It uses a syntax that mimics scikit-learn.
+Emphasis is put on ease of use, productivity, flexibility, performance,
+documentation, and an API consistent with scikit-learn. It depends on scikit-learn
+and `PyMC3 <https://docs.pymc.io/>`_ and is distributed under the new BSD-3 license,
+encouraging its use in both academia and industry.
+
 Users can now have calibrated quantities of uncertainty in their models
 using powerful inference algorithms -- such as MCMC or Variational inference --
 provided by `PyMC3 <https://docs.pymc.io/>`_.
@@ -63,19 +70,29 @@ parameters and predictions.
 Quick Install
 -----------------

-You can install ``pymc-learn`` from source as follows:
+You can install ``pymc-learn`` from PyPI using pip as follows:
+
+.. code-block:: bash
+
+   pip install pymc-learn
+
+
+Or from source as follows:

 .. code-block:: bash

    pip install git+https://github.com/pymc-learn/pymc-learn


+.. CAUTION::
+   ``pymc-learn`` is under heavy development.
+
 Dependencies
 ................

 ``pymc-learn`` is tested on Python 2.7, 3.5 & 3.6 and depends on Theano,
-PyMC3, NumPy, SciPy, and Matplotlib (see ``requirements.txt`` for version
-information).
+PyMC3, Scikit-learn, NumPy, SciPy, and Matplotlib (see ``requirements.txt``
+for version information).

 ----

@@ -92,9 +109,9 @@ Quick Start
 >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
 >>> kernel = DotProduct() + WhiteKernel()
 >>> gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
->>> gpr.score(X, y) # doctest: +ELLIPSIS
+>>> gpr.score(X, y)
 0.3680...
->>> gpr.predict(X[:2,:], return_std=True) # doctest: +ELLIPSIS
+>>> gpr.predict(X[:2,:], return_std=True)
 (array([653.0..., 592.1...]), array([316.6..., 316.6...]))

 ----
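Note: this hunk only drops the ``# doctest: +ELLIPSIS`` markers so the rendered Quick Start reads more cleanly; it does not show the imports. Below is a minimal, runnable sketch of the same example. The import paths are assumptions (they are not part of this diff): scikit-learn's ``make_friedman2``, ``DotProduct``, ``WhiteKernel``, and ``GaussianProcessRegressor`` are used so the sketch runs as-is, whereas the pymc-learn Quick Start presumably pulls its regressor from ``pmlearn.gaussian_process``.

.. code-block:: python

    # Hedged reconstruction of the Quick Start snippet above; the imports are
    # assumed, not shown in the diff.
    from sklearn.datasets import make_friedman2
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel

    X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
    kernel = DotProduct() + WhiteKernel()
    gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)

    print(gpr.score(X, y))                         # R^2 on the training data, ~0.368 per the README
    print(gpr.predict(X[:2, :], return_std=True))  # predictive mean and std for the first two rows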
@@ -124,18 +141,18 @@ Citing pymc-learn

 To cite ``pymc-learn`` in publications, please use the following::

-  Pymc-learn Developers Team (2019). pymc-learn: Practical probabilistic machine
-  learning in Python. arXiv preprint arXiv:xxxx.xxxxx. Forthcoming.
+  Emaasit, Daniel (2018). Pymc-learn: Practical probabilistic machine
+  learning in Python. arXiv preprint arXiv:1810.xxxxx.

 Or using BibTex as follows:

 .. code-block:: latex

-    @article{Pymc-learn,
-      title={pymc-learn: Practical probabilistic machine learning in {P}ython},
-      author={Pymc-learn Developers Team},
-      journal={arXiv preprint arXiv:xxxx.xxxxx},
-      year={2019}
+    @article{emaasit2018pymc,
+      title={Pymc-learn: Practical probabilistic machine learning in {P}ython},
+      author={Emaasit, Daniel and others},
+      journal={arXiv preprint arXiv:1810.xxxxx},
+      year={2018}
     }

 If you want to cite ``pymc-learn`` for its API, you may also want to consider
@@ -186,8 +203,7 @@ Index
 **User Guide**

 The main documentation. This contains an in-depth description of all models
-and how to apply them. ``pymc-learn`` leverages the Base template provided by the PyMC3 Models
-project: https://github.com/parsing-science/pymc3_models.
+and how to apply them.

 * :doc:`user_guide`

@@ -225,8 +241,8 @@ in a familiar scikit-learn syntax.

 **API Reference**

-``pymc-learn`` leverages the Base template provided by the PyMC3 Models
-project: https://github.com/parsing-science/pymc3_models.
+``pymc-learn`` leverages and extends the Base template provided by the PyMC3
+Models project: https://github.com/parsing-science/pymc3_models.

 * :doc:`api`


docs/why.rst  (+55, -5 lines)
@@ -14,11 +14,11 @@ you may be compelled to use ``pymc-learn``.
 pymc-learn prioritizes user experience
 ---------------------------------------

-- ``pymc-learn`` mimics the syntax of `scikit-learn <https://scikit-learn.org>`_ -- a popular Python library for machine learning -- which has a consistent & simple API, and is very user friendly.
+- *Familiarity*: ``pymc-learn`` mimics the syntax of `scikit-learn <https://scikit-learn.org>`_ -- a popular Python library for machine learning -- which has a consistent & simple API, and is very user friendly.

-- This makes ``pymc-learn`` easy to learn and use for first-time users.
+- *Ease of use*: This makes ``pymc-learn`` easy to learn and use for first-time users.

-- For scikit-learn users, you don't have to completely rewrite your code. Your code looks almost the same. You are more productive, allowing you to try more ideas faster.
+- *Productivity*: For scikit-learn users, you don't have to completely rewrite your code. Your code looks almost the same. You are more productive, allowing you to try more ideas faster.

 .. code-block:: python

@@ -27,12 +27,56 @@ pymc-learn prioritizes user experience
     lr = LinearRegression()          lr = LinearRegression()
     lr.fit(X, y)                     lr.fit(X, y)

-- This ease of use does not come at the cost of reduced flexibility: because ``pymc-learn`` integrates with `PyMC3 <https://docs.pymc.io>`_, it enables you to implement anything you could have built in the base language.
+- *Flexibility*: This ease of use does not come at the cost of reduced flexibility. Given that ``pymc-learn`` integrates with `PyMC3 <https://docs.pymc.io>`_, it enables you to implement anything you could have built in the base language.

+- *Performance*: The primary inference algorithm is gradient-based automatic differentiation variational inference (ADVI) (Kucukelbir et al., 2017), which estimates a divergence measure between approximate and true posterior distributions. Pymc-learn scales to complex, high-dimensional models thanks to GPU-accelerated tensor math and reverse-mode automatic differentiation via Theano (Theano Development Team, 2016), and it scales to large datasets thanks to estimates computed over mini-batches of data in ADVI.

 ----


+Why do we need pymc-learn?
+--------------------------
+Currently, there is a growing need for principled machine learning approaches by
+non-specialists in many fields including the pure sciences (e.g. biology, physics,
+chemistry), the applied sciences (e.g. political science, biostatistics),
+engineering (e.g. transportation, mechanical), medicine (e.g. medical imaging),
+the arts (e.g. visual art), and software industries.
+
+This has led to increased adoption of probabilistic modeling. This trend is
+attributed in part to three major factors:
+
+(1) the need for transparent models with calibrated quantities of uncertainty, i.e. "models should know when they don't know",
+
+(2) the ever-increasing number of promising results achieved on a variety of fundamental problems in AI (Ghahramani, 2015), and
+
+(3) the emergence of probabilistic programming languages (PPLs) that provide a flexible framework to build richly structured probabilistic models that incorporate domain knowledge.
+
+However, usage of PPLs requires a specialized understanding of probability
+theory, probabilistic graphical modeling, and probabilistic inference. Some PPLs
+also require a good command of software coding. These requirements make it
+difficult for non-specialists to adopt and apply probabilistic machine learning
+to their domain problems.
+
+``Pymc-learn`` seeks to address these challenges by providing state-of-the-art
+implementations of several popular probabilistic machine learning models.
+**It is inspired by scikit-learn** (Pedregosa et al., 2011) **and focuses on
+bringing probabilistic machine learning to non-specialists**. It puts emphasis
+on:
+
+(1) ease of use,
+
+(2) productivity,
+
+(3) flexibility,
+
+(4) performance,
+
+(5) documentation, and
+
+(6) an API consistent with scikit-learn.
+
+The underlying probabilistic models are built using pymc3 (Salvatier et al., 2016).
+

 Python is the lingua franca of Data Science
 --------------------------------------------
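Note: the *Performance* bullet and the new "Why do we need pymc-learn?" section added in this hunk describe the intended workflow: scikit-learn-style estimators whose ``fit`` runs ADVI, optionally over mini-batches. A minimal sketch of that workflow follows. ``pmlearn.linear_model.LinearRegression`` matches the module paths exercised in the ``.travis.yml`` test matrix above, but the ``inference_type`` and ``minibatch_size`` keyword arguments are assumptions inferred from the prose, not confirmed by this diff.

.. code-block:: python

    # Sketch only: the fit() keywords below (inference_type, minibatch_size)
    # are assumed from the Performance bullet, not taken from pymc-learn's docs.
    import numpy as np
    from pmlearn.linear_model import LinearRegression  # path appears in the .travis.yml test matrix

    rng = np.random.RandomState(0)
    X = rng.normal(size=(1000, 3))
    y = X @ np.array([1.0, -2.0, 0.5]) + rng.normal(scale=0.1, size=1000)

    lr = LinearRegression()                      # same call pattern as the side-by-side example above
    lr.fit(X, y, inference_type='advi', minibatch_size=100)

    print(lr.predict(X[:5]))                     # point predictions from the fitted posterior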
@@ -137,4 +181,10 @@ References

 4. Barber, D. (2012). Bayesian reasoning and machine learning. Cambridge University Press.

-5. Salvatier, J., Wiecki, T. V., & Fonnesbeck, C. (2016). Probabilistic programming in Python using PyMC3. PeerJ Computer Science, 2, e55.
+5. Salvatier, J., Wiecki, T. V., & Fonnesbeck, C. (2016). Probabilistic programming in Python using PyMC3. PeerJ Computer Science, 2, e55.
+
+6. Kucukelbir, A., Tran, D., Ranganath, R., Gelman, A., & Blei, D. M. (2017). Automatic differentiation variational inference. The Journal of Machine Learning Research, 18(1), 430-474.
+
+7. Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., Blondel, M., Prettenhofer, P., Weiss, R., Dubourg, V., et al. (2011). Scikit-learn: Machine learning in Python. Journal of Machine Learning Research, 12(Oct), 2825-2830.
+
+8. Theano Development Team (2016). Theano: A Python framework for fast computation of mathematical expressions. arXiv e-prints, abs/1605.02688. URL http://arxiv.org/abs/1605.02688.
