postgresml
diff --git a/‎pgml-extension/pgml_rust/Cargo.toml
Copy file name to clipboardExpand all lines: pgml-extension/pgml_rust/Cargo.toml
+1Lines changed: 1 addition & 0 deletions b/‎pgml-extension/pgml_rust/Cargo.toml
Copy file name to clipboardExpand all lines: pgml-extension/pgml_rust/Cargo.toml
+1Lines changed: 1 addition & 0 deletions
diff --git a/‎pgml-extension/pgml_rust/control
Copy file name to clipboardExpand all lines: pgml-extension/pgml_rust/control
+1-1Lines changed: 1 addition & 1 deletion b/‎pgml-extension/pgml_rust/control
Copy file name to clipboardExpand all lines: pgml-extension/pgml_rust/control
+1-1Lines changed: 1 addition & 1 deletion
diff --git a/‎pgml-extension/pgml_rust/sql/schema.sql
Copy file name to clipboardExpand all lines: pgml-extension/pgml_rust/sql/schema.sql
+1Lines changed: 1 addition & 0 deletions b/‎pgml-extension/pgml_rust/sql/schema.sql
Copy file name to clipboardExpand all lines: pgml-extension/pgml_rust/sql/schema.sql
+1Lines changed: 1 addition & 0 deletions
diff --git a/‎pgml-extension/pgml_rust/src/backends/backend.rs
Copy file name to clipboard
+42Lines changed: 42 additions & 0 deletions b/‎pgml-extension/pgml_rust/src/backends/backend.rs
Copy file name to clipboard
+42Lines changed: 42 additions & 0 deletions
diff --git a/‎pgml-extension/pgml_rust/src/backends/mod.rs
Copy file name to clipboard
+2Lines changed: 2 additions & 0 deletions b/‎pgml-extension/pgml_rust/src/backends/mod.rs
Copy file name to clipboard
+2Lines changed: 2 additions & 0 deletions
diff --git a/‎pgml-extension/pgml_rust/src/backends/sklearn.rs
Copy file name to clipboard
+145Lines changed: 145 additions & 0 deletions b/‎pgml-extension/pgml_rust/src/backends/sklearn.rs
Copy file name to clipboard
+145Lines changed: 145 additions & 0 deletions
diff --git a/‎pgml-extension/pgml_rust/src/backends/wrappers.py
Copy file name to clipboard
+108Lines changed: 108 additions & 0 deletions b/‎pgml-extension/pgml_rust/src/backends/wrappers.py
Copy file name to clipboard
+108Lines changed: 108 additions & 0 deletions
diff --git a/‎pgml-extension/pgml_rust/src/lib.rs
Copy file name to clipboardExpand all lines: pgml-extension/pgml_rust/src/lib.rs
+1Lines changed: 1 addition & 0 deletions b/‎pgml-extension/pgml_rust/src/lib.rs
Copy file name to clipboardExpand all lines: pgml-extension/pgml_rust/src/lib.rs
+1Lines changed: 1 addition & 0 deletions
@@ -29,6 +29,7 @@ serde = { version = "1.0.2" }
 serde_json = { version = "1.0.85" }
 rmp-serde = { version = "1.1.0" }
 typetag = "0.2"
+pyo3 = { version = "0.17", features = ["auto-initialize"] }
 heapless = "0.7.13"
 
 [dev-dependencies]
 
@@ -3,7 +3,7 @@ Version: VERSION
 Section: base
 Priority: optional
 Architecture: ARCH
-Depends: postgresql-PGVERSION, libopenblas-dev, postgresql-server-dev-PGVERSION
+Depends: postgresql-PGVERSION, libopenblas-dev, postgresql-server-dev-PGVERSION, python3-numpy, python3-sklearn, python3, python3-dev
 Maintainer: PostgresML <team@postgresml.org>
 Description: PostgresML - machine learning with PostgreSQL
  PostgresML is a PostgreSQL extension that allows to do machine
 
@@ -77,6 +77,7 @@ CREATE TABLE IF NOT EXISTS pgml_rust.models(
 	project_id BIGINT NOT NULL,
 	snapshot_id BIGINT NOT NULL,
 	algorithm TEXT NOT NULL,
+	backend TEXT DEFAULT 'smartcore',
 	hyperparams JSONB NOT NULL,
 	status TEXT NOT NULL,
 	metrics JSONB,
 
@@ -0,0 +1,42 @@
+use pgx::*;
+use serde::Deserialize;
+
+/// Machine-learning backends that a model can be associated with.
+///
+/// Each variant is spelled in lowercase, and the string conversions implemented
+/// for this type in this file use exactly the same lowercase spelling.
+// NOTE(review): `lightdbm` is presumably a misspelling of LightGBM. Renaming the
+// variant would also change its string form, so confirm with callers first.
+#[derive(PostgresEnum, Copy, Clone, Eq, PartialEq, Debug, Deserialize)]
+// Variant names are lowercase, which would otherwise trip the camel-case lint.
+#[allow(non_camel_case_types)]
+pub enum Backend {
+    xgboost,
+    torch,
+    lightdbm,
+    sklearn,
+    smartcore,
+    linfa,
+}
+
+/// Parses a lowercase backend name into the matching [`Backend`] variant.
+///
+/// Matching is exact and case-sensitive; any unrecognised input yields `Err(())`.
+impl std::str::FromStr for Backend {
+    type Err = ();
+
+    fn from_str(input: &str) -> Result<Backend, Self::Err> {
+        // Resolve the variant first, bailing out early on unknown names.
+        let backend = match input {
+            "xgboost" => Backend::xgboost,
+            "torch" => Backend::torch,
+            "lightdbm" => Backend::lightdbm,
+            "sklearn" => Backend::sklearn,
+            "smartcore" => Backend::smartcore,
+            "linfa" => Backend::linfa,
+            _ => return Err(()),
+        };
+
+        Ok(backend)
+    }
+}
+
+/// Renders a [`Backend`] as its lowercase name (the same spelling that
+/// `FromStr` accepts).
+///
+/// This is implemented through `Display` rather than `ToString` directly: the
+/// standard library's blanket `impl<T: Display> ToString for T` keeps
+/// `backend.to_string()` working for existing callers, and `Display` also
+/// makes the type usable with `format!("{}", backend)` and friends.
+impl std::fmt::Display for Backend {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Exhaustive on purpose: adding a variant must force an update here.
+        let name = match self {
+            Backend::xgboost => "xgboost",
+            Backend::torch => "torch",
+            Backend::lightdbm => "lightdbm",
+            Backend::sklearn => "sklearn",
+            Backend::smartcore => "smartcore",
+            Backend::linfa => "linfa",
+        };
+
+        f.write_str(name)
+    }
+}
@@ -0,0 +1,2 @@
+pub mod backend;
+pub mod sklearn;
@@ -0,0 +1,145 @@
+use pgx::*;
+use pyo3::prelude::*;
+use pyo3::types::PyTuple;
+
+use std::collections::HashMap;
+
+use crate::orm::dataset::Dataset;
+use crate::orm::estimator::SklearnBox;
+
+/// Returns the `__version__` string of the `sklearn` module visible to the
+/// embedded Python interpreter.
+///
+/// # Panics
+///
+/// Panics if `sklearn` cannot be imported, or if `__version__` is missing or
+/// cannot be extracted as a string.
+#[pg_extern]
+pub fn sklearn_version() -> String {
+    // The closure's value is returned directly instead of being written into a
+    // captured mutable `String`.
+    Python::with_gil(|py| {
+        py.import("sklearn")
+            .unwrap()
+            .getattr("__version__")
+            .unwrap()
+            .extract()
+            .unwrap()
+    })
+}
+
+/// Fits a scikit-learn estimator through the embedded `wrappers.py` module.
+///
+/// The Python source is embedded at compile time and compiled into a module
+/// under the GIL. Its `estimator` factory is called with the algorithm name,
+/// `dataset.num_features` and the hyperparameters; the callable it returns is
+/// then invoked with the training features and labels. Whatever that call
+/// returns is wrapped in a [`SklearnBox`].
+///
+/// # Panics
+///
+/// Panics if the module cannot be compiled, if `estimator` is missing, or if
+/// either Python call raises.
+pub fn sklearn_train(
+    algorithm_name: &str,
+    dataset: &Dataset,
+    hyperparams: HashMap<String, f32>,
+) -> SklearnBox {
+    let wrappers_src = include_str!(concat!(
+        env!("CARGO_MANIFEST_DIR"),
+        "/src/backends/wrappers.py"
+    ));
+
+    let fitted = Python::with_gil(|py| -> Py<PyAny> {
+        let wrappers = PyModule::from_code(py, wrappers_src, "", "").unwrap();
+        let factory: Py<PyAny> = wrappers.getattr("estimator").unwrap().into();
+
+        // Step 1: build the training closure for this algorithm.
+        let factory_args = PyTuple::new(
+            py,
+            &[
+                String::from(algorithm_name).into_py(py),
+                dataset.num_features.into_py(py),
+                hyperparams.into_py(py),
+            ],
+        );
+        let train: Py<PyAny> = factory.call1(py, factory_args).unwrap();
+
+        // Step 2: run it on the training split.
+        let train_args = PyTuple::new(py, &[dataset.x_train(), dataset.y_train()]);
+        train.call1(py, train_args).unwrap()
+    });
+
+    SklearnBox::new(fitted)
+}
+
+/// Runs the boxed estimator over `x_test` and returns its predictions.
+///
+/// The wrapper module's `predictor` factory is called with the estimator and
+/// `num_features`; the callable it returns is then invoked with `x_test`, and
+/// its result is extracted as a `Vec<f32>`.
+///
+/// # Panics
+///
+/// Panics if the wrapper module cannot be compiled, if `predictor` is missing,
+/// if either Python call raises, or if the result is not a sequence of floats.
+pub fn sklearn_test(estimator: &SklearnBox, x_test: &[f32], num_features: usize) -> Vec<f32> {
+    let wrappers_src = include_str!(concat!(
+        env!("CARGO_MANIFEST_DIR"),
+        "/src/backends/wrappers.py"
+    ));
+
+    Python::with_gil(|py| -> Vec<f32> {
+        let wrappers = PyModule::from_code(py, wrappers_src, "", "").unwrap();
+        let make_predictor = wrappers.getattr("predictor").unwrap();
+
+        let predict = make_predictor
+            .call1(PyTuple::new(
+                py,
+                &[estimator.contents.as_ref(), &num_features.into_py(py)],
+            ))
+            .unwrap();
+
+        let predictions = predict.call1(PyTuple::new(py, &[x_test])).unwrap();
+        predictions.extract().unwrap()
+    })
+}
+
+/// Predicts with the boxed estimator for the feature slice `x`.
+///
+/// `x.len()` is passed to the wrapper module's `predictor` factory as the
+/// feature count, so the Python side reshapes `x` into rows of that width
+/// (a single row). The prediction result is extracted as a `Vec<f32>`.
+///
+/// # Panics
+///
+/// Panics if the wrapper module cannot be compiled, if `predictor` is missing,
+/// if either Python call raises, or if the result is not a sequence of floats.
+pub fn sklearn_predict(estimator: &SklearnBox, x: &[f32]) -> Vec<f32> {
+    let wrappers_src = include_str!(concat!(
+        env!("CARGO_MANIFEST_DIR"),
+        "/src/backends/wrappers.py"
+    ));
+
+    Python::with_gil(|py| -> Vec<f32> {
+        let wrappers = PyModule::from_code(py, wrappers_src, "", "").unwrap();
+        let make_predictor = wrappers.getattr("predictor").unwrap();
+
+        let predict = make_predictor
+            .call1(PyTuple::new(
+                py,
+                &[estimator.contents.as_ref(), &x.len().into_py(py)],
+            ))
+            .unwrap();
+
+        let predictions = predict.call1(PyTuple::new(py, &[x])).unwrap();
+        predictions.extract().unwrap()
+    })
+}
+
+/// Serialises the boxed estimator to bytes via the wrapper module's `save`
+/// function (which pickles it).
+///
+/// # Panics
+///
+/// Panics if the wrapper module cannot be compiled, if `save` is missing, if
+/// the Python call raises, or if the result cannot be extracted as bytes.
+pub fn sklearn_save(estimator: &SklearnBox) -> Vec<u8> {
+    let wrappers_src = include_str!(concat!(
+        env!("CARGO_MANIFEST_DIR"),
+        "/src/backends/wrappers.py"
+    ));
+
+    Python::with_gil(|py| -> Vec<u8> {
+        let wrappers = PyModule::from_code(py, wrappers_src, "", "").unwrap();
+        let pickled = wrappers
+            .getattr("save")
+            .unwrap()
+            .call1(PyTuple::new(py, &[estimator.contents.as_ref()]))
+            .unwrap();
+
+        pickled.extract().unwrap()
+    })
+}
+
+/// Rebuilds a [`SklearnBox`] from bytes previously produced by `sklearn_save`.
+///
+/// The bytes are handed to the wrapper module's `load` function, which
+/// unpickles them.
+// NOTE(review): unpickling executes arbitrary code if `data` is attacker
+// controlled; confirm that the bytes only ever come from a trusted store.
+///
+/// # Panics
+///
+/// Panics if the wrapper module cannot be compiled, if `load` is missing, or
+/// if the Python call raises (for example on corrupt data).
+pub fn sklearn_load(data: &Vec<u8>) -> SklearnBox {
+    let wrappers_src = include_str!(concat!(
+        env!("CARGO_MANIFEST_DIR"),
+        "/src/backends/wrappers.py"
+    ));
+
+    Python::with_gil(|py| -> SklearnBox {
+        let wrappers = PyModule::from_code(py, wrappers_src, "", "").unwrap();
+        let unpickle = wrappers.getattr("load").unwrap();
+        let restored = unpickle.call1(PyTuple::new(py, &[data])).unwrap();
+
+        SklearnBox::new(restored.extract().unwrap())
+    })
+}
@@ -0,0 +1,108 @@
+import sklearn.linear_model
+import sklearn.kernel_ridge
+import sklearn.svm
+import sklearn.ensemble
+import sklearn.multioutput
+import sklearn.gaussian_process
+import sklearn.model_selection
+import numpy as np
+import pickle
+
+# Maps the algorithm names accepted by `estimator_joint` to the scikit-learn
+# estimator class that gets instantiated for them. The keys are runtime
+# identifiers, so they must stay in sync with whatever the callers pass in.
+_ALGORITHM_MAP = {
+    "linear_regression": sklearn.linear_model.LinearRegression,
+    "linear_classification": sklearn.linear_model.LogisticRegression,
+    "ridge_regression": sklearn.linear_model.Ridge,
+    "ridge_classification": sklearn.linear_model.RidgeClassifier,
+    "lasso_regression": sklearn.linear_model.Lasso,
+    "elastic_net_regression": sklearn.linear_model.ElasticNet,
+    "least_angle_regression": sklearn.linear_model.Lars,
+    "lasso_least_angle_regression": sklearn.linear_model.LassoLars,
+    # NOTE(review): "orthoganl" looks like a misspelling of "orthogonal"; the
+    # key is left as-is because callers may already depend on it -- confirm.
+    "orthoganl_matching_pursuit_regression": sklearn.linear_model.OrthogonalMatchingPursuit,
+    "bayesian_ridge_regression": sklearn.linear_model.BayesianRidge,
+    "automatic_relevance_determination_regression": sklearn.linear_model.ARDRegression,
+    "stochastic_gradient_descent_regression": sklearn.linear_model.SGDRegressor,
+    "stochastic_gradient_descent_classification": sklearn.linear_model.SGDClassifier,
+    "perceptron_classification": sklearn.linear_model.Perceptron,
+    "passive_aggressive_regression": sklearn.linear_model.PassiveAggressiveRegressor,
+    "passive_aggressive_classification": sklearn.linear_model.PassiveAggressiveClassifier,
+    "ransac_regression": sklearn.linear_model.RANSACRegressor,
+    "theil_sen_regression": sklearn.linear_model.TheilSenRegressor,
+    "huber_regression": sklearn.linear_model.HuberRegressor,
+    "quantile_regression": sklearn.linear_model.QuantileRegressor,
+    "kernel_ridge_regression": sklearn.kernel_ridge.KernelRidge,
+    "gaussian_process_regression": sklearn.gaussian_process.GaussianProcessRegressor,
+    "gaussian_process_classification": sklearn.gaussian_process.GaussianProcessClassifier,
+    "svm_regression": sklearn.svm.SVR,
+    "svm_classification": sklearn.svm.SVC,
+    "nu_svm_regression": sklearn.svm.NuSVR,
+    "nu_svm_classification": sklearn.svm.NuSVC,
+    "linear_svm_regression": sklearn.svm.LinearSVR,
+    "linear_svm_classification": sklearn.svm.LinearSVC,
+    "ada_boost_regression": sklearn.ensemble.AdaBoostRegressor,
+    "ada_boost_classification": sklearn.ensemble.AdaBoostClassifier,
+    "bagging_regression": sklearn.ensemble.BaggingRegressor,
+    "bagging_classification": sklearn.ensemble.BaggingClassifier,
+    "extra_trees_regression": sklearn.ensemble.ExtraTreesRegressor,
+    "extra_trees_classification": sklearn.ensemble.ExtraTreesClassifier,
+    "gradient_boosting_trees_regression": sklearn.ensemble.GradientBoostingRegressor,
+    "gradient_boosting_trees_classification": sklearn.ensemble.GradientBoostingClassifier,
+    "hist_gradient_boosting_regression": sklearn.ensemble.HistGradientBoostingRegressor,
+    "hist_gradient_boosting_classification": sklearn.ensemble.HistGradientBoostingClassifier,
+    "random_forest_regression": sklearn.ensemble.RandomForestRegressor,
+    "random_forest_classification": sklearn.ensemble.RandomForestClassifier,
+}
+
+
+def estimator(algorithm_name, num_features, hyperparams):
+    return estimator_joint(algorithm_name, num_features, 1, hyperparams)
+
+
+def estimator_joint(algorithm_name, num_features, num_targets, hyperparams):
+    if hyperparams is None:
+        hyperparams = {}
+
+    def train(X_train, y_train):
+        instance = _ALGORITHM_MAP[algorithm_name](**hyperparams)
+
+        X_train = np.asarray(X_train).reshape((-1, num_features))
+
+        # Only support single value models for just now.
+        y_train = np.asarray(y_train).reshape((-1, num_targets))
+
+        instance.fit(X_train, y_train)
+        return instance
+
+    return train
+
+
+def test(estimator, X_test):
+    y_hat = estimator.predict(X_test)
+
+    # Single value models only just for now.
+    return list(np.asarray(y_hat).flatten())
+
+
+def predictor(estimator, num_features):
+    return predictor_joint(estimator, num_features, 1)
+
+
+def predictor_joint(estimator, num_features, num_targets):
+    def predict(X):
+        X = np.asarray(X).reshape((-1, num_features))
+        y_hat = estimator.predict(X)
+
+        # Only support single value models for just now.
+        if num_targets == 1:
+            return list(np.asarray(y_hat).flatten())
+        else:
+            return list(y_hat)
+
+    return predict
+
+
+def save(estimator):
+    return pickle.dumps(estimator)
+
+
+def load(data):
+    return pickle.loads(bytes(data))
@@ -10,6 +10,7 @@ use std::sync::Mutex;
 use xgboost::{Booster, DMatrix};
 
 pub mod api;
+pub mod backends;
 pub mod orm;
 pub mod vectors;
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+pub mod backend;`
	`2`	`+pub mod sklearn;`