From 056f051f9614077526bd3ae0e8ae9b5e45000a90 Mon Sep 17 00:00:00 2001 From: jakedubb Date: Tue, 12 Nov 2019 16:04:41 -0800 Subject: [PATCH 01/15] modified model --- code/scoring/.azureml/config.json | 1 + code/scoring/local_deploy.py | 35 +++++++++++++++++++++++++++++++ code/training/train.py | 2 +- 3 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 code/scoring/.azureml/config.json create mode 100644 code/scoring/local_deploy.py diff --git a/code/scoring/.azureml/config.json b/code/scoring/.azureml/config.json new file mode 100644 index 00000000..b6efcda5 --- /dev/null +++ b/code/scoring/.azureml/config.json @@ -0,0 +1 @@ +{"Id": null, "Scope": "/subscriptions/7fd76d0f-84f2-498b-a997-e0d059af5ce1/resourceGroups/sdbolts-AML-RG/providers/Microsoft.MachineLearningServices/workspaces/sdbolts-AML-WS"} \ No newline at end of file diff --git a/code/scoring/local_deploy.py b/code/scoring/local_deploy.py new file mode 100644 index 00000000..165176a7 --- /dev/null +++ b/code/scoring/local_deploy.py @@ -0,0 +1,35 @@ +from azureml.core import Workspace +from azureml.core.model import InferenceConfig, Model +from azureml.core.webservice import LocalWebservice +from azureml.core.authentication import AzureCliAuthentication + +cli_auth = AzureCliAuthentication() + +subscription_id = '7fd76d0f-84f2-498b-a997-e0d059af5ce1' +resource_group = 'sdbolts-AML-RG' +workspace_name = 'sdbolts-AML-WS' + +try: + ws = Workspace(subscription_id = subscription_id, resource_group = resource_group, workspace_name = workspace_name) + ws.write_config() + print('Library configuration succeeded') +except: + print('Workspace not found') +# Get workspace +#ws = Workspace.from_config(auth=cli_auth, path='./') + +# Create inference configuration. This creates a docker image that contains the model. +inference_config = InferenceConfig(runtime="python", + entry_script="score.py", + conda_file="conda_dependencies.yml") +model = Model(ws, name='sklearn_regression_model.pkl') + +# Create a local deployment, using port 8890 for the web service endpoint +deployment_config = LocalWebservice.deploy_configuration(port=8890) +# Deploy the service +service = Model.deploy( + ws, "mymodel", [model], inference_config, deployment_config) +# Wait for the deployment to complete +service.wait_for_deployment(True) +# Display the port that the web service is available on +print(service.port) \ No newline at end of file diff --git a/code/training/train.py b/code/training/train.py index d703964f..d1fd8c1f 100644 --- a/code/training/train.py +++ b/code/training/train.py @@ -69,7 +69,7 @@ print("Running train.py") # Randomly pic alpha -alphas = np.arange(0.0, 1.0, 0.05) +alphas = np.arange(2.0, 3.0, 4.0) alpha = alphas[np.random.choice(alphas.shape[0], 1, replace=False)][0] print(alpha) run.log("alpha", alpha) From 4fd17e9b81d1854258842fedae9ea6defe7f07af Mon Sep 17 00:00:00 2001 From: jakedubb Date: Tue, 12 Nov 2019 16:22:59 -0800 Subject: [PATCH 02/15] Update train.py --- code/training/train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/code/training/train.py b/code/training/train.py index d1fd8c1f..1e09ff42 100644 --- a/code/training/train.py +++ b/code/training/train.py @@ -62,14 +62,14 @@ X, y = load_diabetes(return_X_y=True) columns = ["age", "gender", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"] X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, random_state=0) + X, y, test_size=0.3, random_state=0) data = {"train": {"X": X_train, "y": y_train}, "test": {"X": X_test, "y": y_test}} print("Running train.py") # Randomly pic alpha -alphas = np.arange(2.0, 3.0, 4.0) +alphas = np.arange(0.1, 0.2, 0.3) alpha = alphas[np.random.choice(alphas.shape[0], 1, replace=False)][0] print(alpha) run.log("alpha", alpha) From 654da1c23a2b4ff06e1f2641fdd1780b3bafb306 Mon Sep 17 00:00:00 2001 From: jakedubb Date: Tue, 12 Nov 2019 16:24:06 -0800 Subject: [PATCH 03/15] Update train.py --- code/training/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/training/train.py b/code/training/train.py index 1e09ff42..f5026426 100644 --- a/code/training/train.py +++ b/code/training/train.py @@ -69,7 +69,7 @@ print("Running train.py") # Randomly pic alpha -alphas = np.arange(0.1, 0.2, 0.3) +alphas = np.arange(0.0, 0.5, 0.01) alpha = alphas[np.random.choice(alphas.shape[0], 1, replace=False)][0] print(alpha) run.log("alpha", alpha) From 156eef49211dd77558dc1d15064e37ef2eb42241 Mon Sep 17 00:00:00 2001 From: jakedubb Date: Tue, 12 Nov 2019 20:43:47 -0800 Subject: [PATCH 04/15] Delete local_deploy.py --- code/scoring/local_deploy.py | 35 ----------------------------------- 1 file changed, 35 deletions(-) delete mode 100644 code/scoring/local_deploy.py diff --git a/code/scoring/local_deploy.py b/code/scoring/local_deploy.py deleted file mode 100644 index 165176a7..00000000 --- a/code/scoring/local_deploy.py +++ /dev/null @@ -1,35 +0,0 @@ -from azureml.core import Workspace -from azureml.core.model import InferenceConfig, Model -from azureml.core.webservice import LocalWebservice -from azureml.core.authentication import AzureCliAuthentication - -cli_auth = AzureCliAuthentication() - -subscription_id = '7fd76d0f-84f2-498b-a997-e0d059af5ce1' -resource_group = 'sdbolts-AML-RG' -workspace_name = 'sdbolts-AML-WS' - -try: - ws = Workspace(subscription_id = subscription_id, resource_group = resource_group, workspace_name = workspace_name) - ws.write_config() - print('Library configuration succeeded') -except: - print('Workspace not found') -# Get workspace -#ws = Workspace.from_config(auth=cli_auth, path='./') - -# Create inference configuration. This creates a docker image that contains the model. -inference_config = InferenceConfig(runtime="python", - entry_script="score.py", - conda_file="conda_dependencies.yml") -model = Model(ws, name='sklearn_regression_model.pkl') - -# Create a local deployment, using port 8890 for the web service endpoint -deployment_config = LocalWebservice.deploy_configuration(port=8890) -# Deploy the service -service = Model.deploy( - ws, "mymodel", [model], inference_config, deployment_config) -# Wait for the deployment to complete -service.wait_for_deployment(True) -# Display the port that the web service is available on -print(service.port) \ No newline at end of file From 68afd789a4e40818e5fd6a68f8d65536d494bce7 Mon Sep 17 00:00:00 2001 From: jakedubb Date: Mon, 18 Nov 2019 13:55:36 -0800 Subject: [PATCH 05/15] updated build pipeline with explainability --- code/scoring/conda_dependencies.yml | 6 ++++++ code/training/train.py | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/code/scoring/conda_dependencies.yml b/code/scoring/conda_dependencies.yml index f13c3c3d..05c29f5c 100644 --- a/code/scoring/conda_dependencies.yml +++ b/code/scoring/conda_dependencies.yml @@ -51,4 +51,10 @@ dependencies: - gunicorn==19.9.0 - flask==1.1.1 - azure-ml-api-sdk + - azureml-defaults + - azureml-contrib-interpret + - azureml-core + - azureml-telemetry + - azureml-interpret + - pandas diff --git a/code/training/train.py b/code/training/train.py index f5026426..920ce60a 100644 --- a/code/training/train.py +++ b/code/training/train.py @@ -30,9 +30,14 @@ from sklearn.linear_model import Ridge from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split + from sklearn.externals import joblib import numpy as np +from azureml.contrib.explain.model.visualize import ExplanationDashboard +from interpret.ext.blackbox import TabularExplainer +from azureml.contrib.explain.model.explanation.explanation_client import ExplanationClient +from azureml.core.model import Model parser = argparse.ArgumentParser("train") parser.add_argument( @@ -58,6 +63,7 @@ run = Run.get_context() exp = run.experiment ws = run.experiment.workspace +client = ExplanationClient.from_run(run) X, y = load_diabetes(return_X_y=True) columns = ["age", "gender", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"] @@ -83,6 +89,20 @@ # model_name = "." + +# create an explainer to validate or debug the model +tabular_explainer = TabularExplainer(reg, + initialization_examples=X_train, + features=columns) +# explain overall model predictions (global explanation) +# passing in test dataset for evaluation examples - note it must be a representative sample of the original data +# more data (e.g. x_train) will likely lead to higher accuracy, but at a time cost +global_explanation = tabular_explainer.explain_global(X_test) + +# uploading model explanation data for storage or visualization +comment = 'Global explanation on of Diabetes Regression' +client.upload_model_explanation(global_explanation, comment=comment) + with open(model_name, "wb") as file: joblib.dump(value=reg, filename=model_name) From ddeb0a79ac0f1afe4adfcc2544609f1c76cdeff0 Mon Sep 17 00:00:00 2001 From: jakedubb Date: Mon, 18 Nov 2019 13:56:16 -0800 Subject: [PATCH 06/15] Update conda_dependencies.yml --- code/scoring/conda_dependencies.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/scoring/conda_dependencies.yml b/code/scoring/conda_dependencies.yml index 05c29f5c..730a2981 100644 --- a/code/scoring/conda_dependencies.yml +++ b/code/scoring/conda_dependencies.yml @@ -56,5 +56,5 @@ dependencies: - azureml-core - azureml-telemetry - azureml-interpret - - pandas + From 7b1e781b5dcf1ae2cfd8893e02f458981df3d49e Mon Sep 17 00:00:00 2001 From: jakedubb Date: Mon, 18 Nov 2019 14:05:11 -0800 Subject: [PATCH 07/15] Update train.py --- code/training/train.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/code/training/train.py b/code/training/train.py index 920ce60a..53391671 100644 --- a/code/training/train.py +++ b/code/training/train.py @@ -34,10 +34,10 @@ from sklearn.externals import joblib import numpy as np -from azureml.contrib.explain.model.visualize import ExplanationDashboard from interpret.ext.blackbox import TabularExplainer -from azureml.contrib.explain.model.explanation.explanation_client import ExplanationClient -from azureml.core.model import Model +from azureml.contrib.explain.model.explanation.explanation_client import ( + ExplanationClient +) parser = argparse.ArgumentParser("train") parser.add_argument( @@ -84,19 +84,13 @@ preds = reg.predict(data["test"]["X"]) run.log("mse", mean_squared_error(preds, data["test"]["y"])) - -# Save model as part of the run history - -# model_name = "." - - # create an explainer to validate or debug the model tabular_explainer = TabularExplainer(reg, initialization_examples=X_train, features=columns) # explain overall model predictions (global explanation) -# passing in test dataset for evaluation examples - note it must be a representative sample of the original data -# more data (e.g. x_train) will likely lead to higher accuracy, but at a time cost +# passing in test dataset for evaluation examples + global_explanation = tabular_explainer.explain_global(X_test) # uploading model explanation data for storage or visualization From cadb53414351a60f123b492211d833e33a366d42 Mon Sep 17 00:00:00 2001 From: jakedubb Date: Mon, 18 Nov 2019 14:08:15 -0800 Subject: [PATCH 08/15] Update train.py --- code/training/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/training/train.py b/code/training/train.py index 53391671..892a7f1a 100644 --- a/code/training/train.py +++ b/code/training/train.py @@ -89,7 +89,7 @@ initialization_examples=X_train, features=columns) # explain overall model predictions (global explanation) -# passing in test dataset for evaluation examples +# passing in test dataset for evaluation examples global_explanation = tabular_explainer.explain_global(X_test) From aca018dcc59c7405257c56640f2fee4b4b8a7a06 Mon Sep 17 00:00:00 2001 From: jakedubb Date: Mon, 18 Nov 2019 15:18:45 -0800 Subject: [PATCH 09/15] Create train_explain.py --- code/training/train_explain.py | 116 +++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 code/training/train_explain.py diff --git a/code/training/train_explain.py b/code/training/train_explain.py new file mode 100644 index 00000000..892a7f1a --- /dev/null +++ b/code/training/train_explain.py @@ -0,0 +1,116 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +""" +from azureml.core.run import Run +import os +import argparse +from sklearn.datasets import load_diabetes +from sklearn.linear_model import Ridge +from sklearn.metrics import mean_squared_error +from sklearn.model_selection import train_test_split + +from sklearn.externals import joblib +import numpy as np + +from interpret.ext.blackbox import TabularExplainer +from azureml.contrib.explain.model.explanation.explanation_client import ( + ExplanationClient +) + +parser = argparse.ArgumentParser("train") +parser.add_argument( + "--release_id", + type=str, + help="The ID of the release triggering this pipeline run", +) +parser.add_argument( + "--model_name", + type=str, + help="Name of the Model", + default="sklearn_regression_model.pkl", +) + +args = parser.parse_args() + +print("Argument 1: %s" % args.release_id) +print("Argument 2: %s" % args.model_name) + +model_name = args.model_name +release_id = args.release_id + +run = Run.get_context() +exp = run.experiment +ws = run.experiment.workspace +client = ExplanationClient.from_run(run) + +X, y = load_diabetes(return_X_y=True) +columns = ["age", "gender", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"] +X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.3, random_state=0) +data = {"train": {"X": X_train, "y": y_train}, + "test": {"X": X_test, "y": y_test}} + +print("Running train.py") + +# Randomly pic alpha +alphas = np.arange(0.0, 0.5, 0.01) +alpha = alphas[np.random.choice(alphas.shape[0], 1, replace=False)][0] +print(alpha) +run.log("alpha", alpha) +reg = Ridge(alpha=alpha) +reg.fit(data["train"]["X"], data["train"]["y"]) +preds = reg.predict(data["test"]["X"]) +run.log("mse", mean_squared_error(preds, data["test"]["y"])) + +# create an explainer to validate or debug the model +tabular_explainer = TabularExplainer(reg, + initialization_examples=X_train, + features=columns) +# explain overall model predictions (global explanation) +# passing in test dataset for evaluation examples + +global_explanation = tabular_explainer.explain_global(X_test) + +# uploading model explanation data for storage or visualization +comment = 'Global explanation on of Diabetes Regression' +client.upload_model_explanation(global_explanation, comment=comment) + +with open(model_name, "wb") as file: + joblib.dump(value=reg, filename=model_name) + +# upload the model file explicitly into artifacts +run.upload_file(name="./outputs/" + model_name, path_or_stream=model_name) +print("Uploaded the model {} to experiment {}".format( + model_name, run.experiment.name)) +dirpath = os.getcwd() +print(dirpath) +print("Following files are uploaded ") +print(run.get_file_names()) + +# Add properties to identify this specific training run +run.add_properties({"release_id": release_id, "run_type": "train"}) +print(f"added properties: {run.properties}") + +run.complete() From 5af325a6ccd8c19e60874e2e8cf9bade4c495285 Mon Sep 17 00:00:00 2001 From: jakedubb Date: Mon, 18 Nov 2019 15:19:08 -0800 Subject: [PATCH 10/15] reverting changes --- code/scoring/conda_dependencies.yml | 3 +++ code/training/train.py | 12 ++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/code/scoring/conda_dependencies.yml b/code/scoring/conda_dependencies.yml index 730a2981..b4a2d9a6 100644 --- a/code/scoring/conda_dependencies.yml +++ b/code/scoring/conda_dependencies.yml @@ -51,10 +51,13 @@ dependencies: - gunicorn==19.9.0 - flask==1.1.1 - azure-ml-api-sdk +<<<<<<< HEAD - azureml-defaults - azureml-contrib-interpret - azureml-core - azureml-telemetry - azureml-interpret +======= +>>>>>>> parent of 68afd78... updated build pipeline with explainability diff --git a/code/training/train.py b/code/training/train.py index 892a7f1a..43a6bae3 100644 --- a/code/training/train.py +++ b/code/training/train.py @@ -30,14 +30,16 @@ from sklearn.linear_model import Ridge from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split - from sklearn.externals import joblib import numpy as np +<<<<<<< HEAD from interpret.ext.blackbox import TabularExplainer from azureml.contrib.explain.model.explanation.explanation_client import ( ExplanationClient ) +======= +>>>>>>> parent of 68afd78... updated build pipeline with explainability parser = argparse.ArgumentParser("train") parser.add_argument( @@ -63,7 +65,6 @@ run = Run.get_context() exp = run.experiment ws = run.experiment.workspace -client = ExplanationClient.from_run(run) X, y = load_diabetes(return_X_y=True) columns = ["age", "gender", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"] @@ -84,6 +85,7 @@ preds = reg.predict(data["test"]["X"]) run.log("mse", mean_squared_error(preds, data["test"]["y"])) +<<<<<<< HEAD # create an explainer to validate or debug the model tabular_explainer = TabularExplainer(reg, initialization_examples=X_train, @@ -96,6 +98,12 @@ # uploading model explanation data for storage or visualization comment = 'Global explanation on of Diabetes Regression' client.upload_model_explanation(global_explanation, comment=comment) +======= + +# Save model as part of the run history + +# model_name = "." +>>>>>>> parent of 68afd78... updated build pipeline with explainability with open(model_name, "wb") as file: joblib.dump(value=reg, filename=model_name) From b5346c380deec868241e37a6c89987a7dc659524 Mon Sep 17 00:00:00 2001 From: jakedubb Date: Mon, 18 Nov 2019 15:19:33 -0800 Subject: [PATCH 11/15] Update train.py --- code/training/train.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/code/training/train.py b/code/training/train.py index 43a6bae3..352e3d12 100644 --- a/code/training/train.py +++ b/code/training/train.py @@ -33,6 +33,7 @@ from sklearn.externals import joblib import numpy as np +<<<<<<< HEAD <<<<<<< HEAD from interpret.ext.blackbox import TabularExplainer from azureml.contrib.explain.model.explanation.explanation_client import ( @@ -40,6 +41,12 @@ ) ======= >>>>>>> parent of 68afd78... updated build pipeline with explainability +======= +from azureml.contrib.explain.model.visualize import ExplanationDashboard +from interpret.ext.blackbox import TabularExplainer +from azureml.contrib.explain.model.explanation.explanation_client import ExplanationClient +from azureml.core.model import Model +>>>>>>> parent of 7b1e781... Update train.py parser = argparse.ArgumentParser("train") parser.add_argument( @@ -86,13 +93,27 @@ run.log("mse", mean_squared_error(preds, data["test"]["y"])) <<<<<<< HEAD +<<<<<<< HEAD +======= + +# Save model as part of the run history + +# model_name = "." + + +>>>>>>> parent of 7b1e781... Update train.py # create an explainer to validate or debug the model tabular_explainer = TabularExplainer(reg, initialization_examples=X_train, features=columns) # explain overall model predictions (global explanation) +<<<<<<< HEAD # passing in test dataset for evaluation examples +======= +# passing in test dataset for evaluation examples - note it must be a representative sample of the original data +# more data (e.g. x_train) will likely lead to higher accuracy, but at a time cost +>>>>>>> parent of 7b1e781... Update train.py global_explanation = tabular_explainer.explain_global(X_test) # uploading model explanation data for storage or visualization From 58ca1c26604520e9ffb216367fa6f47d394e06b9 Mon Sep 17 00:00:00 2001 From: jakedubb Date: Mon, 18 Nov 2019 15:20:56 -0800 Subject: [PATCH 12/15] Updated training --- code/scoring/conda_dependencies.yml | 10 ------ code/training/train.py | 49 ----------------------------- 2 files changed, 59 deletions(-) diff --git a/code/scoring/conda_dependencies.yml b/code/scoring/conda_dependencies.yml index b4a2d9a6..3a351e4d 100644 --- a/code/scoring/conda_dependencies.yml +++ b/code/scoring/conda_dependencies.yml @@ -51,13 +51,3 @@ dependencies: - gunicorn==19.9.0 - flask==1.1.1 - azure-ml-api-sdk -<<<<<<< HEAD - - azureml-defaults - - azureml-contrib-interpret - - azureml-core - - azureml-telemetry - - azureml-interpret - -======= ->>>>>>> parent of 68afd78... updated build pipeline with explainability - diff --git a/code/training/train.py b/code/training/train.py index 352e3d12..a8d7bcad 100644 --- a/code/training/train.py +++ b/code/training/train.py @@ -33,21 +33,6 @@ from sklearn.externals import joblib import numpy as np -<<<<<<< HEAD -<<<<<<< HEAD -from interpret.ext.blackbox import TabularExplainer -from azureml.contrib.explain.model.explanation.explanation_client import ( - ExplanationClient -) -======= ->>>>>>> parent of 68afd78... updated build pipeline with explainability -======= -from azureml.contrib.explain.model.visualize import ExplanationDashboard -from interpret.ext.blackbox import TabularExplainer -from azureml.contrib.explain.model.explanation.explanation_client import ExplanationClient -from azureml.core.model import Model ->>>>>>> parent of 7b1e781... Update train.py - parser = argparse.ArgumentParser("train") parser.add_argument( "--release_id", @@ -92,40 +77,6 @@ preds = reg.predict(data["test"]["X"]) run.log("mse", mean_squared_error(preds, data["test"]["y"])) -<<<<<<< HEAD -<<<<<<< HEAD -======= - -# Save model as part of the run history - -# model_name = "." - - ->>>>>>> parent of 7b1e781... Update train.py -# create an explainer to validate or debug the model -tabular_explainer = TabularExplainer(reg, - initialization_examples=X_train, - features=columns) -# explain overall model predictions (global explanation) -<<<<<<< HEAD -# passing in test dataset for evaluation examples - -======= -# passing in test dataset for evaluation examples - note it must be a representative sample of the original data -# more data (e.g. x_train) will likely lead to higher accuracy, but at a time cost ->>>>>>> parent of 7b1e781... Update train.py -global_explanation = tabular_explainer.explain_global(X_test) - -# uploading model explanation data for storage or visualization -comment = 'Global explanation on of Diabetes Regression' -client.upload_model_explanation(global_explanation, comment=comment) -======= - -# Save model as part of the run history - -# model_name = "." ->>>>>>> parent of 68afd78... updated build pipeline with explainability - with open(model_name, "wb") as file: joblib.dump(value=reg, filename=model_name) From be00e66acab849de5ba050f7162a985c10ea2f98 Mon Sep 17 00:00:00 2001 From: jakedubb Date: Wed, 20 Nov 2019 13:55:18 -0800 Subject: [PATCH 13/15] added explanation again --- code/training/train.py | 20 ++++++++++++++++++++ ml_service/pipelines/build_train_pipeline.py | 7 ++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/code/training/train.py b/code/training/train.py index a8d7bcad..892a7f1a 100644 --- a/code/training/train.py +++ b/code/training/train.py @@ -30,9 +30,15 @@ from sklearn.linear_model import Ridge from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split + from sklearn.externals import joblib import numpy as np +from interpret.ext.blackbox import TabularExplainer +from azureml.contrib.explain.model.explanation.explanation_client import ( + ExplanationClient +) + parser = argparse.ArgumentParser("train") parser.add_argument( "--release_id", @@ -57,6 +63,7 @@ run = Run.get_context() exp = run.experiment ws = run.experiment.workspace +client = ExplanationClient.from_run(run) X, y = load_diabetes(return_X_y=True) columns = ["age", "gender", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"] @@ -77,6 +84,19 @@ preds = reg.predict(data["test"]["X"]) run.log("mse", mean_squared_error(preds, data["test"]["y"])) +# create an explainer to validate or debug the model +tabular_explainer = TabularExplainer(reg, + initialization_examples=X_train, + features=columns) +# explain overall model predictions (global explanation) +# passing in test dataset for evaluation examples + +global_explanation = tabular_explainer.explain_global(X_test) + +# uploading model explanation data for storage or visualization +comment = 'Global explanation on of Diabetes Regression' +client.upload_model_explanation(global_explanation, comment=comment) + with open(model_name, "wb") as file: joblib.dump(value=reg, filename=model_name) diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py index cd65ff83..d0df9053 100644 --- a/ml_service/pipelines/build_train_pipeline.py +++ b/ml_service/pipelines/build_train_pipeline.py @@ -52,7 +52,12 @@ def main(): 'scikit-learn', 'tensorflow', 'keras'], pip_packages=['azure', 'azureml-core', 'azure-storage', - 'azure-storage-blob']) + 'azure-storage-blob', 'azureml-defaults', + 'azureml-contrib-interpret', + 'azureml-telemetry', + 'azureml-interpret', + 'sklearn-pandas', + 'azureml-dataprep']) ) run_config.environment.docker.enabled = True From 532acfd82d7caa3ef099c1ae10c4f9dbe8fe58f8 Mon Sep 17 00:00:00 2001 From: jakedubb Date: Wed, 20 Nov 2019 14:19:00 -0800 Subject: [PATCH 14/15] Update build_train_pipeline.py --- ml_service/pipelines/build_train_pipeline.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py index d0df9053..50138e36 100644 --- a/ml_service/pipelines/build_train_pipeline.py +++ b/ml_service/pipelines/build_train_pipeline.py @@ -53,6 +53,7 @@ def main(): pip_packages=['azure', 'azureml-core', 'azure-storage', 'azure-storage-blob', 'azureml-defaults', + 'azureml-contrib-explain-model', 'azureml-contrib-interpret', 'azureml-telemetry', 'azureml-interpret', From a9554907385e7063352530e0b5286a93d00b9880 Mon Sep 17 00:00:00 2001 From: Jake Wang <47987698+jakeatmsft@users.noreply.github.com> Date: Tue, 18 Aug 2020 09:40:20 -0700 Subject: [PATCH 15/15] Add files via upload --- .pipelines/DevOps for AI - Build Model - AzureML PuP CLI.json | 1 + 1 file changed, 1 insertion(+) create mode 100644 .pipelines/DevOps for AI - Build Model - AzureML PuP CLI.json diff --git a/.pipelines/DevOps for AI - Build Model - AzureML PuP CLI.json b/.pipelines/DevOps for AI - Build Model - AzureML PuP CLI.json new file mode 100644 index 00000000..38bd513f --- /dev/null +++ b/.pipelines/DevOps for AI - Build Model - AzureML PuP CLI.json @@ -0,0 +1 @@ +{"options":[{"enabled":false,"definition":{"id":"5d58cc01-7c75-450c-be18-a388ddb129ec"},"inputs":{"branchFilters":"[\"+refs/heads/*\"]","additionalFields":"{}"}}],"variables":{"AMLWorkspaceName":{"value":"aidemosdevaml"},"ResourceGroupName":{"value":"aidemosdev"},"system.debug":{"value":"false","allowOverride":true}},"retentionRules":[{"branches":["+refs/heads/*"],"artifacts":[],"artifactTypesToDelete":["FilePath","SymbolStore"],"daysToKeep":10,"minimumToKeep":1,"deleteBuildRecord":true,"deleteTestResults":true}],"properties":{},"tags":[],"_links":{"self":{"href":"https://aidemos.visualstudio.com/bab48e25-e319-43a6-812f-aee921d85c3f/_apis/build/Definitions/48?revision=16"},"web":{"href":"https://aidemos.visualstudio.com/bab48e25-e319-43a6-812f-aee921d85c3f/_build/definition?definitionId=48"},"editor":{"href":"https://aidemos.visualstudio.com/bab48e25-e319-43a6-812f-aee921d85c3f/_build/designer?id=48&_a=edit-build-definition"},"badge":{"href":"https://aidemos.visualstudio.com/bab48e25-e319-43a6-812f-aee921d85c3f/_apis/build/status/48"}},"jobAuthorizationScope":1,"jobTimeoutInMinutes":60,"jobCancelTimeoutInMinutes":5,"process":{"phases":[{"steps":[{"environment":{},"enabled":false,"continueOnError":false,"alwaysRun":false,"displayName":"Create Conda Environment","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"03dd16c3-43e0-4667-ba84-40515d27a410","versionSpec":"1.*","definitionType":"task"},"inputs":{"createCustomEnvironment":"true","environmentName":"project_environment","packageSpecs":"Python=3.6 cython numpy","updateConda":"true","installOptions":"","createOptions":"","cleanEnvironment":"false"}},{"environment":{},"enabled":false,"continueOnError":true,"alwaysRun":false,"displayName":"Prepare Conda Environment (using yml)","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"d9bafed4-0b18-4f58-968d-86655b4d2ce9","versionSpec":"2.*","definitionType":"task"},"inputs":{"script":"conda env list;\n\nconda env update -f ./aml_config/conda_dependencies.yml;","workingDirectory":"","failOnStderr":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"Install Azure CLI ML Extension (Preview)","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"46e4be58-730b-4389-8a2f-ea10b3e5e815","versionSpec":"1.*","definitionType":"task"},"inputs":{"connectedServiceNameARM":"e3a37592-7b52-4523-890b-a46c782141b3","scriptLocation":"inlineScript","scriptPath":"azcli-setup.sh","inlineScript":"az extension add -s https://azuremlsdktestpypi.blob.core.windows.net/wheels/sdk-release/Preview/E7501C02541B433786111FE8E140CAA1/azure_cli_ml-0.1.50-py2.py3-none-any.whl --pip-extra-index-urls https://azuremlsdktestpypi.azureedge.net/sdk-release/Preview/E7501C02541B433786111FE8E140CAA1 --yes","args":"","cwd":"","failOnStandardError":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"Show DSVM status","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"46e4be58-730b-4389-8a2f-ea10b3e5e815","versionSpec":"1.*","definitionType":"task"},"inputs":{"connectedServiceNameARM":"e3a37592-7b52-4523-890b-a46c782141b3","scriptLocation":"inlineScript","scriptPath":"azcli-setup.sh","inlineScript":"az ml computetarget show -n mydsvm -w $(AMLWorkspaceName) -g $(ResourceGroupName) \n","args":"","cwd":"","failOnStandardError":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"Show run configs","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"46e4be58-730b-4389-8a2f-ea10b3e5e815","versionSpec":"1.*","definitionType":"task"},"inputs":{"connectedServiceNameARM":"e3a37592-7b52-4523-890b-a46c782141b3","scriptLocation":"inlineScript","scriptPath":"azcli-setup.sh","inlineScript":"az ml runconfiguration list -w $(AMLWorkspaceName) -g $(ResourceGroupName)","args":"","cwd":"","failOnStandardError":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"AzureML: attach project","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"46e4be58-730b-4389-8a2f-ea10b3e5e815","versionSpec":"1.*","definitionType":"task"},"inputs":{"connectedServiceNameARM":"e3a37592-7b52-4523-890b-a46c782141b3","scriptLocation":"inlineScript","scriptPath":"","inlineScript":"Write-Host \"Attaching to workspace $(AMLWorkspaceName)\"\n\naz ml project attach --experiment-name myexperiment -w $(AMLWorkspaceName) -g $(ResourceGroupName) \n\n","args":"","cwd":"","failOnStandardError":"false"}},{"environment":{},"enabled":true,"continueOnError":true,"alwaysRun":false,"displayName":"Unit tests (engineered features, data sets)","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"d9bafed4-0b18-4f58-968d-86655b4d2ce9","versionSpec":"2.*","definitionType":"task"},"inputs":{"script":"pip install setuptools\npip install pytest\npip install engarde\npython -m pytest --junit-xml $(Build.BinariesDirectory)/unittest_report.xml\n","workingDirectory":"code/02_modeling","failOnStderr":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"Publish Unit Test Results","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"0b0f01ed-7dde-43ff-9cbb-e48954daf9b1","versionSpec":"2.*","definitionType":"task"},"inputs":{"testRunner":"JUnit","testResultsFiles":"$(Build.BinariesDirectory)/unittest_report.xml","searchFolder":"$(System.DefaultWorkingDirectory)/code/02_modeling","mergeTestResults":"false","testRunTitle":"","platform":"","configuration":"","publishRunAttachments":"true"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"AzureML: train model","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"46e4be58-730b-4389-8a2f-ea10b3e5e815","versionSpec":"1.*","definitionType":"task"},"inputs":{"connectedServiceNameARM":"e3a37592-7b52-4523-890b-a46c782141b3","scriptLocation":"inlineScript","scriptPath":"","inlineScript":"az ml run submit -c local code/02_modeling/train.py","args":"","cwd":"","failOnStandardError":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"AzureML: list experiments","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"46e4be58-730b-4389-8a2f-ea10b3e5e815","versionSpec":"1.*","definitionType":"task"},"inputs":{"connectedServiceNameARM":"e3a37592-7b52-4523-890b-a46c782141b3","scriptLocation":"inlineScript","scriptPath":"","inlineScript":"az ml history list","args":"","cwd":"","failOnStandardError":"false"}},{"environment":{},"enabled":false,"continueOnError":false,"alwaysRun":false,"displayName":"AzureML: model validation","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"46e4be58-730b-4389-8a2f-ea10b3e5e815","versionSpec":"1.*","definitionType":"task"},"inputs":{"connectedServiceNameARM":"e3a37592-7b52-4523-890b-a46c782141b3","scriptLocation":"inlineScript","scriptPath":"","inlineScript":"az ml run submit -c bai code/02_modeling/train.py","args":"","cwd":"","failOnStandardError":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"AzureML: download trained model","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"46e4be58-730b-4389-8a2f-ea10b3e5e815","versionSpec":"1.*","definitionType":"task"},"inputs":{"connectedServiceNameARM":"e3a37592-7b52-4523-890b-a46c782141b3","scriptLocation":"inlineScript","scriptPath":"","inlineScript":"az ml history download --last --artifact model.pkl -d code/03_deployment/ --overwrite","args":"","cwd":"","failOnStandardError":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"Copy Model Requirements","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"5bfb729a-a7c8-4a78-a7c3-8d717bb7c13c","versionSpec":"2.*","definitionType":"task"},"inputs":{"SourceFolder":"aml_config","Contents":"*.yml\n","TargetFolder":"$(build.artifactstagingdirectory)","CleanTargetFolder":"false","OverWrite":"false","flattenFolders":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"Copy Model Scoring Artifacts","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"5bfb729a-a7c8-4a78-a7c3-8d717bb7c13c","versionSpec":"2.*","definitionType":"task"},"inputs":{"SourceFolder":"code/03_deployment","Contents":"score.py\n!**\\tests\\**","TargetFolder":"$(build.artifactstagingdirectory)","CleanTargetFolder":"false","OverWrite":"false","flattenFolders":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"Publish Artifacts: Model Deployment","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"2ff763a7-ce83-4e1f-bc89-0ae63477cebe","versionSpec":"1.*","definitionType":"task"},"inputs":{"PathtoPublish":"$(build.artifactstagingdirectory)","ArtifactName":"Model_Deployment","ArtifactType":"Container","TargetPath":"","Parallel":"false","ParallelCount":"8"}}],"name":"Agent Phase","refName":"Phase_2","condition":"succeeded()","target":{"queue":{"_links":{"self":{"href":"https://aidemos.visualstudio.com/_apis/build/Queues/108"}},"id":108,"url":"https://aidemos.visualstudio.com/_apis/build/Queues/108","pool":null},"executionOptions":{"type":0},"allowScriptsAuthAccessOption":false,"type":1},"jobAuthorizationScope":1,"jobCancelTimeoutInMinutes":1}],"type":1},"repository":{"properties":{"cleanOptions":"0","labelSources":"0","labelSourcesFormat":"$(build.buildNumber)","reportBuildStatus":"true","gitLfsSupport":"false","skipSyncSource":"false","checkoutNestedSubmodules":"false","fetchDepth":"0"},"id":"93081a31-4c2b-458c-b873-d3bcbcf49552","type":"TfsGit","name":"DevOps for AI Demos","url":"https://aidemos.visualstudio.com/DevOps%20for%20AI%20-%20Demo/_git/DevOps%20for%20AI%20Demos","defaultBranch":"refs/heads/master","clean":"false","checkoutSubmodules":false},"processParameters":{},"quality":1,"authoredBy":{"displayName":"Jordan Edwards","url":"https://spsprodeus27.vssps.visualstudio.com/A99cdcbe4-45ab-4b19-837d-1854e789456d/_apis/Identities/47d7ba8c-85b3-4d34-8d6f-25fefc0ace9a","_links":{"avatar":{"href":"https://aidemos.visualstudio.com/_apis/GraphProfile/MemberAvatars/aad.MDM0Mjk5YjQtZTg5Yi03ZGE3LTkyMDQtOGY1ZjlhZjU0YzJj"}},"id":"47d7ba8c-85b3-4d34-8d6f-25fefc0ace9a","uniqueName":"Jordane@microsoft.com","imageUrl":"https://aidemos.visualstudio.com/_apis/GraphProfile/MemberAvatars/aad.MDM0Mjk5YjQtZTg5Yi03ZGE3LTkyMDQtOGY1ZjlhZjU0YzJj","descriptor":"aad.MDM0Mjk5YjQtZTg5Yi03ZGE3LTkyMDQtOGY1ZjlhZjU0YzJj"},"drafts":[],"queue":{"_links":{"self":{"href":"https://aidemos.visualstudio.com/_apis/build/Queues/108"}},"id":108,"url":"https://aidemos.visualstudio.com/_apis/build/Queues/108","pool":null},"id":48,"name":"DevOps for AI - Build Model - AzureML PuP CLI","url":"https://aidemos.visualstudio.com/bab48e25-e319-43a6-812f-aee921d85c3f/_apis/build/Definitions/48?revision=16","uri":"vstfs:///Build/Definition/48","path":"\\","type":2,"queueStatus":0,"revision":16,"createdDate":"2018-09-25T02:51:45.497Z","project":{"id":"bab48e25-e319-43a6-812f-aee921d85c3f","name":"DevOps for AI - Demo","description":"Demo and Readiness Content","url":"https://aidemos.visualstudio.com/_apis/projects/bab48e25-e319-43a6-812f-aee921d85c3f","state":1,"revision":532,"visibility":1,"lastUpdateTime":"2019-09-20T08:27:56.663Z"}} \ No newline at end of file