diff --git a/.pipelines/DevOps for AI - Build Model - AzureML PuP CLI.json b/.pipelines/DevOps for AI - Build Model - AzureML PuP CLI.json new file mode 100644 index 00000000..38bd513f --- /dev/null +++ b/.pipelines/DevOps for AI - Build Model - AzureML PuP CLI.json @@ -0,0 +1 @@ +{"options":[{"enabled":false,"definition":{"id":"5d58cc01-7c75-450c-be18-a388ddb129ec"},"inputs":{"branchFilters":"[\"+refs/heads/*\"]","additionalFields":"{}"}}],"variables":{"AMLWorkspaceName":{"value":"aidemosdevaml"},"ResourceGroupName":{"value":"aidemosdev"},"system.debug":{"value":"false","allowOverride":true}},"retentionRules":[{"branches":["+refs/heads/*"],"artifacts":[],"artifactTypesToDelete":["FilePath","SymbolStore"],"daysToKeep":10,"minimumToKeep":1,"deleteBuildRecord":true,"deleteTestResults":true}],"properties":{},"tags":[],"_links":{"self":{"href":"https://aidemos.visualstudio.com/bab48e25-e319-43a6-812f-aee921d85c3f/_apis/build/Definitions/48?revision=16"},"web":{"href":"https://aidemos.visualstudio.com/bab48e25-e319-43a6-812f-aee921d85c3f/_build/definition?definitionId=48"},"editor":{"href":"https://aidemos.visualstudio.com/bab48e25-e319-43a6-812f-aee921d85c3f/_build/designer?id=48&_a=edit-build-definition"},"badge":{"href":"https://aidemos.visualstudio.com/bab48e25-e319-43a6-812f-aee921d85c3f/_apis/build/status/48"}},"jobAuthorizationScope":1,"jobTimeoutInMinutes":60,"jobCancelTimeoutInMinutes":5,"process":{"phases":[{"steps":[{"environment":{},"enabled":false,"continueOnError":false,"alwaysRun":false,"displayName":"Create Conda Environment","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"03dd16c3-43e0-4667-ba84-40515d27a410","versionSpec":"1.*","definitionType":"task"},"inputs":{"createCustomEnvironment":"true","environmentName":"project_environment","packageSpecs":"Python=3.6 cython numpy","updateConda":"true","installOptions":"","createOptions":"","cleanEnvironment":"false"}},{"environment":{},"enabled":false,"continueOnError":true,"alwaysRun":false,"displayName":"Prepare Conda Environment (using yml)","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"d9bafed4-0b18-4f58-968d-86655b4d2ce9","versionSpec":"2.*","definitionType":"task"},"inputs":{"script":"conda env list;\n\nconda env update -f ./aml_config/conda_dependencies.yml;","workingDirectory":"","failOnStderr":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"Install Azure CLI ML Extension (Preview)","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"46e4be58-730b-4389-8a2f-ea10b3e5e815","versionSpec":"1.*","definitionType":"task"},"inputs":{"connectedServiceNameARM":"e3a37592-7b52-4523-890b-a46c782141b3","scriptLocation":"inlineScript","scriptPath":"azcli-setup.sh","inlineScript":"az extension add -s https://azuremlsdktestpypi.blob.core.windows.net/wheels/sdk-release/Preview/E7501C02541B433786111FE8E140CAA1/azure_cli_ml-0.1.50-py2.py3-none-any.whl --pip-extra-index-urls https://azuremlsdktestpypi.azureedge.net/sdk-release/Preview/E7501C02541B433786111FE8E140CAA1 --yes","args":"","cwd":"","failOnStandardError":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"Show DSVM status","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"46e4be58-730b-4389-8a2f-ea10b3e5e815","versionSpec":"1.*","definitionType":"task"},"inputs":{"connectedServiceNameARM":"e3a37592-7b52-4523-890b-a46c782141b3","scriptLocation":"inlineScript","scriptPath":"azcli-setup.sh","inlineScript":"az ml computetarget show -n mydsvm -w $(AMLWorkspaceName) -g $(ResourceGroupName) \n","args":"","cwd":"","failOnStandardError":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"Show run configs","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"46e4be58-730b-4389-8a2f-ea10b3e5e815","versionSpec":"1.*","definitionType":"task"},"inputs":{"connectedServiceNameARM":"e3a37592-7b52-4523-890b-a46c782141b3","scriptLocation":"inlineScript","scriptPath":"azcli-setup.sh","inlineScript":"az ml runconfiguration list -w $(AMLWorkspaceName) -g $(ResourceGroupName)","args":"","cwd":"","failOnStandardError":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"AzureML: attach project","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"46e4be58-730b-4389-8a2f-ea10b3e5e815","versionSpec":"1.*","definitionType":"task"},"inputs":{"connectedServiceNameARM":"e3a37592-7b52-4523-890b-a46c782141b3","scriptLocation":"inlineScript","scriptPath":"","inlineScript":"Write-Host \"Attaching to workspace $(AMLWorkspaceName)\"\n\naz ml project attach --experiment-name myexperiment -w $(AMLWorkspaceName) -g $(ResourceGroupName) \n\n","args":"","cwd":"","failOnStandardError":"false"}},{"environment":{},"enabled":true,"continueOnError":true,"alwaysRun":false,"displayName":"Unit tests (engineered features, data sets)","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"d9bafed4-0b18-4f58-968d-86655b4d2ce9","versionSpec":"2.*","definitionType":"task"},"inputs":{"script":"pip install setuptools\npip install pytest\npip install engarde\npython -m pytest --junit-xml $(Build.BinariesDirectory)/unittest_report.xml\n","workingDirectory":"code/02_modeling","failOnStderr":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"Publish Unit Test Results","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"0b0f01ed-7dde-43ff-9cbb-e48954daf9b1","versionSpec":"2.*","definitionType":"task"},"inputs":{"testRunner":"JUnit","testResultsFiles":"$(Build.BinariesDirectory)/unittest_report.xml","searchFolder":"$(System.DefaultWorkingDirectory)/code/02_modeling","mergeTestResults":"false","testRunTitle":"","platform":"","configuration":"","publishRunAttachments":"true"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"AzureML: train model","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"46e4be58-730b-4389-8a2f-ea10b3e5e815","versionSpec":"1.*","definitionType":"task"},"inputs":{"connectedServiceNameARM":"e3a37592-7b52-4523-890b-a46c782141b3","scriptLocation":"inlineScript","scriptPath":"","inlineScript":"az ml run submit -c local code/02_modeling/train.py","args":"","cwd":"","failOnStandardError":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"AzureML: list experiments","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"46e4be58-730b-4389-8a2f-ea10b3e5e815","versionSpec":"1.*","definitionType":"task"},"inputs":{"connectedServiceNameARM":"e3a37592-7b52-4523-890b-a46c782141b3","scriptLocation":"inlineScript","scriptPath":"","inlineScript":"az ml history list","args":"","cwd":"","failOnStandardError":"false"}},{"environment":{},"enabled":false,"continueOnError":false,"alwaysRun":false,"displayName":"AzureML: model validation","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"46e4be58-730b-4389-8a2f-ea10b3e5e815","versionSpec":"1.*","definitionType":"task"},"inputs":{"connectedServiceNameARM":"e3a37592-7b52-4523-890b-a46c782141b3","scriptLocation":"inlineScript","scriptPath":"","inlineScript":"az ml run submit -c bai code/02_modeling/train.py","args":"","cwd":"","failOnStandardError":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"AzureML: download trained model","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"46e4be58-730b-4389-8a2f-ea10b3e5e815","versionSpec":"1.*","definitionType":"task"},"inputs":{"connectedServiceNameARM":"e3a37592-7b52-4523-890b-a46c782141b3","scriptLocation":"inlineScript","scriptPath":"","inlineScript":"az ml history download --last --artifact model.pkl -d code/03_deployment/ --overwrite","args":"","cwd":"","failOnStandardError":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"Copy Model Requirements","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"5bfb729a-a7c8-4a78-a7c3-8d717bb7c13c","versionSpec":"2.*","definitionType":"task"},"inputs":{"SourceFolder":"aml_config","Contents":"*.yml\n","TargetFolder":"$(build.artifactstagingdirectory)","CleanTargetFolder":"false","OverWrite":"false","flattenFolders":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"Copy Model Scoring Artifacts","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"5bfb729a-a7c8-4a78-a7c3-8d717bb7c13c","versionSpec":"2.*","definitionType":"task"},"inputs":{"SourceFolder":"code/03_deployment","Contents":"score.py\n!**\\tests\\**","TargetFolder":"$(build.artifactstagingdirectory)","CleanTargetFolder":"false","OverWrite":"false","flattenFolders":"false"}},{"environment":{},"enabled":true,"continueOnError":false,"alwaysRun":false,"displayName":"Publish Artifacts: Model Deployment","timeoutInMinutes":0,"condition":"succeeded()","task":{"id":"2ff763a7-ce83-4e1f-bc89-0ae63477cebe","versionSpec":"1.*","definitionType":"task"},"inputs":{"PathtoPublish":"$(build.artifactstagingdirectory)","ArtifactName":"Model_Deployment","ArtifactType":"Container","TargetPath":"","Parallel":"false","ParallelCount":"8"}}],"name":"Agent Phase","refName":"Phase_2","condition":"succeeded()","target":{"queue":{"_links":{"self":{"href":"https://aidemos.visualstudio.com/_apis/build/Queues/108"}},"id":108,"url":"https://aidemos.visualstudio.com/_apis/build/Queues/108","pool":null},"executionOptions":{"type":0},"allowScriptsAuthAccessOption":false,"type":1},"jobAuthorizationScope":1,"jobCancelTimeoutInMinutes":1}],"type":1},"repository":{"properties":{"cleanOptions":"0","labelSources":"0","labelSourcesFormat":"$(build.buildNumber)","reportBuildStatus":"true","gitLfsSupport":"false","skipSyncSource":"false","checkoutNestedSubmodules":"false","fetchDepth":"0"},"id":"93081a31-4c2b-458c-b873-d3bcbcf49552","type":"TfsGit","name":"DevOps for AI Demos","url":"https://aidemos.visualstudio.com/DevOps%20for%20AI%20-%20Demo/_git/DevOps%20for%20AI%20Demos","defaultBranch":"refs/heads/master","clean":"false","checkoutSubmodules":false},"processParameters":{},"quality":1,"authoredBy":{"displayName":"Jordan Edwards","url":"https://spsprodeus27.vssps.visualstudio.com/A99cdcbe4-45ab-4b19-837d-1854e789456d/_apis/Identities/47d7ba8c-85b3-4d34-8d6f-25fefc0ace9a","_links":{"avatar":{"href":"https://aidemos.visualstudio.com/_apis/GraphProfile/MemberAvatars/aad.MDM0Mjk5YjQtZTg5Yi03ZGE3LTkyMDQtOGY1ZjlhZjU0YzJj"}},"id":"47d7ba8c-85b3-4d34-8d6f-25fefc0ace9a","uniqueName":"Jordane@microsoft.com","imageUrl":"https://aidemos.visualstudio.com/_apis/GraphProfile/MemberAvatars/aad.MDM0Mjk5YjQtZTg5Yi03ZGE3LTkyMDQtOGY1ZjlhZjU0YzJj","descriptor":"aad.MDM0Mjk5YjQtZTg5Yi03ZGE3LTkyMDQtOGY1ZjlhZjU0YzJj"},"drafts":[],"queue":{"_links":{"self":{"href":"https://aidemos.visualstudio.com/_apis/build/Queues/108"}},"id":108,"url":"https://aidemos.visualstudio.com/_apis/build/Queues/108","pool":null},"id":48,"name":"DevOps for AI - Build Model - AzureML PuP CLI","url":"https://aidemos.visualstudio.com/bab48e25-e319-43a6-812f-aee921d85c3f/_apis/build/Definitions/48?revision=16","uri":"vstfs:///Build/Definition/48","path":"\\","type":2,"queueStatus":0,"revision":16,"createdDate":"2018-09-25T02:51:45.497Z","project":{"id":"bab48e25-e319-43a6-812f-aee921d85c3f","name":"DevOps for AI - Demo","description":"Demo and Readiness Content","url":"https://aidemos.visualstudio.com/_apis/projects/bab48e25-e319-43a6-812f-aee921d85c3f","state":1,"revision":532,"visibility":1,"lastUpdateTime":"2019-09-20T08:27:56.663Z"}} \ No newline at end of file diff --git a/code/scoring/.azureml/config.json b/code/scoring/.azureml/config.json new file mode 100644 index 00000000..b6efcda5 --- /dev/null +++ b/code/scoring/.azureml/config.json @@ -0,0 +1 @@ +{"Id": null, "Scope": "/subscriptions/7fd76d0f-84f2-498b-a997-e0d059af5ce1/resourceGroups/sdbolts-AML-RG/providers/Microsoft.MachineLearningServices/workspaces/sdbolts-AML-WS"} \ No newline at end of file diff --git a/code/scoring/conda_dependencies.yml b/code/scoring/conda_dependencies.yml index f13c3c3d..3a351e4d 100644 --- a/code/scoring/conda_dependencies.yml +++ b/code/scoring/conda_dependencies.yml @@ -51,4 +51,3 @@ dependencies: - gunicorn==19.9.0 - flask==1.1.1 - azure-ml-api-sdk - diff --git a/code/training/train.py b/code/training/train.py index d703964f..892a7f1a 100644 --- a/code/training/train.py +++ b/code/training/train.py @@ -30,9 +30,14 @@ from sklearn.linear_model import Ridge from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split + from sklearn.externals import joblib import numpy as np +from interpret.ext.blackbox import TabularExplainer +from azureml.contrib.explain.model.explanation.explanation_client import ( + ExplanationClient +) parser = argparse.ArgumentParser("train") parser.add_argument( @@ -58,18 +63,19 @@ run = Run.get_context() exp = run.experiment ws = run.experiment.workspace +client = ExplanationClient.from_run(run) X, y = load_diabetes(return_X_y=True) columns = ["age", "gender", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"] X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, random_state=0) + X, y, test_size=0.3, random_state=0) data = {"train": {"X": X_train, "y": y_train}, "test": {"X": X_test, "y": y_test}} print("Running train.py") # Randomly pic alpha -alphas = np.arange(0.0, 1.0, 0.05) +alphas = np.arange(0.0, 0.5, 0.01) alpha = alphas[np.random.choice(alphas.shape[0], 1, replace=False)][0] print(alpha) run.log("alpha", alpha) @@ -78,10 +84,18 @@ preds = reg.predict(data["test"]["X"]) run.log("mse", mean_squared_error(preds, data["test"]["y"])) +# create an explainer to validate or debug the model +tabular_explainer = TabularExplainer(reg, + initialization_examples=X_train, + features=columns) +# explain overall model predictions (global explanation) +# passing in test dataset for evaluation examples -# Save model as part of the run history +global_explanation = tabular_explainer.explain_global(X_test) -# model_name = "." +# uploading model explanation data for storage or visualization +comment = 'Global explanation on of Diabetes Regression' +client.upload_model_explanation(global_explanation, comment=comment) with open(model_name, "wb") as file: joblib.dump(value=reg, filename=model_name) diff --git a/code/training/train_explain.py b/code/training/train_explain.py new file mode 100644 index 00000000..892a7f1a --- /dev/null +++ b/code/training/train_explain.py @@ -0,0 +1,116 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +""" +from azureml.core.run import Run +import os +import argparse +from sklearn.datasets import load_diabetes +from sklearn.linear_model import Ridge +from sklearn.metrics import mean_squared_error +from sklearn.model_selection import train_test_split + +from sklearn.externals import joblib +import numpy as np + +from interpret.ext.blackbox import TabularExplainer +from azureml.contrib.explain.model.explanation.explanation_client import ( + ExplanationClient +) + +parser = argparse.ArgumentParser("train") +parser.add_argument( + "--release_id", + type=str, + help="The ID of the release triggering this pipeline run", +) +parser.add_argument( + "--model_name", + type=str, + help="Name of the Model", + default="sklearn_regression_model.pkl", +) + +args = parser.parse_args() + +print("Argument 1: %s" % args.release_id) +print("Argument 2: %s" % args.model_name) + +model_name = args.model_name +release_id = args.release_id + +run = Run.get_context() +exp = run.experiment +ws = run.experiment.workspace +client = ExplanationClient.from_run(run) + +X, y = load_diabetes(return_X_y=True) +columns = ["age", "gender", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"] +X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.3, random_state=0) +data = {"train": {"X": X_train, "y": y_train}, + "test": {"X": X_test, "y": y_test}} + +print("Running train.py") + +# Randomly pic alpha +alphas = np.arange(0.0, 0.5, 0.01) +alpha = alphas[np.random.choice(alphas.shape[0], 1, replace=False)][0] +print(alpha) +run.log("alpha", alpha) +reg = Ridge(alpha=alpha) +reg.fit(data["train"]["X"], data["train"]["y"]) +preds = reg.predict(data["test"]["X"]) +run.log("mse", mean_squared_error(preds, data["test"]["y"])) + +# create an explainer to validate or debug the model +tabular_explainer = TabularExplainer(reg, + initialization_examples=X_train, + features=columns) +# explain overall model predictions (global explanation) +# passing in test dataset for evaluation examples + +global_explanation = tabular_explainer.explain_global(X_test) + +# uploading model explanation data for storage or visualization +comment = 'Global explanation on of Diabetes Regression' +client.upload_model_explanation(global_explanation, comment=comment) + +with open(model_name, "wb") as file: + joblib.dump(value=reg, filename=model_name) + +# upload the model file explicitly into artifacts +run.upload_file(name="./outputs/" + model_name, path_or_stream=model_name) +print("Uploaded the model {} to experiment {}".format( + model_name, run.experiment.name)) +dirpath = os.getcwd() +print(dirpath) +print("Following files are uploaded ") +print(run.get_file_names()) + +# Add properties to identify this specific training run +run.add_properties({"release_id": release_id, "run_type": "train"}) +print(f"added properties: {run.properties}") + +run.complete() diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py index cd65ff83..50138e36 100644 --- a/ml_service/pipelines/build_train_pipeline.py +++ b/ml_service/pipelines/build_train_pipeline.py @@ -52,7 +52,13 @@ def main(): 'scikit-learn', 'tensorflow', 'keras'], pip_packages=['azure', 'azureml-core', 'azure-storage', - 'azure-storage-blob']) + 'azure-storage-blob', 'azureml-defaults', + 'azureml-contrib-explain-model', + 'azureml-contrib-interpret', + 'azureml-telemetry', + 'azureml-interpret', + 'sklearn-pandas', + 'azureml-dataprep']) ) run_config.environment.docker.enabled = True