diff --git a/aml_config/config.json b/aml_config/config.json index 7105ecf7..60119e95 100644 --- a/aml_config/config.json +++ b/aml_config/config.json @@ -1,6 +1,7 @@ { - "subscription_id": "<>", - "resource_group": "DevOps_AzureML_Demo", - "workspace_name": "AzureML_Demo_ws", - "location": "southcentralus" + "subscription_id": "09e8ad18-7bdb-43b8-80c4-43ee53460e0b", + "subscription_id_prod": "f72a9681-95ab-4ad8-b7b1-b1f7b7c67f9c", + "resource_group": "DevOps_AzureML_Demo", + "workspace_name": "AzureML_Demo_ws", + "location": "southcentralus" } diff --git a/aml_service/00-WorkSpace_Prod.py b/aml_service/00-WorkSpace_Prod.py new file mode 100644 index 00000000..32828906 --- /dev/null +++ b/aml_service/00-WorkSpace_Prod.py @@ -0,0 +1,64 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+"""
+from azureml.core import Workspace
+import os, json, sys
+import azureml.core
+from azureml.core.authentication import AzureCliAuthentication
+
+print("SDK Version:", azureml.core.VERSION)
+# Fetch the workspace described in aml_config/config.json; create it when the lookup fails.
+with open("aml_config/config.json") as f:  # relative path: resolved against the current working directory
+    config = json.load(f)
+
+workspace_name = config["workspace_name"]
+resource_group = config["resource_group"]
+subscription_id = config["subscription_id_prod"]  # this script reads the "_prod" subscription key
+location = config["location"]
+
+cli_auth = AzureCliAuthentication()
+
+try:
+    ws = Workspace.get(
+        name=workspace_name,
+        subscription_id=subscription_id,
+        resource_group=resource_group,
+        auth=cli_auth,
+    )
+
+except Exception:  # was a bare except, which also swallowed KeyboardInterrupt/SystemExit
+    # Any lookup failure falls through to creation; this call might take a minute or two.
+    print("Creating new workspace")
+    ws = Workspace.create(
+        name=workspace_name,
+        subscription_id=subscription_id,
+        resource_group=resource_group,
+        # create_resource_group=True,
+        location=location,
+        auth=cli_auth,
+    )
+
+# print Workspace details
+print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep="\n")
diff --git a/aml_service/01-Experiment_Prod.py b/aml_service/01-Experiment_Prod.py
new file mode 100644
index 00000000..b3543e1c
--- /dev/null
+++ b/aml_service/01-Experiment_Prod.py
@@ -0,0 +1,44 @@
+"""
+Copyright (C) Microsoft Corporation. All rights reserved.​
+ ​
+Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual,
+royalty-free right to use, copy, and modify the software code provided by us
+("Software Code").
You may not sublicense the Software Code or any use of it
+(except to your affiliates and to vendors to perform work on your behalf)
+through distribution, network access, service agreement, lease, rental, or
+otherwise. This license does not purport to express any claim of ownership over
+data you may have shared with Microsoft in the creation of the Software Code.
+Unless applicable law gives you more rights, Microsoft reserves all other
+rights not expressly granted herein, whether by implication, estoppel or
+otherwise. ​
+ ​
+THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+"""
+import os
+from azureml.core import Experiment
+from azureml.core import Workspace
+from azureml.core.authentication import AzureCliAuthentication
+
+cli_auth = AzureCliAuthentication()
+
+
+def getExperiment():
+    """Return the "devops-ai-demo" Experiment of the workspace loaded by Workspace.from_config."""
+    ws = Workspace.from_config(auth=cli_auth)
+    experiment_name = "devops-ai-demo"
+    exp = Experiment(workspace=ws, name=experiment_name)
+    print(exp.name, exp.workspace.name, sep="\n")  # echo experiment and workspace names, one per line
+    return exp
+
+
+if __name__ == "__main__":
+    exp = getExperiment()
diff --git a/aml_service/02-AttachTrainingVM_Prod.py b/aml_service/02-AttachTrainingVM_Prod.py
new file mode 100644
index 00000000..3fc11c25
--- /dev/null
+++ b/aml_service/02-AttachTrainingVM_Prod.py
@@ -0,0 +1,78 @@
+"""
+Copyright (C) Microsoft Corporation. All rights reserved.​
+ ​
+Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual,
+royalty-free right to use, copy, and modify the software code provided by us
+("Software Code"). You may not sublicense the Software Code or any use of it
+(except to your affiliates and to vendors to perform work on your behalf)
+through distribution, network access, service agreement, lease, rental, or
+otherwise. This license does not purport to express any claim of ownership over
+data you may have shared with Microsoft in the creation of the Software Code.
+Unless applicable law gives you more rights, Microsoft reserves all other
+rights not expressly granted herein, whether by implication, estoppel or
+otherwise. ​
+ ​
+THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+""" + +from azureml.core import Workspace +from azureml.core import Run +from azureml.core import Experiment +from azureml.core.conda_dependencies import CondaDependencies +from azureml.core.runconfig import RunConfiguration +import os, json +from azureml.core.compute import RemoteCompute +from azureml.core.compute import DsvmCompute +from azureml.core.compute_target import ComputeTargetException +from azureml.core.authentication import AzureCliAuthentication + +cli_auth = AzureCliAuthentication() + +# Get workspace +ws = Workspace.from_config(auth=cli_auth) + +# Read the New VM Config +with open("aml_config/security_config.json") as f: + config = json.load(f) + +remote_vm_name = config["remote_vm_name"] +remote_vm_username = config["remote_vm_username"] +remote_vm_password = config["remote_vm_password"] +remote_vm_ip = config["remote_vm_ip"] + +try: + dsvm_compute = RemoteCompute.attach( + ws, + name=remote_vm_name, + username=remote_vm_username, + address=remote_vm_ip, + ssh_port=22, + password=remote_vm_password, + ) + dsvm_compute.wait_for_completion(show_output=True) + +except Exception as e: + print("Caught = {}".format(e.message)) + print("Compute config already attached.") + + +## Create VM if not available +# compute_target_name = remote_vm_name + +# try: +# dsvm_compute = DsvmCompute(workspace=ws, name=compute_target_name) +# print('found existing:', dsvm_compute.name) +# except ComputeTargetException: +# print('creating new.') +# dsvm_config = DsvmCompute.provisioning_configuration(vm_size="Standard_D2_v2") +# dsvm_compute = DsvmCompute.create(ws, name=compute_target_name, provisioning_configuration=dsvm_config) +# dsvm_compute.wait_for_completion(show_output=True) diff --git a/aml_service/03-AttachAmlCluster_Prod.py b/aml_service/03-AttachAmlCluster_Prod.py new file mode 100644 index 00000000..1ba3f127 --- /dev/null +++ b/aml_service/03-AttachAmlCluster_Prod.py @@ -0,0 +1,66 @@ +""" +Copyright (C) Microsoft Corporation. 
All rights reserved.​ + +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+""" + +from azureml.core import Workspace +from azureml.core.compute import ComputeTarget, AmlCompute +from azureml.core.compute_target import ComputeTargetException +from azureml.core.authentication import AzureCliAuthentication +import os, json + +cli_auth = AzureCliAuthentication() +# Get workspace +ws = Workspace.from_config(auth=cli_auth) + +# Read the New VM Config +with open("aml_config/security_config.json") as f: + config = json.load(f) + +aml_cluster_name = config["aml_cluster_name"] + +# un-comment the below lines if you want to put AML Compute under Vnet. Also update /aml_config/security_config.json +# vnet_resourcegroup_name = config['vnet_resourcegroup_name'] +# vnet_name = config['vnet_name'] +# subnet_name = config['subnet_name'] + +# Verify that cluster does not exist already +try: + cpu_cluster = ComputeTarget(workspace=ws, name=aml_cluster_name) + print("Found existing cluster, use it.") +except ComputeTargetException: + compute_config = AmlCompute.provisioning_configuration( + vm_size="STANDARD_D2_V2", + vm_priority="dedicated", + min_nodes=1, + max_nodes=3, + idle_seconds_before_scaledown="300", + # #Uncomment the below lines for VNet support + # vnet_resourcegroup_name=vnet_resourcegroup_name, + # vnet_name=vnet_name, + # subnet_name=subnet_name + ) + cpu_cluster = ComputeTarget.create(ws, aml_cluster_name, compute_config) + +cpu_cluster.wait_for_completion(show_output=True) diff --git a/aml_service/04-AmlPipelines_Prod.py b/aml_service/04-AmlPipelines_Prod.py new file mode 100644 index 00000000..520c8d95 --- /dev/null +++ b/aml_service/04-AmlPipelines_Prod.py @@ -0,0 +1,196 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). 
You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+""" + +import os, json, requests, datetime +import argparse +from azureml.core import Workspace, Experiment, Datastore +from azureml.core.runconfig import RunConfiguration, CondaDependencies +from azureml.data.data_reference import DataReference +from azureml.pipeline.core import Pipeline, PipelineData, StepSequence +from azureml.pipeline.steps import PythonScriptStep +from azureml.pipeline.core import PublishedPipeline +from azureml.pipeline.core.graph import PipelineParameter +from azureml.core.compute import ComputeTarget + +# from azureml.widgets import RunDetails +from azureml.core.authentication import AzureCliAuthentication + +print("Pipeline SDK-specific imports completed") + +cli_auth = AzureCliAuthentication() + + +parser = argparse.ArgumentParser("Pipeline") +parser.add_argument( + "--pipeline_action", + type=str, + choices=["pipeline-test", "publish"], + help="Determines if pipeline needs to run on small data set \ + or pipeline needs to be republished", + #default="pipeline-test", +) + +args = parser.parse_args() + + +# Get workspace +ws = Workspace.from_config(path="aml_config/config.json", auth=cli_auth) +def_blob_store = Datastore(ws, "workspaceblobstore") + +# Get AML Compute name and Experiment Name +with open("aml_config/security_config.json") as f: + config = json.load(f) + +experiment_name = config["experiment_name"] +aml_cluster_name = config["aml_cluster_name"] +aml_pipeline_name = "training-pipeline" + +source_directory = "code" + +# Run Config +# Declare packages dependencies required in the pipeline (these can also be expressed as a YML file) +# cd = CondaDependencies.create(pip_packages=["azureml-defaults", 'tensorflow==1.8.0']) +cd = CondaDependencies("aml_config/conda_dependencies.yml") + +run_config = RunConfiguration(conda_dependencies=cd) + +aml_compute = ws.compute_targets[aml_cluster_name] + +jsonconfigs = PipelineData("jsonconfigs", datastore=def_blob_store) + +# Suffix for all the config files +config_suffix = 
datetime.datetime.now().strftime("%Y%m%d%H") +print("PipelineData object created") + +# Create python script step to run the training/scoring main script +train = PythonScriptStep( + name="Train New Model", + script_name="training/train.py", + compute_target=aml_compute, + source_directory=source_directory, + arguments=["--config_suffix", config_suffix, "--json_config", jsonconfigs], + runconfig=run_config, + # inputs=[jsonconfigs], + outputs=[jsonconfigs], + allow_reuse=False, +) +print("Step Train created") + +evaluate = PythonScriptStep( + name="Evaluate New Model with Prod Model", + script_name="evaluate/evaluate_model.py", + compute_target=aml_compute, + source_directory=source_directory, + arguments=["--config_suffix", config_suffix, "--json_config", jsonconfigs], + runconfig=run_config, + inputs=[jsonconfigs], + # outputs=[jsonconfigs], + allow_reuse=False, +) +print("Step Evaluate created") + +register_model = PythonScriptStep( + name="Register New Trained Model", + script_name="register/register_model.py", + compute_target=aml_compute, + source_directory=source_directory, + arguments=["--config_suffix", config_suffix, "--json_config", jsonconfigs], + runconfig=run_config, + inputs=[jsonconfigs], + # outputs=[jsonconfigs], + allow_reuse=False, +) +print("Step register model created") + +package_model = PythonScriptStep( + name="Package Model as Scoring Image", + script_name="scoring/create_scoring_image.py", + compute_target=aml_compute, + source_directory=source_directory, + arguments=["--config_suffix", config_suffix, "--json_config", jsonconfigs], + runconfig=run_config, + inputs=[jsonconfigs], + # outputs=[jsonconfigs], + allow_reuse=False, +) +print("Packed the model into a Scoring Image") + +# Create Steps dependency such that they run in sequence +evaluate.run_after(train) +register_model.run_after(evaluate) +package_model.run_after(register_model) + +steps = [package_model] + + +# Build Pipeline +pipeline1 = Pipeline(workspace=ws, steps=steps) 
+print("Pipeline is built") + +# Validate Pipeline +pipeline1.validate() +print("Pipeline validation complete") + + +# Submit unpublished pipeline with small data set for test +if args.pipeline_action == "pipeline-test": + pipeline_run1 = Experiment(ws, experiment_name).submit( + pipeline1, regenerate_outputs=True + ) + print("Pipeline is submitted for execution") + pipeline_run1.wait_for_completion(show_output=True) + + +# RunDetails(pipeline_run1).show() + + +# Define pipeline parameters +# run_env = PipelineParameter( +# name="dev_flag", +# default_value=True) + +# dbname = PipelineParameter( +# name="dbname", +# default_value='opex') + + +# Publish Pipeline +if args.pipeline_action == "publish": + published_pipeline1 = pipeline1.publish( + name=aml_pipeline_name, description="Model training/retraining pipeline" + ) + print( + "Pipeline is published as rest_endpoint {} ".format( + published_pipeline1.endpoint + ) + ) + # write published pipeline details as build artifact + pipeline_config = {} + pipeline_config["pipeline_name"] = published_pipeline1.name + pipeline_config["rest_endpoint"] = published_pipeline1.endpoint + pipeline_config["experiment_name"] = "published-pipeline-exp" # experiment_name + with open("aml_config/pipeline_config.json", "w") as outfile: + json.dump(pipeline_config, outfile) diff --git a/aml_service/05-TriggerAmlPipeline_Prod.py b/aml_service/05-TriggerAmlPipeline_Prod.py new file mode 100644 index 00000000..0a2dc64e --- /dev/null +++ b/aml_service/05-TriggerAmlPipeline_Prod.py @@ -0,0 +1,51 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). 
You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+""" + +import os, json, requests, datetime, sys +import argparse +from azureml.core.authentication import AzureCliAuthentication + +try: + with open("aml_config/pipeline_config.json") as f: + config = json.load(f) +except: + print("No pipeline config found") + sys.exit(0) + +# Run a published pipeline +cli_auth = AzureCliAuthentication() +aad_token = cli_auth.get_authentication_header() +rest_endpoint1 = config["rest_endpoint"] +experiment_name = config["experiment_name"] +print(rest_endpoint1) + +response = requests.post( + rest_endpoint1, headers=aad_token, json={"ExperimentName": experiment_name} +) + +run_id = response.json()["Id"] +print(run_id) +print("Pipeline run initiated") diff --git a/aml_service/10-TrainOnLocal_Prod.py b/aml_service/10-TrainOnLocal_Prod.py new file mode 100644 index 00000000..d7c71b3b --- /dev/null +++ b/aml_service/10-TrainOnLocal_Prod.py @@ -0,0 +1,73 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+"""
+
+from azureml.core.runconfig import RunConfiguration
+from azureml.core import Workspace
+from azureml.core import Experiment
+from azureml.core import ScriptRunConfig
+import json
+from azureml.core.authentication import AzureCliAuthentication
+
+cli_auth = AzureCliAuthentication()
+
+# Get workspace
+ws = Workspace.from_config(auth=cli_auth)
+
+# Attach Experiment
+experiment_name = "devops-ai-demo"
+exp = Experiment(workspace=ws, name=experiment_name)
+print(exp.name, exp.workspace.name, sep="\n")
+
+# Run configuration with user-managed dependencies (user_managed_dependencies = True).
+run_config_user_managed = RunConfiguration()
+run_config_user_managed.environment.python.user_managed_dependencies = True
+
+print("Submitting an experiment.")
+src = ScriptRunConfig(
+    source_directory="./code",
+    script="training/train.py",
+    run_config=run_config_user_managed,
+)
+run = exp.submit(src)
+
+# Block until the run completes, streaming its output to stdout.
+run.wait_for_completion(show_output=True, wait_post_processing=True)
+
+# Raise if the run status is "Failed", including the run details and logs in the message
+if run.get_status() == "Failed":
+    raise Exception(
+        "Training on local failed with following run status: {} and logs: \n {}".format(
+            run.get_status(), run.get_details_with_logs()
+        )
+    )
+
+# Persist the run id and experiment name to aml_config/run_id.json
+
+run_id = {}
+run_id["run_id"] = run.id
+run_id["experiment_name"] = run.experiment.name
+with open("aml_config/run_id.json", "w") as outfile:
+    json.dump(run_id, outfile)
diff --git a/aml_service/11-TrainOnLocalEnv_Prod.py b/aml_service/11-TrainOnLocalEnv_Prod.py
new file mode 100644
index 00000000..544a9d93
--- /dev/null
+++ b/aml_service/11-TrainOnLocalEnv_Prod.py
@@ -0,0 +1,82 @@
+"""
+Copyright (C) Microsoft Corporation. All rights reserved.​
+ ​
+Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual,
+royalty-free right to use, copy, and modify the software code provided by us
+("Software Code"). You may not sublicense the Software Code or any use of it
+(except to your affiliates and to vendors to perform work on your behalf)
+through distribution, network access, service agreement, lease, rental, or
+otherwise. This license does not purport to express any claim of ownership over
+data you may have shared with Microsoft in the creation of the Software Code.
+Unless applicable law gives you more rights, Microsoft reserves all other
+rights not expressly granted herein, whether by implication, estoppel or
+otherwise. ​
+ ​
+THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+"""
+## Create a new Conda environment on local and train the model
+## System-managed environment
+
+from azureml.core.conda_dependencies import CondaDependencies
+from azureml.core.runconfig import RunConfiguration
+from azureml.core import Workspace
+from azureml.core import Experiment
+from azureml.core import ScriptRunConfig
+import json
+from azureml.core.authentication import AzureCliAuthentication
+
+cli_auth = AzureCliAuthentication()
+
+# Get workspace
+ws = Workspace.from_config(auth=cli_auth)
+
+# Attach Experiment
+experiment_name = "devops-ai-demo"
+exp = Experiment(workspace=ws, name=experiment_name)
+print(exp.name, exp.workspace.name, sep="\n")
+
+# Run configuration with system-managed dependencies (user_managed_dependencies = False).
+run_config_system_managed = RunConfiguration()
+# Use a new conda environment that is to be created from the conda_dependencies.yml file
+run_config_system_managed.environment.python.user_managed_dependencies = False
+# Automatically create the conda environment before the run
+run_config_system_managed.prepare_environment = True
+
+# # add scikit-learn to the conda_dependencies.yml file
+# Specify conda dependencies with scikit-learn
+# run_config_system_managed.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
+
+print("Submitting an experiment to new conda virtual env")
+src = ScriptRunConfig(
+    source_directory="./code",
+    script="training/train.py",
+    run_config=run_config_system_managed,  # was run_config_user_managed, which is undefined in this script
+)
+run = exp.submit(src)
+
+# Block until the run completes, streaming its output to stdout.
+run.wait_for_completion(show_output=True, wait_post_processing=True)
+
+# Raise if the run status is "Failed", including the run details and logs in the message
+if run.get_status() == "Failed":
+    raise Exception(
+        "Training on local env failed with following run status: {} and logs: \n {}".format(
+            run.get_status(), run.get_details_with_logs()
+        )
+    )
+
+# Persist the run id and experiment name to aml_config/run_id.json (json is imported above for this)
+run_id = {}
+run_id["run_id"] = run.id
+run_id["experiment_name"] = run.experiment.name
+with open("aml_config/run_id.json", "w") as outfile:
+    json.dump(run_id, outfile)
diff --git a/aml_service/12-TrainOnVM_Prod.py b/aml_service/12-TrainOnVM_Prod.py
new file mode 100644
index 00000000..788ffd15
--- /dev/null
+++ b/aml_service/12-TrainOnVM_Prod.py
@@ -0,0 +1,80 @@
+"""
+Copyright (C) Microsoft Corporation. All rights reserved.​
+ ​
+Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual,
+royalty-free right to use, copy, and modify the software code provided by us
+("Software Code"). You may not sublicense the Software Code or any use of it
+(except to your affiliates and to vendors to perform work on your behalf)
+through distribution, network access, service agreement, lease, rental, or
+otherwise.
This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+""" +import os, json +from azureml.core import Workspace +from azureml.core import Experiment +from azureml.core.compute import RemoteCompute +from azureml.core.runconfig import RunConfiguration +from azureml.core import ScriptRunConfig +import azureml.core +from azureml.core.authentication import AzureCliAuthentication + +cli_auth = AzureCliAuthentication() +# Get workspace +ws = Workspace.from_config(auth=cli_auth) + + +# Read the New VM Config +with open("aml_config/security_config.json") as f: + config = json.load(f) +remote_vm_name = config["remote_vm_name"] + + +# Attach Experiment +experiment_name = "devops-ai-demo" +exp = Experiment(workspace=ws, name=experiment_name) +print(exp.name, exp.workspace.name, sep="\n") + +run_config = RunConfiguration() +run_config.target = remote_vm_name + +# replace with your path to the python interpreter in the remote VM found earlier +run_config.environment.python.interpreter_path = "/anaconda/envs/myenv/bin/python" +run_config.environment.python.user_managed_dependencies = True + + +src = ScriptRunConfig( + source_directory="./code", script="training/train.py", run_config=run_config +) +run = exp.submit(src) + +# Shows output of the run on stdout. +run.wait_for_completion(show_output=True, wait_post_processing=True) + +# Raise exception if run fails +if run.get_status() == "Failed": + raise Exception( + "Training on local env failed with following run status: {} and logs: \n {}".format( + run.get_status(), run.get_details_with_logs() + ) + ) + +# Writing the run id to /aml_config/run_id.json +run_id = {} +run_id["run_id"] = run.id +run_id["experiment_name"] = run.experiment.name +with open("aml_config/run_id.json", "w") as outfile: + json.dump(run_id, outfile) diff --git a/aml_service/15-EvaluateModel_Prod.py b/aml_service/15-EvaluateModel_Prod.py new file mode 100644 index 00000000..4d266a98 --- /dev/null +++ b/aml_service/15-EvaluateModel_Prod.py @@ -0,0 +1,93 @@ +""" +Copyright (C) Microsoft Corporation. 
All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+""" +import os, json +from azureml.core import Workspace +from azureml.core import Experiment +from azureml.core.model import Model +import azureml.core +from azureml.core import Run +from azureml.core.authentication import AzureCliAuthentication + +cli_auth = AzureCliAuthentication() + +# Get workspace +ws = Workspace.from_config(auth=cli_auth) + +# Parameterize the metrics on which the models should be compared + +# Add golden data set on which all the model performance can be evaluated + +# Get the latest run_id +with open("aml_config/run_id.json") as f: + config = json.load(f) + +new_model_run_id = config["run_id"] +experiment_name = config["experiment_name"] +exp = Experiment(workspace=ws, name=experiment_name) + + +try: + # Get most recently registered model, we assume that is the model in production. Download this model and compare it with the recently trained model by running test with same data set. + model_list = Model.list(ws) + production_model = next( + filter( + lambda x: x.created_time == max(model.created_time for model in model_list), + model_list, + ) + ) + production_model_run_id = production_model.tags.get("run_id") + run_list = exp.get_runs() + # production_model_run = next(filter(lambda x: x.id == production_model_run_id, run_list)) + + # Get the run history for both production model and newly trained model and compare mse + production_model_run = Run(exp, run_id=production_model_run_id) + new_model_run = Run(exp, run_id=new_model_run_id) + + production_model_mse = production_model_run.get_metrics().get("mse") + new_model_mse = new_model_run.get_metrics().get("mse") + print( + "Current Production model mse: {}, New trained model mse: {}".format( + production_model_mse, new_model_mse + ) + ) + + promote_new_model = False + if new_model_mse < production_model_mse: + promote_new_model = True + print("New trained model performs better, thus it will be registered") +except: + promote_new_model = True + print("This is the first model to be trained, 
thus nothing to evaluate for now") + +run_id = {} +run_id["run_id"] = "" +# Writing the run id to /aml_config/run_id.json +if promote_new_model: + run_id["run_id"] = new_model_run_id + +run_id["experiment_name"] = experiment_name +with open("aml_config/run_id.json", "w") as outfile: + json.dump(run_id, outfile) diff --git a/aml_service/20-RegisterModel_Prod.py b/aml_service/20-RegisterModel_Prod.py new file mode 100644 index 00000000..bd9a7bbc --- /dev/null +++ b/aml_service/20-RegisterModel_Prod.py @@ -0,0 +1,92 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +""" +import os, json, sys +from azureml.core import Workspace +from azureml.core import Run +from azureml.core import Experiment +from azureml.core.model import Model + +from azureml.core.runconfig import RunConfiguration +from azureml.core.authentication import AzureCliAuthentication + +cli_auth = AzureCliAuthentication() + +# Get workspace +ws = Workspace.from_config(auth=cli_auth) + +# Get the latest evaluation result +try: + with open("aml_config/run_id.json") as f: + config = json.load(f) + if not config["run_id"]: + raise Exception("No new model to register as production model perform better") +except: + print("No new model to register as production model perform better") + # raise Exception('No new model to register as production model perform better') + sys.exit(0) + +run_id = config["run_id"] +experiment_name = config["experiment_name"] +exp = Experiment(workspace=ws, name=experiment_name) + +run = Run(experiment=exp, run_id=run_id) +names = run.get_file_names +names() +print("Run ID for last run: {}".format(run_id)) +model_local_dir = "model" +os.makedirs(model_local_dir, exist_ok=True) + +# Download Model to Project root directory +model_name = "sklearn_regression_model.pkl" +run.download_file( + name="./outputs/" + model_name, output_file_path="./model/" + model_name +) +print("Downloaded model {} to Project root directory".format(model_name)) +os.chdir("./model") +model = Model.register( + model_path=model_name, # this points to a local file + 
model_name=model_name, # this is the name the model is registered as + tags={"area": "diabetes", "type": "regression", "run_id": run_id}, + description="Regression model for diabetes dataset", + workspace=ws, +) +os.chdir("..") +print( + "Model registered: {} \nModel Description: {} \nModel Version: {}".format( + model.name, model.description, model.version + ) +) + +# Remove the evaluate.json as we no longer need it +# os.remove("aml_config/evaluate.json") + +# Writing the registered model details to /aml_config/model.json +model_json = {} +model_json["model_name"] = model.name +model_json["model_version"] = model.version +model_json["run_id"] = run_id +with open("aml_config/model.json", "w") as outfile: + json.dump(model_json, outfile) diff --git a/aml_service/30-CreateScoringImage_Prod.py b/aml_service/30-CreateScoringImage_Prod.py new file mode 100644 index 00000000..b94f3cb1 --- /dev/null +++ b/aml_service/30-CreateScoringImage_Prod.py @@ -0,0 +1,99 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +""" +import os, json, sys +from azureml.core import Workspace +from azureml.core.image import ContainerImage, Image +from azureml.core.model import Model +from azureml.core.authentication import AzureCliAuthentication + +cli_auth = AzureCliAuthentication() + +# Get workspace +ws = Workspace.from_config(auth=cli_auth) + +# Get the latest model details + +try: + with open("aml_config/model.json") as f: + config = json.load(f) +except: + print("No new model to register thus no need to create new scoring image") + # raise Exception('No new model to register as production model perform better') + sys.exit(0) + +model_name = config["model_name"] +model_version = config["model_version"] + + +model_list = Model.list(workspace=ws) +model, = (m for m in model_list if m.version == model_version and m.name == model_name) +print( + "Model picked: {} \nModel Description: {} \nModel Version: {}".format( + model.name, model.description, model.version + ) +) + +os.chdir("./code/scoring") +image_name = "diabetes-model-score" + +image_config = ContainerImage.image_configuration( + execution_script="score.py", + runtime="python-slim", + conda_file="conda_dependencies.yml", + description="Image with ridge regression model", + tags={"area": "diabetes", "type": "regression"}, +) + +image = Image.create( + name=image_name, models=[model], image_config=image_config, workspace=ws +) + +image.wait_for_creation(show_output=True) +os.chdir("../..") + +if image.creation_state != 
"Succeeded": + raise Exception("Image creation status: {image.creation_state}") + +print( + "{}(v.{} [{}]) stored at {} with build log {}".format( + image.name, + image.version, + image.creation_state, + image.image_location, + image.image_build_log_uri, + ) +) + +# Writing the image details to /aml_config/image.json +image_json = {} +image_json["image_name"] = image.name +image_json["image_version"] = image.version +image_json["image_location"] = image.image_location +with open("aml_config/image.json", "w") as outfile: + json.dump(image_json, outfile) + + +# How to fix the schema for a model, like if we have multiple models expecting different schema, diff --git a/aml_service/34-GetScoringImageName_Prod.py b/aml_service/34-GetScoringImageName_Prod.py new file mode 100644 index 00000000..b5f3a764 --- /dev/null +++ b/aml_service/34-GetScoringImageName_Prod.py @@ -0,0 +1,44 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +""" +import os, json, sys +from azureml.core import Workspace +from azureml.core.authentication import AzureCliAuthentication + +cli_auth = AzureCliAuthentication() + +# Get workspace +ws = Workspace.from_config(auth=cli_auth) + +# Get the latest image details +latest_image = ws.images +name, version = latest_image.get(list(latest_image)[0]).id.split(':') + +# Writing the image details to /aml_config/image.json +image_json = {} +image_json["image_name"] = name +image_json["image_version"] = int(version) +with open("aml_config/image.json", "w") as outfile: + json.dump(image_json, outfile) diff --git a/aml_service/50-deployOnAci_Prod.py b/aml_service/50-deployOnAci_Prod.py new file mode 100644 index 00000000..00313380 --- /dev/null +++ b/aml_service/50-deployOnAci_Prod.py @@ -0,0 +1,88 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. 
+Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +""" +import os, json, datetime, sys +from operator import attrgetter +from azureml.core import Workspace +from azureml.core.model import Model +from azureml.core.image import Image +from azureml.core.webservice import Webservice +from azureml.core.webservice import AciWebservice +from azureml.core.authentication import AzureCliAuthentication + +cli_auth = AzureCliAuthentication() +# Get workspace +ws = Workspace.from_config(auth=cli_auth) # Get the Image to deploy details +try: + with open("aml_config/image.json") as f: + config = json.load(f) +except: + print("No new model, thus no deployment on ACI") + # raise Exception('No new model to register as production model perform better') + sys.exit(0) + + +image_name = config["image_name"] +image_version = config["image_version"] + +images = Image.list(workspace=ws) +image, = (m for m in images if m.version == image_version and m.name == image_name) +print( + "From image.json, Image used to deploy webservice on ACI: {}\nImage Version: {}\nImage Location = {}".format( + image.name, image.version, image.image_location + ) +) + +# image = 
max(images, key=attrgetter('version')) +# print('From Max Version, Image used to deploy webservice on ACI: {}\nImage Version: {}\nImage Location = {}'.format(image.name, image.version, image.image_location)) + + +aciconfig = AciWebservice.deploy_configuration( + cpu_cores=1, + memory_gb=1, + tags={"area": "diabetes", "type": "regression"}, + description="A sample description", +) + +aci_service_name = "aciwebservice" + datetime.datetime.now().strftime("%m%d%H") + +service = Webservice.deploy_from_image( + deployment_config=aciconfig, image=image, name=aci_service_name, workspace=ws +) + +service.wait_for_deployment() +print( + "Deployed ACI Webservice: {} \nWebservice Uri: {}".format( + service.name, service.scoring_uri + ) +) + +# service=Webservice(name ='aciws0622', workspace =ws) +# Writing the ACI details to /aml_config/aci_webservice.json +aci_webservice = {} +aci_webservice["aci_name"] = service.name +aci_webservice["aci_url"] = service.scoring_uri +with open("aml_config/aci_webservice.json", "w") as outfile: + json.dump(aci_webservice, outfile) diff --git a/aml_service/51-deployOnAks_Prod.py b/aml_service/51-deployOnAks_Prod.py new file mode 100644 index 00000000..379ea90c --- /dev/null +++ b/aml_service/51-deployOnAks_Prod.py @@ -0,0 +1,124 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. 
+Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +""" +import os, json, datetime, sys +from operator import attrgetter +from azureml.core import Workspace +from azureml.core.model import Model +from azureml.core.image import Image +from azureml.core.compute import AksCompute, ComputeTarget +from azureml.core.webservice import Webservice, AksWebservice +from azureml.core.authentication import AzureCliAuthentication + +cli_auth = AzureCliAuthentication() +# Get workspace +ws = Workspace.from_config(auth=cli_auth) + +# Get the Image to deploy details +try: + with open("aml_config/image.json") as f: + config = json.load(f) +except: + print("No new model, thus no deployment on ACI") + # raise Exception('No new model to register as production model perform better') + sys.exit(0) + +image_name = config["image_name"] +image_version = config["image_version"] + +images = Image.list(workspace=ws) +image, = (m for m in images if m.version == image_version and m.name == image_name) +print( + "From image.json, Image used to deploy webservice on ACI: {}\nImage Version: {}\nImage Location = {}".format( + image.name, image.version, image.image_location + 
) +) + +# image = max(images, key=attrgetter('version')) +# print('From Max Version, Image used to deploy webservice on ACI: {}\nImage Version: {}\nImage Location = {}'.format(image.name, image.version, image.image_location)) + +# Check if AKS already Available +try: + with open("aml_config/aks_webservice.json") as f: + config = json.load(f) + aks_name = config["aks_name"] + aks_service_name = config["aks_service_name"] + compute_list = ws.compute_targets() + aks_target, = (c for c in compute_list if c.name == aks_name) + service = Webservice(name=aks_service_name, workspace=ws) + print( + "Updating AKS service {} with image: {}".format( + aks_service_name, image.image_location + ) + ) + service.update(image=image) +except: + aks_name = "aks" + datetime.datetime.now().strftime("%m%d%H") + aks_service_name = "akswebservice" + datetime.datetime.now().strftime("%m%d%H") + prov_config = AksCompute.provisioning_configuration( + agent_count=6, vm_size="Standard_F4s", location="eastus" + ) + print( + "No AKS found in aks_webservice.json. 
Creating new Aks: {} and AKS Webservice: {}".format( + aks_name, aks_service_name + ) + ) + # Create the cluster + aks_target = ComputeTarget.create( + workspace=ws, name=aks_name, provisioning_configuration=prov_config + ) + + aks_target.wait_for_completion(show_output=True) + print(aks_target.provisioning_state) + print(aks_target.provisioning_errors) + + # Use the default configuration (can also provide parameters to customize) + aks_config = AksWebservice.deploy_configuration(enable_app_insights=True) + + service = Webservice.deploy_from_image( + workspace=ws, + name=aks_service_name, + image=image, + deployment_config=aks_config, + deployment_target=aks_target, + ) + + service.wait_for_deployment(show_output=True) + print(service.state) + print( + "Deployed AKS Webservice: {} \nWebservice Uri: {}".format( + service.name, service.scoring_uri + ) + ) + + +# Writing the AKS details to /aml_config/aks_webservice.json +aks_webservice = {} +aks_webservice["aks_name"] = aks_name +aks_webservice["aks_service_name"] = service.name +aks_webservice["aks_url"] = service.scoring_uri +aks_webservice["aks_keys"] = service.get_keys() +with open("aml_config/aks_webservice.json", "w") as outfile: + json.dump(aks_webservice, outfile) diff --git a/aml_service/60-AciWebserviceTest_Prod.py b/aml_service/60-AciWebserviceTest_Prod.py new file mode 100644 index 00000000..a8c40f69 --- /dev/null +++ b/aml_service/60-AciWebserviceTest_Prod.py @@ -0,0 +1,63 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. 
This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+""" +import numpy +import os, json, datetime, sys +from operator import attrgetter +from azureml.core import Workspace +from azureml.core.model import Model +from azureml.core.image import Image +from azureml.core.webservice import Webservice +from azureml.core.webservice import AciWebservice +from azureml.core.authentication import AzureCliAuthentication + +cli_auth = AzureCliAuthentication() +# Get workspace +ws = Workspace.from_config(auth=cli_auth) +# Get the ACI Details +try: + with open("aml_config/aci_webservice.json") as f: + config = json.load(f) +except: + print("No new model, thus no deployment on ACI") + # raise Exception('No new model to register as production model perform better') + sys.exit(0) + +service_name = config["aci_name"] +# Get the hosted web service +service = Webservice(name=service_name, workspace=ws) + +# Input for Model with all features +input_j = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]] +print(input_j) +test_sample = json.dumps({"data": input_j}) +test_sample = bytes(test_sample, encoding="utf8") +try: + prediction = service.run(input_data=test_sample) + print(prediction) +except Exception as e: + result = str(e) + print(result) + raise Exception("ACI service is not working as expected") diff --git a/aml_service/61-AksWebserviceTest_Prod.py b/aml_service/61-AksWebserviceTest_Prod.py new file mode 100644 index 00000000..f22982e0 --- /dev/null +++ b/aml_service/61-AksWebserviceTest_Prod.py @@ -0,0 +1,66 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. 
This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+""" +import numpy +import os, json, datetime, sys +from operator import attrgetter +from azureml.core import Workspace +from azureml.core.model import Model +from azureml.core.image import Image +from azureml.core.webservice import Webservice +from azureml.core.authentication import AzureCliAuthentication + +cli_auth = AzureCliAuthentication() +# Get workspace +ws = Workspace.from_config(auth=cli_auth) + +# Get the AKS Details +try: + with open("aml_config/aks_webservice.json") as f: + config = json.load(f) +except: + print("No new model, thus no deployment on ACI") + # raise Exception('No new model to register as production model perform better') + sys.exit(0) + +service_name = config["aks_service_name"] +# Get the hosted web service +service = Webservice(workspace=ws, name=service_name) + +# Input for Model with all features +input_j = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]] +print(input_j) +test_sample = json.dumps({"data": input_j}) +test_sample = bytes(test_sample, encoding="utf8") +try: + prediction = service.run(input_data=test_sample) + print(prediction) +except Exception as e: + result = str(e) + print(result) + raise Exception("AKS service is not working as expected") + +# Delete the AKS web service after test +service.delete() diff --git a/azure-pipelines-prod.yml b/azure-pipelines-prod.yml new file mode 100644 index 00000000..2e72379e --- /dev/null +++ b/azure-pipelines-prod.yml @@ -0,0 +1,70 @@ +pool: + vmImage: 'Ubuntu 16.04' +#Your build pipeline references a secret variable named ‘sp_username’. Create or edit the build pipeline for this YAML file, define the variable on the Variables tab, and then select the option to make it secret. See https://go.microsoft.com/fwlink/?linkid=865972 +#Your build pipeline references a secret variable named ‘sp_password’. Create or edit the build pipeline for this YAML file, define the variable on the Variables tab, and then select the option to make it secret. 
See https://go.microsoft.com/fwlink/?linkid=865972 +#Your build pipeline references a secret variable named ‘sp_tenantid’. Create or edit the build pipeline for this YAML file, define the variable on the Variables tab, and then select the option to make it secret. See https://go.microsoft.com/fwlink/?linkid=865972 +#Your build pipeline references a secret variable named ‘subscription_id’. Create or edit the build pipeline for this YAML file, define the variable on the Variables tab, and then select the option to make it secret. See https://go.microsoft.com/fwlink/?linkid=865972 + +variables: +- group: FreeTrialVariableGroup + +trigger: +- master +- releases/* +- develop + +steps: +- task: UsePythonVersion@0 + inputs: + versionSpec: '3.6' + architecture: 'x64' + +- task: Bash@3 + displayName: 'Install Requirements' + inputs: + targetType: filePath + filePath: 'environment_setup/install_requirements.sh' + workingDirectory: 'environment_setup' + +- script: | + az login --service-principal -u $(freetrialspidentity) -p $(freetrialspsecret) --tenant $(freetrialsptenant) + + displayName: 'Login to Azure' + +- script: | + sed -i 's#"freetrialsubscriptionid": "<>"#"freetrialsubscriptionid": "$(freetrialsubscriptionid)"#g' aml_config/config.json + + displayName: 'replace subscription value' + +- script: 'pytest tests/unit/data_test_Prod.py' + displayName: 'Data Quality Check' + +- script: 'python aml_service/00-WorkSpace_Prod.py' + displayName: 'Get or Create Workspace' + + +- script: 'python aml_service/03-AttachAmlCluster_Prod.py' + displayName: 'Create AML Compute Cluster' + +- script: 'python aml_service/04-AmlPipelines_Prod.py' + displayName: 'Create and Test AML Pipeline' + +- script: 'python aml_service/04-AmlPipelines_Prod.py --pipeline_action publish' + displayName: 'Publish AML Pipeline as Endpoint' + + +- task: CopyFiles@2 + displayName: 'Copy Files to: $(Build.ArtifactStagingDirectory)' + inputs: + SourceFolder: '$(Build.SourcesDirectory)' + TargetFolder: 
'$(Build.ArtifactStagingDirectory)' + Contents: '**' + +- task: PublishBuildArtifacts@1 + displayName: 'Publish Artifact: devops-for-ai' + inputs: + ArtifactName: 'devops-for-ai' + publishLocation: 'container' + pathtoPublish: '$(Build.ArtifactStagingDirectory)' + TargetPath: '$(Build.ArtifactStagingDirectory)' + diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c3815408..c2d6b57d 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -6,7 +6,7 @@ pool: #Your build pipeline references a secret variable named ‘subscription_id’. Create or edit the build pipeline for this YAML file, define the variable on the Variables tab, and then select the option to make it secret. See https://go.microsoft.com/fwlink/?linkid=865972 variables: -- group: AzureKeyVaultSecrets +- group: PartnerDeployerVariableGroup trigger: - master @@ -27,12 +27,12 @@ steps: workingDirectory: 'environment_setup' - script: | - az login --service-principal -u $(spidentity) -p $(spsecret) --tenant $(sptenant) + az login --service-principal -u $(partnerdeployspidentity) -p $(partnerdeployspsecret) --tenant $(partnerdeploysptenant) displayName: 'Login to Azure' - script: | - sed -i 's#"subscription_id": "<>"#"subscription_id": "$(subscriptionid)"#g' aml_config/config.json + sed -i 's#"partnerdeployspsubscriptionid": "<>"#"partnerdeployspsubscriptionid": "$(partnerdeployspsubscriptionid)"#g' aml_config/config.json displayName: 'replace subscription value' @@ -42,6 +42,7 @@ steps: - script: 'python aml_service/00-WorkSpace.py' displayName: 'Get or Create Workspace' + - script: 'python aml_service/03-AttachAmlCluster.py' displayName: 'Create AML Compute Cluster' @@ -51,6 +52,7 @@ steps: - script: 'python aml_service/04-AmlPipelines.py --pipeline_action publish' displayName: 'Publish AML Pipeline as Endpoint' + - task: CopyFiles@2 displayName: 'Copy Files to: $(Build.ArtifactStagingDirectory)' inputs: diff --git a/tests/unit/data_test_Prod.py b/tests/unit/data_test_Prod.py new file mode 
# Source: tests/unit/data_test_Prod.py (added as a new file, mode 100644).
# test integrity of the input data
"""
Copyright (C) Microsoft Corporation. All rights reserved.

Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual,
royalty-free right to use, copy, and modify the software code provided by us
("Software Code"). You may not sublicense the Software Code or any use of it
(except to your affiliates and to vendors to perform work on your behalf)
through distribution, network access, service agreement, lease, rental, or
otherwise. This license does not purport to express any claim of ownership over
data you may have shared with Microsoft in the creation of the Software Code.
Unless applicable law gives you more rights, Microsoft reserves all other
rights not expressly granted herein, whether by implication, estoppel or
otherwise.

THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""
import os
import numpy as np
import pandas as pd


# get absolute path of csv files from data folder
def get_absPath(filename):
    """Return the absolute path of ``filename`` inside the ``data`` folder.

    The ``data`` folder is resolved two directory levels above the directory
    that contains this module.
    """
    return os.path.abspath(
        os.path.join(
            os.path.dirname(__file__), os.path.pardir, os.path.pardir, "data", filename
        )
    )


def _load_dataset(filename):
    """Read ``filename`` from the data folder, failing the test if it is absent."""
    datafile = get_absPath(filename)
    # check that file exists
    assert os.path.exists(datafile), "data file not found: " + datafile
    return pd.read_csv(datafile)


# number of features
expected_columns = 10

# distribution of features in the training set
# (one entry per CSV column: the 10 features followed by one more column)
historical_mean = np.array(
    [
        -3.63962254e-16,
        1.26972339e-16,
        -8.01646331e-16,
        1.28856202e-16,
        -8.99230414e-17,
        1.29609747e-16,
        -4.56397112e-16,
        3.87573332e-16,
        -3.84559152e-16,
        -3.39848813e-16,
        1.52133484e02,
    ]
)
historical_std = np.array(
    [
        4.75651494e-02,
        4.75651494e-02,
        4.75651494e-02,
        4.75651494e-02,
        4.75651494e-02,
        4.75651494e-02,
        4.75651494e-02,
        4.75651494e-02,
        4.75651494e-02,
        4.75651494e-02,
        7.70057459e01,
    ]
)

# maximal relative change in feature mean or standard deviation
# that we can tolerate
shift_tolerance = 3


def test_check_schema():
    """The reference dataset has the expected number of feature columns."""
    dataset = _load_dataset("diabetes.csv")
    # every column except the last one is counted as a feature
    actual_columns = dataset.shape[1] - 1
    # check header has expected number of columns
    assert actual_columns == expected_columns


def test_check_bad_schema():
    """The bad-schema fixture must NOT have the expected number of features."""
    dataset = _load_dataset("diabetes_bad_schema.csv")
    # every column except the last one is counted as a feature
    actual_columns = dataset.shape[1] - 1
    # check header does not have the expected number of columns
    assert actual_columns != expected_columns


def test_check_missing_values():
    """The missing-values fixture contains at least one NaN cell."""
    dataset = _load_dataset("diabetes_missing_values.csv")
    # isnull() also copes with non-numeric columns, where np.isnan would raise
    n_nan = int(dataset.isnull().values.sum())
    assert n_nan > 0


def test_check_distribution():
    """The bad-distribution fixture drifts from the historical mean or std.

    A column counts as drifted when its absolute change exceeds
    ``shift_tolerance`` times the magnitude of the historical value.
    """
    dataset = _load_dataset("diabetes_bad_dist.csv")
    values = dataset.values
    mean = np.mean(values, axis=0)
    # Fixed: this used np.mean, so the std clause compared means to
    # historical_std. np.std defaults to ddof=0 (population std).
    std = np.std(values, axis=0)

    mean_shifted = np.abs(mean - historical_mean) > shift_tolerance * np.abs(
        historical_mean
    )
    std_shifted = np.abs(std - historical_std) > shift_tolerance * np.abs(
        historical_std
    )
    # NOTE(review): the historical feature means are ~1e-16, so the relative
    # mean tolerance is effectively zero for those columns - confirm intended.
    assert np.any(mean_shifted) or np.any(std_shifted)