diff --git a/.pipelines/azdo-ci-build-train.yml b/.pipelines/azdo-ci-build-train.yml
index d1d92d74..d1d95843 100644
--- a/.pipelines/azdo-ci-build-train.yml
+++ b/.pipelines/azdo-ci-build-train.yml
@@ -7,8 +7,7 @@ trigger:
exclude:
- docs/
- environment_setup/
- - charts/
- - ml_service/util/create_scoring_image.py
+ - ml_service/util/create_scoring_image.*
variables:
- template: azdo-variables.yml
@@ -27,12 +26,15 @@ stages:
timeoutInMinutes: 0
steps:
- template: azdo-base-pipeline.yml
- - script: |
- # Invoke the Python building and publishing a training pipeline
- python3 $(Build.SourcesDirectory)/ml_service/pipelines/${{ variables.BUILD_TRAIN_SCRIPT }}
- failOnStderr: 'false'
- env:
- SP_APP_SECRET: '$(SP_APP_SECRET)'
+ - task: AzureCLI@1
+ inputs:
+ azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
+ scriptLocation: inlineScript
+ inlineScript: |
+ set -e # fail on error
+ export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
+ # Invoke the Python building and publishing a training pipeline
+ python $(Build.SourcesDirectory)/ml_service/pipelines/${{ variables.BUILD_TRAIN_SCRIPT }}
displayName: 'Publish Azure Machine Learning Pipeline'
- stage: 'Trigger_AML_Pipeline'
displayName: 'Train, evaluate, register model via previously published AML pipeline'
@@ -45,22 +47,35 @@ stages:
container: mcr.microsoft.com/mlops/python:latest
timeoutInMinutes: 0
steps:
- - script: |
- python $(Build.SourcesDirectory)/ml_service/pipelines/run_train_pipeline.py
- # Set AMLPIPELINEID variable for next AML Pipeline task in next job
- source $(Build.SourcesDirectory)/tmp.sh
- echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$AMLPIPELINE_ID"
- rm $(Build.SourcesDirectory)/tmp.sh
+ - task: AzureCLI@1
+ inputs:
+ azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
+ scriptLocation: inlineScript
+ inlineScript: |
+ set -e # fail on error
+ export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
+ python $(Build.SourcesDirectory)/ml_service/pipelines/run_train_pipeline.py --output_pipeline_id_file "pipeline_id.txt" --skip_train_execution
+ # Set AMLPIPELINEID variable for next AML Pipeline task in next job
+ AMLPIPELINEID="$(cat pipeline_id.txt)"
+ echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$AMLPIPELINEID"
name: 'getpipelineid'
displayName: 'Get Pipeline ID'
- env:
- SP_APP_SECRET: '$(SP_APP_SECRET)'
+ - bash: |
+ # Generate a hyperparameter value as a random number between 0 and 1.
+ # A random value is used here to make the Azure ML dashboards "interesting" when testing
+ # the solution sample.
+ alpha=$(printf "0.%03d\n" $((($RANDOM*1000)/32767)))
+ echo "Alpha: $alpha"
+ echo "##vso[task.setvariable variable=ALPHA;isOutput=true]$alpha"
+ name: 'getalpha'
+ displayName: 'Generate random value for hyperparameter alpha'
- job: "Run_ML_Pipeline"
dependsOn: "Get_Pipeline_ID"
displayName: "Trigger ML Training Pipeline"
pool: server
variables:
AMLPIPELINE_ID: $[ dependencies.Get_Pipeline_ID.outputs['getpipelineid.AMLPIPELINEID'] ]
+ ALPHA: $[ dependencies.Get_Pipeline_ID.outputs['getalpha.ALPHA'] ]
steps:
- task: ms-air-aiagility.vss-services-azureml.azureml-restApi-task.MLPublishedPipelineRestAPITask@0
displayName: 'Invoke ML pipeline'
@@ -68,7 +83,7 @@ stages:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
PipelineId: '$(AMLPIPELINE_ID)'
ExperimentName: '$(EXPERIMENT_NAME)'
- PipelineParameters: '"model_name": "sklearn_regression_model.pkl"'
+ PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)", "hyperparameter_alpha": "$(ALPHA)"}'
- job: "Training_Run_Report"
dependsOn: "Run_ML_Pipeline"
displayName: "Determine if evaluation succeeded and new model is registered"
@@ -77,11 +92,15 @@ stages:
container: mcr.microsoft.com/mlops/python:latest
timeoutInMinutes: 0
steps:
- - script: |
- python $(Build.SourcesDirectory)/code/register/register_model.py --build_id $(Build.BuildId) --validate True
- displayName: 'Check if new model registered'
- env:
- SP_APP_SECRET: '$(SP_APP_SECRET)'
+ - task: AzureCLI@1
+ inputs:
+ azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
+ scriptLocation: inlineScript
+ inlineScript: |
+ set -e # fail on error
+ export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
+ python $(Build.SourcesDirectory)/ml_service/pipelines/verify_train_pipeline.py --build_id $(Build.BuildId)
+ displayName: "Determine if evaluation succeeded and new model is registered"
- task: CopyFiles@2
displayName: 'Copy Files to: $(Build.ArtifactStagingDirectory)'
inputs:
@@ -89,10 +108,11 @@ stages:
TargetFolder: '$(Build.ArtifactStagingDirectory)'
Contents: |
code/scoring/**
+ ml_service/util/**
- task: PublishBuildArtifacts@1
displayName: 'Publish Artifact'
inputs:
ArtifactName: 'mlops-pipelines'
publishLocation: 'container'
pathtoPublish: '$(Build.ArtifactStagingDirectory)'
- TargetPath: '$(Build.ArtifactStagingDirectory)'
\ No newline at end of file
+ TargetPath: '$(Build.ArtifactStagingDirectory)'
diff --git a/code/evaluate/evaluate_model.py b/code/evaluate/evaluate_model.py
index 2c01c1e0..f59a87fd 100644
--- a/code/evaluate/evaluate_model.py
+++ b/code/evaluate/evaluate_model.py
@@ -27,7 +27,6 @@
import sys
from azureml.core import Run, Workspace, Experiment
import argparse
-from azureml.core.authentication import ServicePrincipalAuthentication
import traceback
run = Run.get_context()
@@ -48,16 +47,11 @@
build_id = os.environ.get('BUILD_BUILDID')
# run_id useful to query previous runs
run_id = "57fee47f-5ae8-441c-bc0c-d4c371f32d70"
- service_principal = ServicePrincipalAuthentication(
- tenant_id=tenant_id,
- service_principal_id=app_id,
- service_principal_password=app_secret)
aml_workspace = Workspace.get(
name=workspace_name,
subscription_id=subscription_id,
- resource_group=resource_group,
- auth=service_principal
+ resource_group=resource_group
)
ws = aml_workspace
exp = Experiment(ws, experiment_name)
diff --git a/code/register/register_model.py b/code/register/register_model.py
index bdef3600..fff5c7b6 100644
--- a/code/register/register_model.py
+++ b/code/register/register_model.py
@@ -29,7 +29,6 @@
import traceback
from azureml.core import Run, Experiment, Workspace
from azureml.core.model import Model as AMLModel
-from azureml.core.authentication import ServicePrincipalAuthentication
def main():
@@ -37,37 +36,23 @@ def main():
run = Run.get_context()
if (run.id.startswith('OfflineRun')):
from dotenv import load_dotenv
- sys.path.append(os.path.abspath("./code/util")) # NOQA: E402
- from model_helper import get_model_by_tag
# For local development, set values in this section
load_dotenv()
workspace_name = os.environ.get("WORKSPACE_NAME")
experiment_name = os.environ.get("EXPERIMENT_NAME")
resource_group = os.environ.get("RESOURCE_GROUP")
subscription_id = os.environ.get("SUBSCRIPTION_ID")
- tenant_id = os.environ.get("TENANT_ID")
- model_name = os.environ.get("MODEL_NAME")
- app_id = os.environ.get('SP_APP_ID')
- app_secret = os.environ.get('SP_APP_SECRET')
build_id = os.environ.get('BUILD_BUILDID')
# run_id useful to query previous runs
run_id = "bd184a18-2ac8-4951-8e78-e290bef3b012"
- service_principal = ServicePrincipalAuthentication(
- tenant_id=tenant_id,
- service_principal_id=app_id,
- service_principal_password=app_secret)
-
aml_workspace = Workspace.get(
name=workspace_name,
subscription_id=subscription_id,
- resource_group=resource_group,
- auth=service_principal
+ resource_group=resource_group
)
ws = aml_workspace
exp = Experiment(ws, experiment_name)
else:
- sys.path.append(os.path.abspath("./util")) # NOQA: E402
- from model_helper import get_model_by_tag
ws = run.experiment.workspace
exp = run.experiment
run_id = 'amlcompute'
@@ -89,12 +74,6 @@ def main():
help="Name of the Model",
default="sklearn_regression_model.pkl",
)
- parser.add_argument(
- "--validate",
- type=str,
- help="Set to true to only validate if model is registered for run",
- default=False,
- )
args = parser.parse_args()
if (args.build_id is not None):
@@ -103,30 +82,13 @@ def main():
run_id = args.run_id
if (run_id == 'amlcompute'):
run_id = run.parent.id
- if (args.validate is not None):
- validate = args.validate
model_name = args.model_name
- if (validate):
- try:
- tag_name = 'BuildId'
- model = get_model_by_tag(
- model_name, tag_name, build_id, exp.workspace)
- if (model is not None):
- print("Model was registered for this build.")
- if (model is None):
- print("Model was not registered for this run.")
- sys.exit(1)
- except Exception as e:
- print(e)
- print("Model was not registered for this run.")
- sys.exit(1)
+ if (build_id is None):
+ register_aml_model(model_name, exp, run_id)
else:
- if (build_id is None):
- register_aml_model(model_name, exp, run_id)
- else:
- run.tag("BuildId", value=build_id)
- register_aml_model(model_name, exp, run_id, build_id)
+ run.tag("BuildId", value=build_id)
+ register_aml_model(model_name, exp, run_id, build_id)
def model_already_registered(model_name, exp, run_id):
diff --git a/code/scoring/score.py b/code/scoring/score.py
index dafe6bee..716cd0e4 100644
--- a/code/scoring/score.py
+++ b/code/scoring/score.py
@@ -39,14 +39,10 @@ def init():
def run(raw_data):
- try:
- data = json.loads(raw_data)["data"]
- data = numpy.array(data)
- result = model.predict(data)
- return json.dumps({"result": result.tolist()})
- except Exception as e:
- result = str(e)
- return json.dumps({"error": result})
+ data = json.loads(raw_data)["data"]
+ data = numpy.array(data)
+ result = model.predict(data)
+ return {"result": result.tolist()}
if __name__ == "__main__":
diff --git a/code/training/train.py b/code/training/train.py
index 5f8c19ef..05c58683 100644
--- a/code/training/train.py
+++ b/code/training/train.py
@@ -31,73 +31,83 @@
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.externals import joblib
-import numpy as np
-
-
-parser = argparse.ArgumentParser("train")
-parser.add_argument(
- "--build_id",
- type=str,
- help="The build ID of the build triggering this pipeline run",
-)
-parser.add_argument(
- "--model_name",
- type=str,
- help="Name of the Model",
- default="sklearn_regression_model.pkl",
-)
-
-args = parser.parse_args()
-
-print("Argument 1: %s" % args.build_id)
-print("Argument 2: %s" % args.model_name)
-
-model_name = args.model_name
-build_id = args.build_id
-
-run = Run.get_context()
-exp = run.experiment
-ws = run.experiment.workspace
-
-X, y = load_diabetes(return_X_y=True)
-columns = ["age", "gender", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"]
-X_train, X_test, y_train, y_test = train_test_split(
- X, y, test_size=0.2, random_state=0)
-data = {"train": {"X": X_train, "y": y_train},
- "test": {"X": X_test, "y": y_test}}
-
-print("Running train.py")
-
-# Randomly pic alpha
-alphas = np.arange(0.0, 1.0, 0.05)
-alpha = alphas[np.random.choice(alphas.shape[0], 1, replace=False)][0]
-print(alpha)
-run.log("alpha", alpha)
-run.parent.log("alpha", alpha)
-reg = Ridge(alpha=alpha)
-reg.fit(data["train"]["X"], data["train"]["y"])
-preds = reg.predict(data["test"]["X"])
-run.log("mse", mean_squared_error(
- preds, data["test"]["y"]), description="Mean squared error metric")
-run.parent.log("mse", mean_squared_error(
- preds, data["test"]["y"]), description="Mean squared error metric")
-
-with open(model_name, "wb") as file:
+
+
+def train_model(run, data, alpha):
+ run.log("alpha", alpha)
+ run.parent.log("alpha", alpha)
+ reg = Ridge(alpha=alpha)
+ reg.fit(data["train"]["X"], data["train"]["y"])
+ preds = reg.predict(data["test"]["X"])
+ run.log("mse", mean_squared_error(
+ preds, data["test"]["y"]), description="Mean squared error metric")
+ run.parent.log("mse", mean_squared_error(
+ preds, data["test"]["y"]), description="Mean squared error metric")
+ return reg
+
+
+def main():
+ print("Running train.py")
+
+ parser = argparse.ArgumentParser("train")
+ parser.add_argument(
+ "--build_id",
+ type=str,
+ help="The build ID of the build triggering this pipeline run",
+ )
+ parser.add_argument(
+ "--model_name",
+ type=str,
+ help="Name of the Model",
+ default="sklearn_regression_model.pkl",
+ )
+ parser.add_argument(
+ "--alpha",
+ type=float,
+ default=0.5,
+ help=("Ridge regression regularization strength hyperparameter; "
+ "must be a positive float.")
+ )
+
+ args = parser.parse_args()
+
+ print("Argument [build_id]: %s" % args.build_id)
+ print("Argument [model_name]: %s" % args.model_name)
+ print("Argument [alpha]: %s" % args.alpha)
+
+ model_name = args.model_name
+ build_id = args.build_id
+ alpha = args.alpha
+
+ run = Run.get_context()
+
+ X, y = load_diabetes(return_X_y=True)
+ X_train, X_test, y_train, y_test = train_test_split(
+ X, y, test_size=0.2, random_state=0)
+ data = {"train": {"X": X_train, "y": y_train},
+ "test": {"X": X_test, "y": y_test}}
+
+ reg = train_model(run, data, alpha)
+
joblib.dump(value=reg, filename=model_name)
-# upload model file explicitly into artifacts for parent run
-run.parent.upload_file(name="./outputs/" + model_name,
- path_or_stream=model_name)
-print("Uploaded the model {} to experiment {}".format(
- model_name, run.experiment.name))
-dirpath = os.getcwd()
-print(dirpath)
-print("Following files are uploaded ")
-print(run.parent.get_file_names())
-
-# Add properties to identify this specific training run
-run.tag("BuildId", value=build_id)
-run.tag("run_type", value="train")
-print(f"tags now present for run: {run.tags}")
-
-run.complete()
+ # upload model file explicitly into artifacts for parent run
+ run.parent.upload_file(name="./outputs/" + model_name,
+ path_or_stream=model_name)
+ print("Uploaded the model {} to experiment {}".format(
+ model_name, run.experiment.name))
+ dirpath = os.getcwd()
+ print(dirpath)
+ print("Following files are uploaded ")
+ print(run.parent.get_file_names())
+
+ # Add properties to identify this specific training run
+ run.tag("BuildId", value=build_id)
+ run.tag("run_type", value="train")
+ print(f"tags now present for run: {run.tags}")
+
+ run.complete()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/docs/code_description.md b/docs/code_description.md
index bd094ce0..472e781b 100644
--- a/docs/code_description.md
+++ b/docs/code_description.md
@@ -24,6 +24,7 @@
- `ml_service/pipelines/build_train_pipeline_with_r.py` : builds and publishes an ML training pipeline. It uses R on ML Compute.
- `ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py` : builds and publishes an ML training pipeline. It uses R on Databricks Compute.
- `ml_service/pipelines/run_train_pipeline.py` : invokes a published ML training pipeline (Python on ML Compute) via REST API.
+- `ml_service/pipelines/verify_train_pipeline.py` : determines whether the evaluate_model.py step of the training pipeline registered a new model.
- `ml_service/util` : contains common utility functions used to build and publish an ML training pipeline.
### Code
diff --git a/docs/getting_started.md b/docs/getting_started.md
index db4a4938..8cff7ec1 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -10,35 +10,25 @@ following the instructions [here](https://docs.microsoft.com/en-us/azure/devops/
If you already have Azure DevOps account, create a [new project](https://docs.microsoft.com/en-us/azure/devops/organizations/projects/create-project?view=azure-devops).
-## Create a Service Principal to login to Azure
+## Create an ARM Service Connection to deploy resources
-To create service principal, register an application entity in Azure Active
-Directory (Azure AD) and grant it the Contributor or Owner role of the
-subscription or the resource group where the web service belongs to. See
-[how to create service principal](https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal) and assign permissions to manage Azure
-resource.
+The repository includes a DevOps pipeline to deploy the Azure ML workspace and associated resources through Azure Resource Manager.
-Please make note of the following values after creating a service principal, we
-will need them in subsequent steps:
-
-* Application (client) ID
-* Directory (tenant) ID
-* Application Secret
-
-**Note:** You must have sufficient permissions to register an application with
-your Azure AD tenant, and assign the application to a role in your Azure
-subscription. Contact your subscription administrator if you don't have the
-permissions. Normally a subscription admin can create a Service principal and
-can provide you the details.
+The pipeline requires an **Azure Resource Manager**
+[service connection](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/service-endpoints?view=azure-devops&tabs=yaml#create-a-service-connection).
+Given this service connection, you will be able to run the IaC pipeline
+and have the required permissions to generate resources.
-## Create an Azure DevOps Azure ML Workspace Service Connection
-Install the **Azure Machine Learning** extension to your organization from the
-[marketplace](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml),
-so that you can set up a service connection to your AML workspace.
+
-Create a service connection to your ML workspace via the [Azure DevOps Azure ML task instructions](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml) to be able to execute the Azure ML training pipeline. The connection name specified here needs to be used for the value of the `WORKSPACE_SVC_CONNECTION` set in the variable group below.
+Use **``AzureResourceConnection``** as the connection name, since it is used
+in the IaC pipeline definition. Leave the **``Resource Group``** field empty.
-**Note:** Creating service connection using Azure Machine Learning extension requires 'Owner' or 'User Access Administrator' permissions on the Workspace.
+**Note:** Creating the ARM service connection scope requires 'Owner' or 'User Access Administrator' permissions on the subscription.
+You must also have sufficient permissions to register an application with
+your Azure AD tenant, or receive the ID and secret of a service principal
+from your Azure AD Administrator. That principal must have 'Contributor'
+permissions on the subscription.
## Create a Variable Group for your Pipelines
@@ -62,16 +52,10 @@ The variable group should contain the following required variables:
| --------------------------- | -----------------------------------|
| BASE_NAME | [unique base name] |
| LOCATION | centralus |
-| SP_APP_ID | |
-| SP_APP_SECRET | |
-| SUBSCRIPTION_ID | |
-| TENANT_ID | |
| RESOURCE_GROUP | |
| WORKSPACE_NAME | mlops-AML-WS |
| WORKSPACE_SVC_CONNECTION | aml-workspace-connection |
-Mark **SP_APP_SECRET** variable as a secret one.
-
**Note:**
The **WORKSPACE_NAME** parameter is used for the Azure Machine Learning Workspace creation. You can provide here an existing AML Workspace if you have one.
@@ -93,7 +77,10 @@ There are more variables used in the project. They're defined in two places one
### Local configuration
-In order to configure the project locally you have to create a copy from `.env.example` to the root and name it `.env`. Fill out all missing values and adjust the existing ones to your needs. Please be aware that the local environment also needs access to the Azure subscription so you have to provide the credentials of your service principal and Azure account information here as well.
+In order to configure the project locally you have to create a copy from `.env.example` to the root and name it `.env`. Fill out all missing values and adjust the existing ones to your needs.
+
+For local development, you will also need to [install the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli). Azure CLI will be used to log you in interactively.
+Please be aware that the local environment also needs access to the Azure subscription, so you must have Contributor access on the Azure ML Workspace.
### Azure DevOps configuration
@@ -103,7 +90,6 @@ Up until now you should have:
* Forked (or cloned) the repo
* Created a devops account or use an existing one
-* Got service principal details and subscription id
* A variable group with all configuration values
## Create Resources with Azure Pipelines
@@ -118,18 +104,6 @@ To set up this pipeline, you will need to do the following steps:
1. Create an Azure Resource Manager Service Connection
1. Create a Build IaC Pipeline
-### Create an Azure Resource Manager Service Connection
-
-The pipeline requires an **Azure Resource Manager**
-[service connection](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/service-endpoints?view=azure-devops&tabs=yaml#create-a-service-connection).
-Given this service connection, you will be able to run the IaC pipeline
-and have the required permissions to generate resources.
-
-
-
-Use **``AzureResourceConnection``** as the connection name, since it is used
-in the IaC pipeline definition. Leave the **``Resource Group``** field empty.
-
### Create a Build IaC Pipeline
In your DevOps project, create a build pipeline from your forked **GitHub**
@@ -152,8 +126,18 @@ Check out created resources in the [Azure Portal](portal.azure.com):
Alternatively, you can also use a [cleaning pipeline](../environment_setup/iac-remove-environment.yml) that removes resources created for this project or
you can just delete a resource group in the [Azure Portal](portal.azure.com).
-Once this resource group is created, be sure that the Service Principal you have
-created has access to this resource group.
+## Create an Azure DevOps Azure ML Workspace Service Connection
+Install the **Azure Machine Learning** extension to your organization from the
+[marketplace](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml),
+so that you can set up a service connection to your AML workspace.
+
+Create a service connection to your ML workspace via the [Azure DevOps Azure ML task instructions](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml) to be able to execute the Azure ML training pipeline. The connection name specified here needs to be used for the value of the `WORKSPACE_SVC_CONNECTION` set in the variable group below.
+
+**Note:** Creating service connection with Azure Machine Learning workspace scope requires 'Owner' or 'User Access Administrator' permissions on the Workspace.
+You must also have sufficient permissions to register an application with
+your Azure AD tenant, or receive the ID and secret of a service principal
+from your Azure AD Administrator. That principal must have Contributor
+permissions on the Azure ML Workspace.
## Set up Build, Release Trigger, and Release Deployment Pipelines
@@ -241,10 +225,10 @@ Specify task parameters as it is shown in the table below:
| ----------------------------- | ---------------------------------------------------------------------------------------------------- |
| Display Name | Azure ML Model Deploy |
| Azure ML Workspace | mlops-AML-WS |
-| Inference config Path | `$(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/code/scoring/inference_config.yml` |
+| Inference config Path | `$(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/code/scoring/inference_config.yml`
_(The `_ci-build` part of the path is the source alias of your CI artifact)_ |
| Model Deployment Target | Azure Container Instance |
| Deployment Name | mlopspython-aci |
-| Deployment Configuration file | `$(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/code/scoring/deployment_config_aci.yml` |
+| Deployment Configuration file | `$(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/code/scoring/deployment_config_aci.yml`
_(The `_ci-build` part of the path is the source alias of your CI artifact)_ |
| Overwrite existing deployment | X |
In a similar way, create a stage **Prod (AKS)** and add a single task to the job
@@ -259,11 +243,11 @@ Specify task parameters as it is shown in the table below:
| --------------------------------- | ---------------------------------------------------------------------------------------------------- |
| Display Name | Azure ML Model Deploy |
| Azure ML Workspace | mlops-AML-WS |
-| Inference config Path | `$(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/code/scoring/inference_config.yml` |
+| Inference config Path | `$(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/code/scoring/inference_config.yml`
_(The `_ci-build` part of the path is the source alias of your CI artifact)_ |
| Model Deployment Target | Azure Kubernetes Service |
| Select AKS Cluster for Deployment | YOUR_DEPLOYMENT_K8S_CLUSTER |
| Deployment Name | mlopspython-aks |
-| Deployment Configuration file | `$(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/code/scoring/deployment_config_aks.yml` |
+| Deployment Configuration file | `$(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/code/scoring/deployment_config_aks.yml`
_(The `_ci-build` part of the path is the source alias of your CI artifact)_ |
| Overwrite existing deployment | X |
**Note:** Creating of a Kubernetes cluster on AKS is out of scope of this
@@ -300,19 +284,33 @@ config. To learn more on how to create a container with AML SDK click
[here](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.image.image.image?view=azure-ml-py#create-workspace--name--models--image-config-).
Below is release pipeline with two tasks one to create an image using the above
-script and second is the deploy the image to Web App for containers
-.
+script and the second deploys the image to Web App for containers.
+
+
+
+In the Variables tab, link the pipeline to your variable group (`devopsforai-aml-vg`). In the variable group definition, add the following variables:
+
+| Variable Name | Suggested Value |
+| --------------------------- | -----------------------------------|
+| MODEL_NAME | sklearn_regression_model.pkl |
+| IMAGE_NAME | diabetes |
+
+Add as an artifact to the pipeline the result of the **Build Pipeline** as it contains the necessary scripts.
+
+Use an Agent of type `ubuntu-16.04`.
-For the bash script task to invoke the [Create Image Script](../ml_service/util/create_scoring_image.py), specify the following task parameters:
+For the Azure CLI task to invoke the [Create Image Script](../ml_service/util/create_scoring_image.py), specify the following task parameters:
| Parameter | Value |
| ------------------ | --------------------------------------------------------------------------------------------------- |
-| Display Name | Create Scoring Image |
-| Script | python3 $(System.DefaultWorkingDirectory)/\_MLOpsPythonRepo/ml_service/util/create_scoring_image.py |
+| Display name | Create Scoring Image |
+| Azure subscription | aml-workspace-connection |
+| Script Path | `$(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/ml_service/util/create_scoring_image.sh`
_(The `_ci-build` part of the path is the source alias of your CI artifact)_ |
+| Working directory | `$(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines`
_(The `_ci-build` part of the path is the source alias of your CI artifact)_ |

-Finally, for the Azure WebApp on Container Task, specify the following task
+Finally, for the Azure Web App for Containers Task, specify the following task
parameters as it is shown in the table below:
| Parameter | Value |
diff --git a/docs/images/release-task-createimage.PNG b/docs/images/release-task-createimage.PNG
new file mode 100644
index 00000000..8224db18
Binary files /dev/null and b/docs/images/release-task-createimage.PNG differ
diff --git a/docs/images/release-webapp-pipeline.PNG b/docs/images/release-webapp-pipeline.PNG
index 63d8e1c0..10ffddff 100644
Binary files a/docs/images/release-webapp-pipeline.PNG and b/docs/images/release-webapp-pipeline.PNG differ
diff --git a/environment_setup/requirements.txt b/environment_setup/requirements.txt
index e7d7cc7b..f99e7f4b 100644
--- a/environment_setup/requirements.txt
+++ b/environment_setup/requirements.txt
@@ -2,6 +2,7 @@ pytest>=5.3
requests>=2.22
numpy>=1.17
pandas>=0.25
+scikit-learn>=0.21.3
azureml-sdk>=1.0
python-dotenv>=0.10.3
flake8>=3.7
diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py
index fa21b515..4b0d775b 100644
--- a/ml_service/pipelines/build_train_pipeline.py
+++ b/ml_service/pipelines/build_train_pipeline.py
@@ -1,12 +1,11 @@
from azureml.pipeline.core.graph import PipelineParameter
from azureml.pipeline.steps import PythonScriptStep
-from azureml.pipeline.core import Pipeline # , PipelineData
+from azureml.pipeline.core import Pipeline
+from azureml.core import Workspace
from azureml.core.runconfig import RunConfiguration, CondaDependencies
-# from azureml.core import Datastore
import os
import sys
sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402
-from workspace import get_workspace
from attach_compute import get_compute
from env_variables import Env
@@ -14,13 +13,11 @@
def main():
e = Env()
# Get Azure machine learning workspace
- aml_workspace = get_workspace(
- e.workspace_name,
- e.resource_group,
- e.subscription_id,
- e.tenant_id,
- e.app_id,
- e.app_secret)
+ aml_workspace = Workspace.get(
+ name=e.workspace_name,
+ subscription_id=e.subscription_id,
+ resource_group=e.resource_group
+ )
print("get_workspace:")
print(aml_workspace)
@@ -46,6 +43,8 @@ def main():
name="model_name", default_value=e.model_name)
build_id_param = PipelineParameter(
name="build_id", default_value=e.build_id)
+ hyperparameter_alpha_param = PipelineParameter(
+ name="hyperparameter_alpha", default_value=0.5)
train_step = PythonScriptStep(
name="Train Model",
@@ -55,6 +54,7 @@ def main():
arguments=[
"--build_id", build_id_param,
"--model_name", model_name_param,
+ "--alpha", hyperparameter_alpha_param,
],
runconfig=run_config,
allow_reuse=False,
diff --git a/ml_service/pipelines/build_train_pipeline_with_r.py b/ml_service/pipelines/build_train_pipeline_with_r.py
index 72ed8e2a..eea6d4c6 100644
--- a/ml_service/pipelines/build_train_pipeline_with_r.py
+++ b/ml_service/pipelines/build_train_pipeline_with_r.py
@@ -1,11 +1,10 @@
from azureml.pipeline.steps import PythonScriptStep
-from azureml.pipeline.core import Pipeline # , PipelineData
+from azureml.pipeline.core import Pipeline
+from azureml.core import Workspace
from azureml.core.runconfig import RunConfiguration, CondaDependencies
-# from azureml.core import Datastore
import os
import sys
sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402
-from workspace import get_workspace
from attach_compute import get_compute
from env_variables import Env
@@ -13,13 +12,12 @@
def main():
e = Env()
# Get Azure machine learning workspace
- aml_workspace = get_workspace(
- e.workspace_name,
- e.resource_group,
- e.subscription_id,
- e.tenant_id,
- e.app_id,
- e.app_secret)
+ aml_workspace = Workspace.get(
+ name=e.workspace_name,
+ subscription_id=e.subscription_id,
+ resource_group=e.resource_group
+ )
+ print("get_workspace:")
print(aml_workspace)
# Get Azure machine learning cluster
@@ -28,6 +26,7 @@ def main():
e.compute_name,
e.vm_size)
if aml_compute is not None:
+ print("aml_compute:")
print(aml_compute)
run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
diff --git a/ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py b/ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py
index 733683eb..7d1891c7 100644
--- a/ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py
+++ b/ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py
@@ -1,8 +1,8 @@
from azureml.pipeline.core import Pipeline
+from azureml.core import Workspace
import os
import sys
sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402
-from workspace import get_workspace
from attach_compute import get_compute
from azureml.pipeline.steps import DatabricksStep
from env_variables import Env
@@ -11,13 +11,12 @@
def main():
e = Env()
# Get Azure machine learning workspace
- aml_workspace = get_workspace(
- e.workspace_name,
- e.resource_group,
- e.subscription_id,
- e.tenant_id,
- e.app_id,
- e.app_secret)
+ aml_workspace = Workspace.get(
+ name=e.workspace_name,
+ subscription_id=e.subscription_id,
+ resource_group=e.resource_group
+ )
+ print("get_workspace:")
print(aml_workspace)
# Get Azure machine learning cluster
@@ -26,6 +25,7 @@ def main():
e.compute_name,
e.vm_size)
if aml_compute is not None:
+ print("aml_compute:")
print(aml_compute)
train_step = DatabricksStep(
diff --git a/ml_service/pipelines/run_train_pipeline.py b/ml_service/pipelines/run_train_pipeline.py
index 06abfd4d..50bf6e65 100644
--- a/ml_service/pipelines/run_train_pipeline.py
+++ b/ml_service/pipelines/run_train_pipeline.py
@@ -1,24 +1,35 @@
from azureml.pipeline.core import PublishedPipeline
from azureml.core import Workspace
-from azureml.core.authentication import ServicePrincipalAuthentication
import os
import sys
+import argparse
sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402
from env_variables import Env
def main():
+
+ parser = argparse.ArgumentParser("register")
+ parser.add_argument(
+ "--output_pipeline_id_file",
+ type=str,
+ default="pipeline_id.txt",
+ help="Name of a file to write pipeline ID to"
+ )
+ parser.add_argument(
+ "--skip_train_execution",
+ action="store_true",
+ help=("Do not trigger the execution. "
+ "Use this in Azure DevOps when using a server job to trigger")
+ )
+ args = parser.parse_args()
+
e = Env()
- service_principal = ServicePrincipalAuthentication(
- tenant_id=e.tenant_id,
- service_principal_id=e.app_id,
- service_principal_password=e.app_secret)
aml_workspace = Workspace.get(
name=e.workspace_name,
subscription_id=e.subscription_id,
- resource_group=e.resource_group,
- auth=service_principal
+ resource_group=e.resource_group
)
# Find the pipeline that was published by the specified build ID
@@ -41,22 +52,18 @@ def main():
print("published pipeline id is", published_pipeline.id)
# Save the Pipeline ID for other AzDO jobs after script is complete
- os.environ['amlpipeline_id'] = published_pipeline.id
- savePIDcmd = 'echo "export AMLPIPELINE_ID=$amlpipeline_id" >tmp.sh'
- os.system(savePIDcmd)
-
- # Set this to True for local development or if NOT
- # using Azure DevOps Azure ML agentless pipeline execution task
- skip_train_execution = True
- if(skip_train_execution is False):
+ if args.output_pipeline_id_file is not None:
+ with open(args.output_pipeline_id_file, "w") as out_file:
+ out_file.write(published_pipeline.id)
+
+ if(args.skip_train_execution is False):
pipeline_parameters = {"model_name": e.model_name}
- response = published_pipeline.submit(
+ run = published_pipeline.submit(
aml_workspace,
e.experiment_name,
pipeline_parameters)
- run_id = response.id
- print("Pipeline run initiated ", run_id)
+ print("Pipeline run initiated ", run.id)
if __name__ == "__main__":
diff --git a/ml_service/pipelines/verify_train_pipeline.py b/ml_service/pipelines/verify_train_pipeline.py
new file mode 100644
index 00000000..b677dd6e
--- /dev/null
+++ b/ml_service/pipelines/verify_train_pipeline.py
@@ -0,0 +1,66 @@
+import os
+import sys
+import argparse
+from azureml.core import Run, Experiment, Workspace
+sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402
+from env_variables import Env
+
+
+def main():
+
+ run = Run.get_context()
+ if (run.id.startswith('OfflineRun')):
+ from dotenv import load_dotenv
+ sys.path.append(os.path.abspath("./code/util")) # NOQA: E402
+ from model_helper import get_model_by_tag
+ # For local development, set values in this section
+ load_dotenv()
+ workspace_name = os.environ.get("WORKSPACE_NAME")
+ experiment_name = os.environ.get("EXPERIMENT_NAME")
+ resource_group = os.environ.get("RESOURCE_GROUP")
+ subscription_id = os.environ.get("SUBSCRIPTION_ID")
+ build_id = os.environ.get('BUILD_BUILDID')
+ aml_workspace = Workspace.get(
+ name=workspace_name,
+ subscription_id=subscription_id,
+ resource_group=resource_group
+ )
+ ws = aml_workspace
+ exp = Experiment(ws, experiment_name)
+ else:
+ sys.path.append(os.path.abspath("./util")) # NOQA: E402
+ from model_helper import get_model_by_tag
+ ws = run.experiment.workspace
+ exp = run.experiment
+
+ e = Env()
+
+ parser = argparse.ArgumentParser("register")
+ parser.add_argument(
+ "--build_id",
+ type=str,
+ help="The Build ID of the build triggering this pipeline run",
+ )
+
+ args = parser.parse_args()
+    build_id = args.build_id if args.build_id is not None \
+        else os.environ.get('BUILD_BUILDID')
+    model_name = e.model_name
+
+ try:
+ tag_name = 'BuildId'
+ model = get_model_by_tag(
+ model_name, tag_name, build_id, exp.workspace)
+ if (model is not None):
+ print("Model was registered for this build.")
+ if (model is None):
+ print("Model was not registered for this run.")
+ sys.exit(1)
+    except Exception as caught_exception:
+        print(caught_exception)
+ print("Model was not registered for this run.")
+ sys.exit(1)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/ml_service/util/attach_compute.py b/ml_service/util/attach_compute.py
index 569e3041..cc88be95 100644
--- a/ml_service/util/attach_compute.py
+++ b/ml_service/util/attach_compute.py
@@ -39,4 +39,4 @@ def get_compute(
except ComputeTargetException as e:
print(e)
print('An error occurred trying to provision compute.')
- exit()
+ exit(1)
diff --git a/ml_service/util/create_scoring_image.py b/ml_service/util/create_scoring_image.py
index 7e99bd28..af7de448 100644
--- a/ml_service/util/create_scoring_image.py
+++ b/ml_service/util/create_scoring_image.py
@@ -1,25 +1,20 @@
import os
+import sys
from azureml.core import Workspace
from azureml.core.image import ContainerImage, Image
from azureml.core.model import Model
-from azureml.core.authentication import ServicePrincipalAuthentication
+sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402
from env_variables import Env
e = Env()
-SP_AUTH = ServicePrincipalAuthentication(
- tenant_id=e.tenant_id,
- service_principal_id=e.app_id,
- service_principal_password=e.app_secret)
-
+# Get Azure machine learning workspace
ws = Workspace.get(
- e.workspace_name,
- SP_AUTH,
- e.subscription_id,
- e.resource_group
+ name=e.workspace_name,
+ subscription_id=e.subscription_id,
+ resource_group=e.resource_group
)
-
model = Model(ws, name=e.model_name, version=e.model_version)
os.chdir("./code/scoring")
diff --git a/ml_service/util/create_scoring_image.sh b/ml_service/util/create_scoring_image.sh
new file mode 100644
index 00000000..1651b73e
--- /dev/null
+++ b/ml_service/util/create_scoring_image.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+set -euo pipefail # strict mode, fail on error
+set -x # verbose
+
+docker run \
+ --rm \
+ -t \
+  -v "$PWD":/mlops \
+  -v "${AZURE_CONFIG_DIR:-$HOME/.azure}":/root/.azure \
+  -e SUBSCRIPTION_ID="$(az account show --query id -o tsv)" \
+  -e RESOURCE_GROUP="$RESOURCE_GROUP" \
+  -e WORKSPACE_NAME="$WORKSPACE_NAME" \
+  -e MODEL_NAME="$MODEL_NAME" \
+  -e IMAGE_NAME="$IMAGE_NAME" \
+ mcr.microsoft.com/mlops/python:latest \
+ bash -c "cd /mlops/ && python ml_service/util/create_scoring_image.py"
diff --git a/ml_service/util/workspace.py b/ml_service/util/workspace.py
deleted file mode 100644
index 08d1f67d..00000000
--- a/ml_service/util/workspace.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import sys
-from azureml.core import Workspace
-from azureml.core.authentication import ServicePrincipalAuthentication
-
-
-def get_workspace(
- name: str,
- resource_group: str,
- subscription_id: str,
- tenant_id: str,
- app_id: str,
- app_secret: str):
- service_principal = ServicePrincipalAuthentication(
- tenant_id=tenant_id,
- service_principal_id=app_id,
- service_principal_password=app_secret)
-
- try:
- aml_workspace = Workspace.get(
- name=name,
- subscription_id=subscription_id,
- resource_group=resource_group,
- auth=service_principal)
-
- return aml_workspace
- except Exception as caught_exception:
- print("Error while retrieving Workspace...")
- print(str(caught_exception))
- sys.exit(1)
diff --git a/tests/unit/code_test.py b/tests/unit/code_test.py
index 3c49454d..06654b2f 100644
--- a/tests/unit/code_test.py
+++ b/tests/unit/code_test.py
@@ -1,27 +1,25 @@
import sys
import os
-sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402
-from workspace import get_workspace
-from env_variables import Env
+import numpy as np
+from azureml.core.run import Run
+from unittest.mock import Mock
+sys.path.append(os.path.abspath("./code/training")) # NOQA: E402
+from train import train_model
-# Just an example of a unit test against
-# a utility function common_scoring.next_saturday
-def test_get_workspace():
- e = Env()
- workspace_name = e.workspace_name
- resource_group = e.resource_group
- subscription_id = e.subscription_id
- tenant_id = e.tenant_id
- app_id = e.app_id
- app_secret = e.app_secret
+def test_train_model():
+ X_train = np.array([1, 2, 3, 4, 5, 6]).reshape(-1, 1)
+ y_train = np.array([10, 9, 8, 8, 6, 5])
+ X_test = np.array([3, 4]).reshape(-1, 1)
+ y_test = np.array([8, 7])
+ data = {"train": {"X": X_train, "y": y_train},
+ "test": {"X": X_test, "y": y_test}}
- aml_workspace = get_workspace(
- workspace_name,
- resource_group,
- subscription_id,
- tenant_id,
- app_id,
- app_secret)
+ run = Mock(Run)
+ reg = train_model(run, data, alpha=1.2)
- assert aml_workspace.name == workspace_name
+ run.log.assert_called_with("mse", 0.029843893480256872,
+ description='Mean squared error metric')
+
+ preds = reg.predict([[1], [2]])
+ np.testing.assert_equal(preds, [9.93939393939394, 9.03030303030303])