diff --git a/.env.example b/.env.example
new file mode 100644
index 00000000..47311d1e
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,81 @@
+# Azure Subscription Variables
+SUBSCRIPTION_ID = ''
+LOCATION = ''
+TENANT_ID = ''
+BASE_NAME = ''
+SP_APP_ID = ''
+SP_APP_SECRET = ''
+RESOURCE_GROUP = 'mlops-RG'
+
+# Mock build/release ID for local testing
+BUILD_BUILDID = '001'
+
+# Azure ML Workspace Variables
+WORKSPACE_NAME = 'mlops-aml-ws'
+EXPERIMENT_NAME = 'mlopspython'
+
+# AML Compute Cluster Config
+AML_ENV_NAME='diabetes_regression_training_env'
+AML_ENV_TRAIN_CONDA_DEP_FILE="conda_dependencies.yml"
+AML_COMPUTE_CLUSTER_NAME = 'train-cluster'
+AML_COMPUTE_CLUSTER_CPU_SKU = 'STANDARD_DS2_V2'
+AML_CLUSTER_MAX_NODES = '4'
+AML_CLUSTER_MIN_NODES = '0'
+AML_CLUSTER_PRIORITY = 'lowpriority'
+# Training Config
+MODEL_NAME = 'diabetes_regression_model.pkl'
+MODEL_VERSION = '1'
+TRAIN_SCRIPT_PATH = 'training/train_aml.py'
+
+
+# AML Pipeline Config
+TRAINING_PIPELINE_NAME = 'Training Pipeline'
+MODEL_PATH = ''
+EVALUATE_SCRIPT_PATH = 'evaluate/evaluate_model.py'
+REGISTER_SCRIPT_PATH = 'register/register_model.py'
+SOURCES_DIR_TRAIN = 'diabetes_regression'
+DATASET_NAME = 'diabetes_ds'
+DATASET_VERSION = 'latest'
+# Optional. Set it if you have configured a non-default datastore to point to your data
+DATASTORE_NAME = ''
+SCORE_SCRIPT = 'scoring/score.py'
+
+# Optional. Used by a training pipeline with R on Databricks
+DB_CLUSTER_ID = ''
+
+# Optional. Container Image name for image creation
+IMAGE_NAME = 'mltrained'
+
+# Run Evaluation Step in AML pipeline
+RUN_EVALUATION = 'true'
+
+# Set to 'true' to cancel the Azure ML pipeline run when evaluation criteria are not met.
+ALLOW_RUN_CANCEL = 'true'
+
+# Flag to allow rebuilding the AML Environment after it was built for the first time. This enables dependency updates from conda_dependencies.yml.
+AML_REBUILD_ENVIRONMENT = 'false' + + + +USE_GPU_FOR_SCORING = "false" +AML_ENV_SCORE_CONDA_DEP_FILE="conda_dependencies_scoring.yml" +AML_ENV_SCORECOPY_CONDA_DEP_FILE="conda_dependencies_scorecopy.yml" +# AML Compute Cluster Config for parallel batch scoring +AML_ENV_NAME_SCORING='diabetes_regression_scoring_env' +AML_ENV_NAME_SCORE_COPY='diabetes_regression_score_copy_env' +AML_COMPUTE_CLUSTER_NAME_SCORING = 'score-cluster' +AML_COMPUTE_CLUSTER_CPU_SKU_SCORING = 'STANDARD_DS2_V2' +AML_CLUSTER_MAX_NODES_SCORING = '4' +AML_CLUSTER_MIN_NODES_SCORING = '0' +AML_CLUSTER_PRIORITY_SCORING = 'lowpriority' +AML_REBUILD_ENVIRONMENT_SCORING = 'true' +BATCHSCORE_SCRIPT_PATH = 'scoring/parallel_batchscore.py' +BATCHSCORE_COPY_SCRIPT_PATH = 'scoring/parallel_batchscore_copyoutput.py' + + +SCORING_DATASTORE_INPUT_CONTAINER = 'input' +SCORING_DATASTORE_INPUT_FILENAME = 'diabetes_scoring_input.csv' +SCORING_DATASTORE_OUTPUT_CONTAINER = 'output' +SCORING_DATASTORE_OUTPUT_FILENAME = 'diabetes_scoring_output.csv' +SCORING_DATASET_NAME = 'diabetes_scoring_ds' +SCORING_PIPELINE_NAME = 'diabetes-scoring-pipeline' diff --git a/.gitignore b/.gitignore index bc6e89d1..3ab04e2f 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ wheels/ .installed.cfg *.egg MANIFEST +venv/ # PyInstaller # Usually these files are written by a python script from a template @@ -46,6 +47,8 @@ coverage.xml *.cover .hypothesis/ .pytest_cache/ +*-testresults.xml +test-output.xml # Translations *.mo @@ -90,6 +93,7 @@ ENV/ env.bak/ venv.bak/ *.vscode +condaenv.* # Spyder project settings .spyderproject @@ -103,3 +107,5 @@ venv.bak/ # mypy .mypy_cache/ + +.DS_Store diff --git a/.pipelines/abtest.yml b/.pipelines/abtest.yml new file mode 100644 index 00000000..cf876181 --- /dev/null +++ b/.pipelines/abtest.yml @@ -0,0 +1,168 @@ +# Pipeline for the canary deployment workflow. 
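+# In outline (matching the stages below): build the scoring image, deploy it as the
+# 'blue' release, shift Istio traffic 50/50 and then 100/0 toward blue, promote blue to
+# production by redeploying the same image as 'green', and finally uninstall the blue
+# release. The Rollback stage returns all traffic to green if the blue rollout fails.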
+ +resources: + containers: + - container: mlops + image: mcr.microsoft.com/mlops/python:latest + +pr: none +trigger: + branches: + include: + - master + paths: + exclude: + - docs/ + - environment_setup/ + - ml_service/util/create_scoring_image.* + - ml_service/util/smoke_test_scoring_service.py + +variables: +- template: diabetes_regression-variables-template.yml +- group: 'devopsforai-aml-vg' +- name: 'helmVersion' + value: 'v3.1.1' +- name: 'helmDownloadURL' + value: 'https://get.helm.sh/helm-$HELM_VERSION-linux-amd64.tar.gz' +- name: 'blueReleaseName' + value: 'model-blue' +- name: 'greenReleaseName' + value: 'model-green' +- name: 'SCORE_SCRIPT' + value: 'scoring/scoreA.py' + +stages: +- stage: 'Building' + jobs: + - job: "Build_Scoring_image" + timeoutInMinutes: 0 + pool: + vmImage: 'ubuntu-latest' + container: mlops + steps: + - task: AzureCLI@1 + inputs: + azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' + scriptLocation: inlineScript + inlineScript: | + set -e + export SUBSCRIPTION_ID=$(az account show --query id -o tsv) + python -m ml_service.util.create_scoring_image --output_image_location_file image_location.txt + displayName: 'Create Scoring Image' + name: 'buildscoringimage' + + - publish: image_location.txt + artifact: image_location + + - publish: $(System.DefaultWorkingDirectory)/charts + artifact: allcharts + +- stage: 'Blue_Staging' + jobs: + - deployment: "Deploy_to_Staging" + timeoutInMinutes: 0 + environment: abtestenv + strategy: + runOnce: + deploy: + steps: + - script: | + IMAGE_LOCATION="$(cat $(Pipeline.Workspace)/image_location/image_location.txt)" + echo "##vso[task.setvariable variable=IMAGE_LOCATION]$IMAGE_LOCATION" + displayName: 'Get Image Location' + - template: helm-upgrade-template.yml + parameters: + chartPath: '$(Pipeline.Workspace)/allcharts/abtest-model' + releaseName: $(blueReleaseName) + overrideValues: 'deployment.name=$(blueReleaseName),deployment.bluegreen=blue,deployment.image.name=$(IMAGE_LOCATION)' + +- stage: 'Blue_50' + jobs: + - job: 'Blue_Rollout_50' + displayName: 50 50 rollout to blue environment + timeoutInMinutes: 0 + steps: + - template: helm-upgrade-template.yml + parameters: + chartPath: '$(System.DefaultWorkingDirectory)/charts/abtest-istio' + releaseName: 'abtest-istio' + overrideValues: 'weight.blue=50,weight.green=50' + +- stage: 'Blue_100' + jobs: + - deployment: 'blue_Rollout_100' + timeoutInMinutes: 0 + environment: abtestenv + strategy: + runOnce: + deploy: + steps: + - template: helm-upgrade-template.yml + parameters: + chartPath: '$(Pipeline.Workspace)/allcharts/abtest-istio' + releaseName: 'abtest-istio' + overrideValues: 'weight.blue=100,weight.green=0' + +- stage: 'Rollback' + dependsOn: 'Blue_100' + condition: failed() + jobs: + - deployment: 'Roll_Back' + displayName: 'Roll Back after failure' + environment: abtestenv + strategy: + runOnce: + deploy: + steps: + - template: helm-upgrade-template.yml + parameters: + chartPath: '$(Pipeline.Workspace)/allcharts/abtest-istio' + releaseName: 'abtest-istio' + overrideValues: 'weight.blue=0,weight.green=100' + +- stage: 'Set_Production_Tag' + dependsOn: 'Blue_100' + condition: succeeded() + jobs: + - deployment: 'green_blue_tagging' + timeoutInMinutes: 0 + environment: abtestenv + strategy: + runOnce: + deploy: + steps: + - script: | + IMAGE_LOCATION="$(cat $(Pipeline.Workspace)/image_location/image_location.txt)" + echo "##vso[task.setvariable variable=IMAGE_LOCATION]$IMAGE_LOCATION" + displayName: 'Get Image Location' + - template: helm-upgrade-template.yml + parameters: 
+ chartPath: '$(Pipeline.Workspace)/allcharts/abtest-model' + releaseName: $(greenReleaseName) + overrideValues: 'deployment.name=$(greenReleaseName),deployment.bluegreen=green,deployment.image.name=$(IMAGE_LOCATION)' + +- stage: 'Green_100' + jobs: + - job: 'Prod_Rollout_100' + timeoutInMinutes: 0 + steps: + - template: helm-upgrade-template.yml + parameters: + chartPath: '$(System.DefaultWorkingDirectory)/charts/abtest-istio' + releaseName: 'abtest-istio' + overrideValues: 'weight.blue=0,weight.green=100' + +- stage: 'Disable_blue' + condition: always() + jobs: + - job: 'blue_disable' + timeoutInMinutes: 0 + steps: + - template: helm-install-template.yml + - task: HelmDeploy@0 + displayName: 'helm uninstall blue' + inputs: + connectionType: 'Kubernetes Service Connection' + kubernetesServiceConnection: $(K8S_AB_SERVICE_CONNECTION) + command: delete + arguments: $(blueReleaseName) --namespace $(K8S_AB_NAMESPACE) diff --git a/.pipelines/code-quality-template.yml b/.pipelines/code-quality-template.yml new file mode 100644 index 00000000..afaf7a9a --- /dev/null +++ b/.pipelines/code-quality-template.yml @@ -0,0 +1,27 @@ +# Pipeline template to run linting, unit tests with code coverage, and publish the results. +steps: +- script: | + flake8 --output-file=lint-testresults.xml --format junit-xml + displayName: 'Run lint tests' + +- script: | + python -m pytest . --cov=diabetes_regression --cov-report=html --cov-report=xml --junitxml=unit-testresults.xml + condition: succeededOrFailed() + displayName: 'Run unit tests' + +- task: PublishTestResults@2 + condition: succeededOrFailed() + inputs: + testResultsFiles: '*-testresults.xml' + testRunTitle: 'Linting & Unit tests' + failTaskOnFailedTests: true + displayName: 'Publish test results' + +- task: PublishCodeCoverageResults@1 + displayName: 'Publish coverage report' + condition: succeededOrFailed() + inputs: + codeCoverageTool: Cobertura + summaryFileLocation: 'coverage.xml' + reportDirectory: 'htmlcov' + failIfCoverageEmpty: true diff --git a/.pipelines/diabetes_regression-batchscoring-ci.yml b/.pipelines/diabetes_regression-batchscoring-ci.yml new file mode 100644 index 00000000..1392fddb --- /dev/null +++ b/.pipelines/diabetes_regression-batchscoring-ci.yml @@ -0,0 +1,89 @@ +# Continuous Integration (CI) pipeline that orchestrates the batch scoring of the diabetes_regression model. + +# Runtime parameters to select artifacts +parameters: +- name : artifactBuildId + displayName: Model Train CI Build ID. Default is 'latest'. 
+  type: string
+  default: latest
+
+pr: none
+
+# Trigger this pipeline on model-train pipeline completion
+resources:
+  containers:
+  - container: mlops
+    image: mcr.microsoft.com/mlops/python:latest
+  pipelines:
+  - pipeline: model-train-ci
+    source: Model-Train-Register-CI # Name of the triggering pipeline
+    trigger:
+      branches:
+        include:
+        - master
+
+trigger:
+  branches:
+    include:
+    - master
+  paths:
+    include:
+    - diabetes_regression/scoring/parallel_batchscore.py
+    - ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py
+    - ml_service/pipelines/run_parallel_batchscore_pipeline.py
+
+variables:
+- template: diabetes_regression-variables-template.yml
+- group: devopsforai-aml-vg
+
+pool:
+  vmImage: ubuntu-latest
+
+stages:
+- stage: 'Batch_Scoring_Pipeline_CI'
+  displayName: 'Batch Scoring Pipeline CI'
+  jobs:
+  - job: "Build_Batch_Scoring_Pipeline"
+    displayName: "Build Batch Scoring Pipeline"
+    container: mlops
+    timeoutInMinutes: 0
+    steps:
+    - template: code-quality-template.yml
+    - template: diabetes_regression-get-model-id-artifact-template.yml
+      parameters:
+        projectId: '$(resources.pipeline.model-train-ci.projectID)'
+        pipelineId: '$(resources.pipeline.model-train-ci.pipelineID)'
+        artifactBuildId: ${{ parameters.artifactBuildId }}
+    - task: AzureCLI@1
+      displayName: "Publish Batch Scoring Pipeline"
+      name: publish_batchscore
+      inputs:
+        azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
+        scriptLocation: inlineScript
+        workingDirectory: $(Build.SourcesDirectory)
+        inlineScript: |
+          set -e # fail on error
+          export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
+          # Invoke the Python script that builds and publishes the batch scoring pipeline
+          python -m ml_service.pipelines.diabetes_regression_build_parallel_batchscore_pipeline
+      env:
+        SCORING_DATASTORE_ACCESS_KEY: $(SCORING_DATASTORE_ACCESS_KEY)
+
+  - job: "Run_Batch_Score_Pipeline"
+    displayName: "Run Batch Scoring Pipeline"
+    dependsOn: ["Build_Batch_Scoring_Pipeline"]
+    timeoutInMinutes: 240
+    pool: server
+    variables:
+      pipeline_id: $[ dependencies.Build_Batch_Scoring_Pipeline.outputs['publish_batchscore.pipeline_id']]
+      model_name: $[ dependencies.Build_Batch_Scoring_Pipeline.outputs['get_model.MODEL_NAME']]
+      model_version: $[ dependencies.Build_Batch_Scoring_Pipeline.outputs['get_model.MODEL_VERSION']]
+    steps:
+    - task: ms-air-aiagility.vss-services-azureml.azureml-restApi-task.MLPublishedPipelineRestAPITask@0
+      displayName: 'Invoke Batch Scoring pipeline'
+      inputs:
+        azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
+        PipelineId: '$(pipeline_id)'
+        ExperimentName: '$(EXPERIMENT_NAME)'
+        PipelineParameters: '"ParameterAssignments": {"model_name": "$(model_name)", "model_version": "$(model_version)"}'
+
\ No newline at end of file
diff --git a/.pipelines/diabetes_regression-cd.yml b/.pipelines/diabetes_regression-cd.yml
new file mode 100644
index 00000000..a691cc47
--- /dev/null
+++ b/.pipelines/diabetes_regression-cd.yml
@@ -0,0 +1,161 @@
+# Continuous Deployment (CD) pipeline that orchestrates the deployment of the diabetes_regression model.
+
+# Runtime parameters to select artifacts
+parameters:
+- name : artifactBuildId
+  displayName: Model Train CI Build ID. Default is 'latest'.
+ type: string + default: latest + +pr: none + +# Trigger this pipeline on model-train pipeline completion +trigger: none +resources: + containers: + - container: mlops + image: mcr.microsoft.com/mlops/python:latest + pipelines: + - pipeline: model-train-ci + source: Model-Train-Register-CI # Name of the triggering pipeline + trigger: + branches: + include: + - master + +variables: +- template: diabetes_regression-variables-template.yml +- group: devopsforai-aml-vg + +stages: +- stage: 'Deploy_ACI' + displayName: 'Deploy to ACI' + condition: variables['ACI_DEPLOYMENT_NAME'] + jobs: + - job: "Deploy_ACI" + displayName: "Deploy to ACI" + container: mlops + timeoutInMinutes: 0 + steps: + - download: none + - template: diabetes_regression-get-model-id-artifact-template.yml + parameters: + projectId: '$(resources.pipeline.model-train-ci.projectID)' + pipelineId: '$(resources.pipeline.model-train-ci.pipelineID)' + artifactBuildId: ${{ parameters.artifactBuildId }} + - task: AzureCLI@1 + displayName: 'Install AzureML CLI' + inputs: + azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' + scriptLocation: inlineScript + workingDirectory: $(Build.SourcesDirectory) + inlineScript: 'az extension add --source https://azurecliext.blob.core.windows.net/release/azure_cli_ml-1.27.0-py3-none-any.whl --yes' + - task: AzureCLI@1 + displayName: "Deploy to ACI (CLI)" + inputs: + azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' + scriptLocation: inlineScript + workingDirectory: $(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring + inlineScript: | + set -e # fail on error + + az ml model deploy --name $(ACI_DEPLOYMENT_NAME) --model '$(MODEL_NAME):$(get_model.MODEL_VERSION)' \ + --ic inference_config.yml \ + --dc deployment_config_aci.yml \ + -g $(RESOURCE_GROUP) --workspace-name $(WORKSPACE_NAME) \ + --overwrite -v + - task: AzureCLI@1 + displayName: 'Smoke test' + inputs: + azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' + scriptLocation: inlineScript + inlineScript: | + set -e # fail on error + export SUBSCRIPTION_ID=$(az account show --query id -o tsv) + python -m ml_service.util.smoke_test_scoring_service --type ACI --service "$(ACI_DEPLOYMENT_NAME)" + +- stage: 'Deploy_AKS' + displayName: 'Deploy to AKS' + dependsOn: Deploy_ACI + condition: and(succeeded(), variables['AKS_DEPLOYMENT_NAME']) + jobs: + - job: "Deploy_AKS" + displayName: "Deploy to AKS" + container: mlops + timeoutInMinutes: 0 + steps: + - template: diabetes_regression-get-model-id-artifact-template.yml + parameters: + projectId: '$(resources.pipeline.model-train-ci.projectID)' + pipelineId: '$(resources.pipeline.model-train-ci.pipelineID)' + artifactBuildId: ${{ parameters.artifactBuildId }} + - task: AzureCLI@1 + displayName: 'Install AzureML CLI' + inputs: + azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' + scriptLocation: inlineScript + workingDirectory: $(Build.SourcesDirectory) + inlineScript: 'az extension add --source https://azurecliext.blob.core.windows.net/release/azure_cli_ml-1.27.0-py3-none-any.whl --yes' + - task: AzureCLI@1 + displayName: "Deploy to AKS (CLI)" + inputs: + azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' + scriptLocation: inlineScript + workingDirectory: $(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring + inlineScript: | + set -e # fail on error + + az ml model deploy --name $(AKS_DEPLOYMENT_NAME) --model '$(MODEL_NAME):$(get_model.MODEL_VERSION)' \ + --compute-target $(AKS_COMPUTE_NAME) \ + --ic inference_config.yml \ + --dc deployment_config_aks.yml \ + -g $(RESOURCE_GROUP) --workspace-name 
$(WORKSPACE_NAME) \ + --overwrite -v + - task: AzureCLI@1 + displayName: 'Smoke test' + inputs: + azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' + scriptLocation: inlineScript + inlineScript: | + set -e # fail on error + export SUBSCRIPTION_ID=$(az account show --query id -o tsv) + python -m ml_service.util.smoke_test_scoring_service --type AKS --service "$(AKS_DEPLOYMENT_NAME)" + +- stage: 'Deploy_Webapp' + displayName: 'Deploy to Webapp' + condition: variables['WEBAPP_DEPLOYMENT_NAME'] + jobs: + - job: "Deploy_Webapp" + displayName: "Package and deploy model" + container: mlops + timeoutInMinutes: 0 + steps: + - template: diabetes_regression-get-model-id-artifact-template.yml + parameters: + projectId: '$(resources.pipeline.model-train-ci.projectID)' + pipelineId: '$(resources.pipeline.model-train-ci.pipelineID)' + artifactBuildId: ${{ parameters.artifactBuildId }} + - template: diabetes_regression-package-model-template.yml + parameters: + modelId: $(MODEL_NAME):$(get_model.MODEL_VERSION) + scoringScriptPath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring/score.py' + condaFilePath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/conda_dependencies.yml' + - script: echo $(IMAGE_LOCATION) >image_location.txt + displayName: "Write image location file" + - task: AzureWebAppContainer@1 + name: WebAppDeploy + displayName: 'Azure Web App on Container Deploy' + inputs: + azureSubscription: '$(AZURE_RM_SVC_CONNECTION)' + appName: '$(WEBAPP_DEPLOYMENT_NAME)' + resourceGroupName: '$(RESOURCE_GROUP)' + imageName: '$(IMAGE_LOCATION)' + - task: AzureCLI@1 + displayName: 'Smoke test' + inputs: + azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' + scriptLocation: inlineScript + inlineScript: | + set -e # fail on error + export SUBSCRIPTION_ID=$(az account show --query id -o tsv) + python -m ml_service.util.smoke_test_scoring_service --type Webapp --service "$(WebAppDeploy.AppServiceApplicationUrl)/score" diff --git a/.pipelines/diabetes_regression-ci-image.yml b/.pipelines/diabetes_regression-ci-image.yml new file mode 100644 index 00000000..d7c925bf --- /dev/null +++ b/.pipelines/diabetes_regression-ci-image.yml @@ -0,0 +1,38 @@ +# Pipeline for building the container image that is used by other pipelines for scoring. + +resources: + containers: + - container: mlops + image: mcr.microsoft.com/mlops/python:latest + +pr: none +trigger: + branches: + include: + - master + paths: + include: + - ml_service/util/create_scoring_image.py + - ml_service/util/Dockerfile + - diabetes_regression/scoring/ + exclude: + - diabetes_regression/scoring/deployment_config_aci.yml + - diabetes_regression/scoring/deployment_config_aks.yml + +pool: + vmImage: 'ubuntu-latest' + +container: mlops + +variables: +- group: devopsforai-aml-vg +- name: 'SCORE_SCRIPT' + value: 'scoring/scoreB.py' + +steps: +- template: diabetes_regression-package-model-template.yml + parameters: + modelId: $(MODEL_NAME):$(MODEL_VERSION) + scoringScriptPath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/$(SCORE_SCRIPT)' + condaFilePath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/conda_dependencies.yml' + diff --git a/.pipelines/diabetes_regression-ci.yml b/.pipelines/diabetes_regression-ci.yml new file mode 100644 index 00000000..5a539af0 --- /dev/null +++ b/.pipelines/diabetes_regression-ci.yml @@ -0,0 +1,97 @@ +# Continuous Integration (CI) pipeline that orchestrates the training, evaluation, and registration of the diabetes_regression model. 
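+# In outline: the Model_CI stage lints, unit tests, and publishes the AML training
+# pipeline; Trigger_AML_Pipeline then invokes that pipeline and publishes a model
+# artifact when the run registers a new model.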
+
+resources:
+  containers:
+  - container: mlops
+    image: mcr.microsoft.com/mlops/python:latest
+
+pr: none
+trigger:
+  branches:
+    include:
+    - master
+  paths:
+    include:
+    - diabetes_regression/
+    - ml_service/pipelines/diabetes_regression_build_train_pipeline.py
+    - ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py
+    - ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py
+
+variables:
+- template: diabetes_regression-variables-template.yml
+- group: devopsforai-aml-vg
+
+pool:
+  vmImage: ubuntu-latest
+
+stages:
+- stage: 'Model_CI'
+  displayName: 'Model CI'
+  jobs:
+  - job: "Model_CI_Pipeline"
+    displayName: "Model CI Pipeline"
+    container: mlops
+    timeoutInMinutes: 0
+    steps:
+    - template: code-quality-template.yml
+    - task: AzureCLI@1
+      inputs:
+        azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
+        scriptLocation: inlineScript
+        workingDirectory: $(Build.SourcesDirectory)
+        inlineScript: |
+          set -e # fail on error
+          export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
+          # Invoke the Python script that builds and publishes the training pipeline
+          python -m ml_service.pipelines.diabetes_regression_build_train_pipeline
+      displayName: 'Publish Azure Machine Learning Pipeline'
+
+- stage: 'Trigger_AML_Pipeline'
+  displayName: 'Train and evaluate model'
+  condition: succeeded()
+  variables:
+    BUILD_URI: '$(SYSTEM.COLLECTIONURI)$(SYSTEM.TEAMPROJECT)/_build/results?buildId=$(BUILD.BUILDID)'
+  jobs:
+  - job: "Get_Pipeline_ID"
+    condition: and(succeeded(), eq(coalesce(variables['auto-trigger-training'], 'true'), 'true'))
+    displayName: "Get Pipeline ID for execution"
+    container: mlops
+    timeoutInMinutes: 0
+    steps:
+    - task: AzureCLI@1
+      inputs:
+        azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
+        scriptLocation: inlineScript
+        workingDirectory: $(Build.SourcesDirectory)
+        inlineScript: |
+          set -e # fail on error
+          export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
+          python -m ml_service.pipelines.run_train_pipeline --output_pipeline_id_file "pipeline_id.txt" --skip_train_execution
+          # Set the AMLPIPELINEID variable for the AML Pipeline task in the next job
+          AMLPIPELINEID="$(cat pipeline_id.txt)"
+          echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$AMLPIPELINEID"
+      name: 'getpipelineid'
+      displayName: 'Get Pipeline ID'
+  - job: "Run_ML_Pipeline"
+    dependsOn: "Get_Pipeline_ID"
+    displayName: "Trigger ML Training Pipeline"
+    timeoutInMinutes: 0
+    pool: server
+    variables:
+      AMLPIPELINE_ID: $[ dependencies.Get_Pipeline_ID.outputs['getpipelineid.AMLPIPELINEID'] ]
+    steps:
+    - task: ms-air-aiagility.vss-services-azureml.azureml-restApi-task.MLPublishedPipelineRestAPITask@0
+      displayName: 'Invoke ML pipeline'
+      inputs:
+        azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
+        PipelineId: '$(AMLPIPELINE_ID)'
+        ExperimentName: '$(EXPERIMENT_NAME)'
+        PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}, "tags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILD_URI)"}, "StepTags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILD_URI)"}'
+  - job: "Training_Run_Report"
+    dependsOn: "Run_ML_Pipeline"
+    condition: always()
+    displayName: "Publish artifact if new model was registered"
+    container: mlops
+    timeoutInMinutes: 0
+    steps:
+    - template: diabetes_regression-publish-model-artifact-template.yml
diff --git a/.pipelines/diabetes_regression-get-model-id-artifact-template.yml b/.pipelines/diabetes_regression-get-model-id-artifact-template.yml
new file mode 100644
index 00000000..b9e61306
--- /dev/null
+++
b/.pipelines/diabetes_regression-get-model-id-artifact-template.yml @@ -0,0 +1,48 @@ +# Pipeline template that gets the model name and version from a previous build's artifact + +parameters: +- name: projectId + type: string + default: '' +- name: pipelineId + type: string + default: '' +- name: artifactBuildId + type: string + default: latest + +steps: + - download: none + - task: DownloadPipelineArtifact@2 + displayName: Download Pipeline Artifacts + inputs: + source: 'specific' + project: '${{ parameters.projectId }}' + pipeline: '${{ parameters.pipelineId }}' + preferTriggeringPipeline: true + ${{ if eq(parameters.artifactBuildId, 'latest') }}: + buildVersionToDownload: 'latestFromBranch' + ${{ if ne(parameters.artifactBuildId, 'latest') }}: + buildVersionToDownload: 'specific' + runId: '${{ parameters.artifactBuildId }}' + runBranch: '$(Build.SourceBranch)' + path: $(Build.SourcesDirectory)/bin + - task: Bash@3 + name: get_model + displayName: Parse Json for Model Name and Version + inputs: + targetType: 'inline' + script: | + # Print JSON + cat $(Build.SourcesDirectory)/bin/model/model.json | jq '.' + + # Set model name and version variables + MODEL_NAME=$(jq -r '.name' <$(Build.SourcesDirectory)/bin/model/model.json) + MODEL_VERSION=$(jq -r '.version' <$(Build.SourcesDirectory)/bin/model/model.json) + + echo "Model Name: $MODEL_NAME" + echo "Model Version: $MODEL_VERSION" + + # Set environment variables + echo "##vso[task.setvariable variable=MODEL_VERSION;isOutput=true]$MODEL_VERSION" + echo "##vso[task.setvariable variable=MODEL_NAME;isOutput=true]$MODEL_NAME" diff --git a/.pipelines/diabetes_regression-package-model-template.yml b/.pipelines/diabetes_regression-package-model-template.yml new file mode 100644 index 00000000..16fc1c1d --- /dev/null +++ b/.pipelines/diabetes_regression-package-model-template.yml @@ -0,0 +1,42 @@ +# Pipeline template that creates a model package and adds the package location to the environment for subsequent tasks to use. 
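+# The package location is exposed to later tasks as the IMAGE_LOCATION variable (set with
+# the ##vso[task.setvariable] logging command below); for example, the Web App deployment
+# in diabetes_regression-cd.yml reads $(IMAGE_LOCATION).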
+parameters: +- name: modelId + type: string + default: '' +- name: scoringScriptPath + type: string + default: '' +- name: condaFilePath + type: string + default: '' + +steps: + - task: AzureCLI@1 + displayName: 'Install AzureML CLI' + inputs: + azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' + scriptLocation: inlineScript + workingDirectory: $(Build.SourcesDirectory) + inlineScript: 'az extension add --source https://azurecliext.blob.core.windows.net/release/azure_cli_ml-1.27.0-py3-none-any.whl --yes' + - task: AzureCLI@1 + displayName: 'Create model package and set IMAGE_LOCATION variable' + inputs: + azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' + scriptLocation: inlineScript + inlineScript: | + set -e # fail on error + + # Create model package using CLI + az ml model package --workspace-name $(WORKSPACE_NAME) -g $(RESOURCE_GROUP) \ + --model '${{ parameters.modelId }}' \ + --entry-script '${{ parameters.scoringScriptPath }}' \ + --cf '${{ parameters.condaFilePath }}' \ + -v \ + --rt python --query 'location' -o tsv > image_logs.txt + + # Show logs + cat image_logs.txt + + # Set environment variable using the last line of logs that has the package location + IMAGE_LOCATION=$(tail -n 1 image_logs.txt) + echo "##vso[task.setvariable variable=IMAGE_LOCATION]$IMAGE_LOCATION" diff --git a/.pipelines/diabetes_regression-publish-model-artifact-template.yml b/.pipelines/diabetes_regression-publish-model-artifact-template.yml new file mode 100644 index 00000000..d666750d --- /dev/null +++ b/.pipelines/diabetes_regression-publish-model-artifact-template.yml @@ -0,0 +1,29 @@ +# Pipeline template to check if a model was registered for the build and publishes an artifact with the model JSON +steps: +- task: AzureCLI@1 + displayName: 'Install AzureML CLI' + inputs: + azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' + scriptLocation: inlineScript + workingDirectory: $(Build.SourcesDirectory) + inlineScript: 'az extension add --source https://azurecliext.blob.core.windows.net/release/azure_cli_ml-1.27.0-py3-none-any.whl --yes' +- task: AzureCLI@1 + inputs: + azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' + scriptLocation: inlineScript + workingDirectory: $(Build.SourcesDirectory) + inlineScript: | + set -e # fail on error + + # Get the model using the build ID tag + FOUND_MODEL=$(az ml model list -g $(RESOURCE_GROUP) --workspace-name $(WORKSPACE_NAME) --tag BuildId=$(Build.BuildId) --query '[0]') + + # If the variable is empty, print and fail + [[ -z "$FOUND_MODEL" ]] && { echo "Model was not registered for this run." ; exit 1; } + + # Write to a file + echo $FOUND_MODEL >model.json + name: 'getversion' + displayName: "Determine if evaluation succeeded and new model is registered (CLI)" +- publish: model.json + artifact: model diff --git a/.pipelines/diabetes_regression-variables-template.yml b/.pipelines/diabetes_regression-variables-template.yml new file mode 100644 index 00000000..502753fb --- /dev/null +++ b/.pipelines/diabetes_regression-variables-template.yml @@ -0,0 +1,129 @@ +# Pipeline template that defines common runtime environment variables. 
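+# Defaults here mirror ml_service/util/env_variables.py (see the commented-out entries
+# below); secrets such as the scoring datastore access key come from the
+# devopsforai-aml-vg variable group rather than this template.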
+variables:
+  # Source Config
+  # The directory containing the scripts for training, evaluating, and registering the model
+  - name: SOURCES_DIR_TRAIN
+    value: diabetes_regression
+  # The path to the model training script under SOURCES_DIR_TRAIN
+  - name: TRAIN_SCRIPT_PATH
+    value: training/train_aml.py
+  # The path to the model evaluation script under SOURCES_DIR_TRAIN
+  - name: EVALUATE_SCRIPT_PATH
+    value: evaluate/evaluate_model.py
+  # The path to the model registration script under SOURCES_DIR_TRAIN
+  - name: REGISTER_SCRIPT_PATH
+    value: register/register_model.py
+  # The path to the model scoring script relative to SOURCES_DIR_TRAIN
+  - name: SCORE_SCRIPT
+    value: scoring/score.py
+
+
+  # Azure ML Variables
+  - name: EXPERIMENT_NAME
+    value: mlopspython
+  - name: DATASET_NAME
+    value: diabetes_ds
+  # Uncomment DATASTORE_NAME if you have configured a non-default datastore to point to your data
+  # - name: DATASTORE_NAME
+  #   value: datablobstore
+  - name: DATASET_VERSION
+    value: latest
+  - name: TRAINING_PIPELINE_NAME
+    value: "diabetes-Training-Pipeline"
+  - name: MODEL_NAME
+    value: diabetes_regression_model.pkl
+
+  # AML Compute Cluster Config
+  - name: AML_ENV_NAME
+    value: diabetes_regression_training_env
+  - name: AML_ENV_TRAIN_CONDA_DEP_FILE
+    value: "conda_dependencies.yml"
+  - name: AML_COMPUTE_CLUSTER_CPU_SKU
+    value: STANDARD_DS2_V2
+  - name: AML_COMPUTE_CLUSTER_NAME
+    value: train-cluster
+  - name: AML_CLUSTER_MIN_NODES
+    value: 0
+  - name: AML_CLUSTER_MAX_NODES
+    value: 4
+  - name: AML_CLUSTER_PRIORITY
+    value: lowpriority
+
+  # The name for the (docker/webapp) scoring image
+  - name: IMAGE_NAME
+    value: "diabetestrained"
+
+  # Optional. Used by a training pipeline with R on Databricks
+  - name: DB_CLUSTER_ID
+    value: ""
+
+  # These are the default values set in ml_service\util\env_variables.py. Uncomment and override if desired.
+  # Set to false to disable the evaluation step in the ML pipeline and register the newly trained model unconditionally.
+  # - name: RUN_EVALUATION
+  #   value: "true"
+  # Set to false to register the model regardless of the outcome of the evaluation step in the ML pipeline.
+  # - name: ALLOW_RUN_CANCEL
+  #   value: "true"
+
+  # Flag to allow rebuilding the AML Environment after it was built for the first time. This enables dependency updates from conda_dependencies.yml.
+  # - name: AML_REBUILD_ENVIRONMENT
+  #   value: "false"
+
+  # Variables below are used for controlling various aspects of batch scoring
+  - name: USE_GPU_FOR_SCORING
+    value: False
+  # Conda dependencies for the batch scoring step
+  - name: AML_ENV_SCORE_CONDA_DEP_FILE
+    value: "conda_dependencies_scoring.yml"
+  # Conda dependencies for the score copying step
+  - name: AML_ENV_SCORECOPY_CONDA_DEP_FILE
+    value: "conda_dependencies_scorecopy.yml"
+  # AML Compute Cluster Config for parallel batch scoring
+  - name: AML_ENV_NAME_SCORING
+    value: diabetes_regression_scoring_env
+  - name: AML_ENV_NAME_SCORE_COPY
+    value: diabetes_regression_score_copy_env
+  - name: AML_COMPUTE_CLUSTER_CPU_SKU_SCORING
+    value: STANDARD_DS2_V2
+  - name: AML_COMPUTE_CLUSTER_NAME_SCORING
+    value: score-cluster
+  - name: AML_CLUSTER_MIN_NODES_SCORING
+    value: 0
+  - name: AML_CLUSTER_MAX_NODES_SCORING
+    value: 4
+  - name: AML_CLUSTER_PRIORITY_SCORING
+    value: lowpriority
+  # The path to the batch scoring script relative to SOURCES_DIR_TRAIN
+  - name: BATCHSCORE_SCRIPT_PATH
+    value: scoring/parallel_batchscore.py
+  - name: BATCHSCORE_COPY_SCRIPT_PATH
+    value: scoring/parallel_batchscore_copyoutput.py
+  # Flag to allow rebuilding the AML Environment after it was built for the first time.
+  # This enables dependency updates from the conda dependencies yaml for scoring activities.
+  - name: AML_REBUILD_ENVIRONMENT_SCORING
+    value: "true"
+
+  # Datastore config for scoring
+  # The storage account name and key are supplied as variables in a variable group
+  # in the Azure Pipelines library for this project. Please refer to the repo docs for
+  # more details.
+
+  # Blob container where the input data for scoring can be found
+  - name: SCORING_DATASTORE_INPUT_CONTAINER
+    value: "input"
+  # Blob name for the input data - include any applicable path in the string
+  - name: SCORING_DATASTORE_INPUT_FILENAME
+    value: "diabetes_scoring_input.csv"
+  # Blob container where the output data for scoring can be found
+  - name: SCORING_DATASTORE_OUTPUT_CONTAINER
+    value: "output"
+  # Blob name for the output data - include any applicable path in the string
+  - name: SCORING_DATASTORE_OUTPUT_FILENAME
+    value: "diabetes_scoring_output.csv"
+  # Dataset name for input data for scoring
+  - name: SCORING_DATASET_NAME
+    value: "diabetes_scoring_ds"
+  # Scoring pipeline name
+  - name: SCORING_PIPELINE_NAME
+    value: "diabetes-scoring-pipeline"
+
\ No newline at end of file
diff --git a/.pipelines/helm-install-template.yml b/.pipelines/helm-install-template.yml
new file mode 100644
index 00000000..a4dbd581
--- /dev/null
+++ b/.pipelines/helm-install-template.yml
@@ -0,0 +1,10 @@
+# Pipeline template for installing Helm on the agent.
+steps:
+- task: Bash@3
+  displayName: 'Install Helm $(helmVersion)'
+  inputs:
+    targetType: inline
+    script: wget -q $(helmDownloadURL) -O /tmp/$FILENAME && tar -zxvf /tmp/$FILENAME -C /tmp && sudo mv /tmp/linux-amd64/helm /usr/local/bin/helm
+  env:
+    HELM_VERSION: $(helmVersion)
+    FILENAME: helm-$(helmVersion)-linux-amd64.tar.gz
diff --git a/.pipelines/helm-upgrade-template.yml b/.pipelines/helm-upgrade-template.yml
new file mode 100644
index 00000000..4f75c8ed
--- /dev/null
+++ b/.pipelines/helm-upgrade-template.yml
@@ -0,0 +1,20 @@
+# Pipeline template for deploying or upgrading a release using Helm.
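+# Used by abtest.yml for both the abtest-model and abtest-istio charts; `install: true`
+# below makes the upgrade behave like `helm upgrade --install`, creating the release
+# when it does not exist yet.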
+parameters: + chartPath: '' + releaseName: '' + overrideValues: '' + +steps: +- template: helm-install-template.yml +- task: HelmDeploy@0 + displayName: 'helm upgrade' + inputs: + connectionType: 'Kubernetes Service Connection' + kubernetesServiceConnection: $(K8S_AB_SERVICE_CONNECTION) + command: upgrade + chartType: FilePath + chartPath: ${{ parameters.chartPath }} + releaseName: ${{ parameters.releaseName }} + overrideValues: ${{ parameters.overrideValues }} + install: true + arguments: --namespace $(K8S_AB_NAMESPACE) diff --git a/.pipelines/pr.yml b/.pipelines/pr.yml new file mode 100644 index 00000000..765a5fef --- /dev/null +++ b/.pipelines/pr.yml @@ -0,0 +1,24 @@ +# Pipeline to run basic code quality tests as part of pull requests to the master branch. + +resources: + containers: + - container: mlops + image: mcr.microsoft.com/mlops/python:latest + +trigger: none +pr: + branches: + include: + - master + +pool: + vmImage: 'ubuntu-latest' + +container: mlops + +variables: +- template: diabetes_regression-variables-template.yml +- group: devopsforai-aml-vg + +steps: +- template: code-quality-template.yml diff --git a/README.md b/README.md index c434ef51..434be0df 100644 --- a/README.md +++ b/README.md @@ -1,84 +1,59 @@ -# MLOps with Azure ML - -[![Build Status](https://dev.azure.com/customai/DevopsForAI-AML/_apis/build/status/Microsoft.DevOpsForAI?branchName=master)](https://dev.azure.com/customai/DevopsForAI-AML/_build/latest?definitionId=1&branchName=master) - -MLOps will help you to understand how to build the Continuous Integration and Continuous Delivery pipeline for a ML/AI project. We will be using the Azure DevOps Project for build and release/deployment pipelines along with Azure ML services for model retraining pipeline, model management and operationalization. - -![ML lifecycle](/docs/images/ml-lifecycle.png) - -This template contains code and pipeline definition for a machine learning project demonstrating how to automate an end to end ML/AI workflow. The build pipelines include DevOps tasks for data sanity test, unit test, model training on different compute targets, model version management, model evaluation/model selection, model deployment as realtime web service, staged deployment to QA/prod and integration testing. - - -## Prerequisite -- Active Azure subscription -- At least contributor access to Azure subscription +--- +page_type: sample +languages: +- python +products: +- azure +- azure-machine-learning-service +- azure-devops +description: "Code which demonstrates how to set up and operationalize an MLOps flow leveraging Azure Machine Learning and Azure DevOps." +--- -## Getting Started: - -To deploy this solution in your subscription, follow the manual instructions in the [getting started](docs/getting_started.md) doc - - -## Architecture Diagram - -This reference architecture shows how to implement continuous integration (CI), continuous delivery (CD), and retraining pipeline for an AI application using Azure DevOps and Azure Machine Learning. The solution is built on the scikit-learn diabetes dataset but can be easily adapted for any AI scenario and other popular build systems such as Jenkins and Travis. 
+# MLOps with Azure ML -![Architecture](/docs/images/Architecture_DevOps_AI.png) +CI: [![Build Status](https://aidemos.visualstudio.com/MLOps/_apis/build/status/Model-Train-Register-CI?branchName=master)](https://aidemos.visualstudio.com/MLOps/_build/latest?definitionId=160&branchName=master) +CD: [![Build Status](https://aidemos.visualstudio.com/MLOps/_apis/build/status/microsoft.MLOpsPython-CD?branchName=master)](https://aidemos.visualstudio.com/MLOps/_build/latest?definitionId=161&branchName=master) -## Architecture Flow +MLOps will help you to understand how to build a Continuous Integration and Continuous Delivery pipeline for an ML/AI project. We will be using the Azure DevOps Project for build and release/deployment pipelines along with Azure ML services for model retraining pipeline, model management and operationalization. -### Train Model -1. Data Scientist writes/updates the code and push it to git repo. This triggers the Azure DevOps build pipeline (continuous integration). -2. Once the Azure DevOps build pipeline is triggered, it runs following types of tasks: - - Run for new code: Every time new code is committed to the repo, the build pipeline performs data sanity tests and unit tests on the new code. - - One-time run: These tasks runs only for the first time the build pipeline runs. It will programatically create an [Azure ML Service Workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace), provision [Azure ML Compute](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute) (used for model training compute), and publish an [Azure ML Pipeline](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-ml-pipelines). This published Azure ML pipeline is the model training/retraining pipeline. +![ML lifecycle](/docs/images/ml-lifecycle.png) - > Note: The Publish Azure ML pipeline task currently runs for every code change +This template contains code and pipeline definitions for a machine learning project that demonstrates how to automate an end to end ML/AI workflow. -3. The Azure ML Retraining pipeline is triggered once the Azure DevOps build pipeline completes. All the tasks in this pipeline runs on Azure ML Compute created earlier. Following are the tasks in this pipeline: +## Architecture and Features - - **Train Model** task executes model training script on Azure ML Compute. It outputs a [model](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#model) file which is stored in the [run history](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#run). +Architecture Reference: [Machine learning operationalization (MLOps) for Python models using Azure Machine Learning](https://docs.microsoft.com/en-us/azure/architecture/reference-architectures/ai/mlops-python) - - **Evaluate Model** task evaluates the performance of newly trained model with the model in production. If the new model performs better than the production model, the following steps are executed. If not, they will be skipped. +This reference architecture shows how to implement continuous integration (CI), continuous delivery (CD), and retraining pipeline for an AI application using Azure DevOps and [Azure Machine Learning](/azure/machine-learning/service/overview-what-is-azure-ml). 
The solution is built on the scikit-learn diabetes dataset but can be easily adapted for any AI scenario and other popular build systems such as Jenkins and Travis. - - **Register Model** task takes the improved model and registers it with the [Azure ML Model registry](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#model-registry). This allows us to version control it. +The build pipelines include DevOps tasks for data sanity tests, unit tests, model training on different compute targets, model version management, model evaluation/model selection, model deployment as realtime web service, staged deployment to QA/prod and integration testing. -### Deploy Model +## Prerequisite -Once you have registered your ML model, you can use Azure ML + Azure DevOps to deploy it. +- Active Azure subscription +- At least contributor access to Azure subscription -The **Package Model** task packages the new model along with the scoring file and its python dependencies into a [docker image](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#image) and pushes it to [Azure Container Registry](https://docs.microsoft.com/en-us/azure/container-registry/container-registry-intro). This image is used to deploy the model as [web service](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#web-service). - -The **Deploy Model** task handles deploying your Azure ML model to the cloud (ACI or AKS). -This pipeline deploys the model scoring image into Staging/QA and PROD environments. +## Getting Started - In the Staging/QA environment, one task creates an [Azure Container Instance](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-overview) and deploys the scoring image as a [web service](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#web-service) on it. - -The second task invokes the web service by calling its REST endpoint with dummy data. - -5. The deployment in production is a [gated release](https://docs.microsoft.com/en-us/azure/devops/pipelines/release/approvals/gates?view=azure-devops). This means that once the model web service deployment in the Staging/QA environment is successful, a notification is sent to approvers to manually review and approve the release. Once the release is approved, the model scoring web service is deployed to [Azure Kubernetes Service(AKS)](https://docs.microsoft.com/en-us/azure/aks/intro-kubernetes) and the deployment is tested. +To deploy this solution in your subscription, follow the manual instructions in the [getting started](docs/getting_started.md) doc. Then optionally follow the guide for [integrating your own code](docs/custom_model.md) with this repository template. 
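+
+For local development, the repository includes a `.env.example` file listing the environment variables the `ml_service` scripts expect. Below is a minimal sketch of a local setup; the authoritative steps live in the getting started doc, and the commands assume a Unix-like shell:
+
+```bash
+# Create and activate an isolated Python environment
+python3 -m venv venv
+source venv/bin/activate
+
+# Copy the example environment file, then fill in your own subscription,
+# service principal, and workspace values before running the ml_service scripts
+cp .env.example .env
+```
+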
 ### Repo Details
 
 You can find the details of the code and scripts in the repository [here](/docs/code_description.md)
 
 ### References
-- [Azure Machine Learning(Azure ML) Service Workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/overview-what-is-azure-ml)
+
+- [Azure Machine Learning (Azure ML) Service Workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/overview-what-is-azure-ml)
 - [Azure ML CLI](https://docs.microsoft.com/en-us/azure/machine-learning/service/reference-azure-machine-learning-cli)
 - [Azure ML Samples](https://docs.microsoft.com/en-us/azure/machine-learning/service/samples-notebooks)
 - [Azure ML Python SDK Quickstart](https://docs.microsoft.com/en-us/azure/machine-learning/service/quickstart-create-workspace-with-python)
 - [Azure DevOps](https://docs.microsoft.com/en-us/azure/devops/?view=vsts)
 
-# Contributing
+## Contributing
 
-This project welcomes contributions and suggestions. Most contributions require you to agree to a
-Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
-the rights to use your contribution. For details, visit https://cla.microsoft.com.
+This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.microsoft.com.
 
-When you submit a pull request, a CLA-bot will automatically determine whether you need to provide
-a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions
-provided by the bot. You will only need to do this once across all repos using our CLA.
+When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA.
 
-This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
-For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
-contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
+This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
diff --git a/aml_config/config.json b/aml_config/config.json deleted file mode 100644 index 7105ecf7..00000000 --- a/aml_config/config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "subscription_id": "<>", - "resource_group": "DevOps_AzureML_Demo", - "workspace_name": "AzureML_Demo_ws", - "location": "southcentralus" -} diff --git a/aml_config/security_config.json b/aml_config/security_config.json deleted file mode 100644 index acfd3261..00000000 --- a/aml_config/security_config.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "sp_user" : "<>", - "sp_password" : "<>", - "sp_tenant_id" : "<>", - "remote_vm_name" : "<>", - "remote_vm_username" : "<>", - "remote_vm_password" : "<>", - "remote_vm_ip" : "<>", - "experiment_name" : "devops-ai-demo", - "aml_cluster_name" : "aml-compute", - "vnet_resourcegroup_name" : "<>", - "vnet_name" : "<>", - "subnet_name" : "<>" -} \ No newline at end of file diff --git a/aml_service/00-WorkSpace.py b/aml_service/00-WorkSpace.py deleted file mode 100644 index f234ed4c..00000000 --- a/aml_service/00-WorkSpace.py +++ /dev/null @@ -1,64 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -""" -from azureml.core import Workspace -import os, json, sys -import azureml.core -from azureml.core.authentication import AzureCliAuthentication - -print("SDK Version:", azureml.core.VERSION) -# print('current dir is ' +os.curdir) -with open("aml_config/config.json") as f: - config = json.load(f) - -workspace_name = config["workspace_name"] -resource_group = config["resource_group"] -subscription_id = config["subscription_id"] -location = config["location"] - -cli_auth = AzureCliAuthentication() - -try: - ws = Workspace.get( - name=workspace_name, - subscription_id=subscription_id, - resource_group=resource_group, - auth=cli_auth, - ) - -except: - # this call might take a minute or two. 
- print("Creating new workspace") - ws = Workspace.create( - name=workspace_name, - subscription_id=subscription_id, - resource_group=resource_group, - # create_resource_group=True, - location=location, - auth=cli_auth, - ) - -# print Workspace details -print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep="\n") diff --git a/aml_service/02-AttachTrainingVM.py b/aml_service/02-AttachTrainingVM.py deleted file mode 100644 index 3fc11c25..00000000 --- a/aml_service/02-AttachTrainingVM.py +++ /dev/null @@ -1,78 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-""" - -from azureml.core import Workspace -from azureml.core import Run -from azureml.core import Experiment -from azureml.core.conda_dependencies import CondaDependencies -from azureml.core.runconfig import RunConfiguration -import os, json -from azureml.core.compute import RemoteCompute -from azureml.core.compute import DsvmCompute -from azureml.core.compute_target import ComputeTargetException -from azureml.core.authentication import AzureCliAuthentication - -cli_auth = AzureCliAuthentication() - -# Get workspace -ws = Workspace.from_config(auth=cli_auth) - -# Read the New VM Config -with open("aml_config/security_config.json") as f: - config = json.load(f) - -remote_vm_name = config["remote_vm_name"] -remote_vm_username = config["remote_vm_username"] -remote_vm_password = config["remote_vm_password"] -remote_vm_ip = config["remote_vm_ip"] - -try: - dsvm_compute = RemoteCompute.attach( - ws, - name=remote_vm_name, - username=remote_vm_username, - address=remote_vm_ip, - ssh_port=22, - password=remote_vm_password, - ) - dsvm_compute.wait_for_completion(show_output=True) - -except Exception as e: - print("Caught = {}".format(e.message)) - print("Compute config already attached.") - - -## Create VM if not available -# compute_target_name = remote_vm_name - -# try: -# dsvm_compute = DsvmCompute(workspace=ws, name=compute_target_name) -# print('found existing:', dsvm_compute.name) -# except ComputeTargetException: -# print('creating new.') -# dsvm_config = DsvmCompute.provisioning_configuration(vm_size="Standard_D2_v2") -# dsvm_compute = DsvmCompute.create(ws, name=compute_target_name, provisioning_configuration=dsvm_config) -# dsvm_compute.wait_for_completion(show_output=True) diff --git a/aml_service/03-AttachAmlCluster.py b/aml_service/03-AttachAmlCluster.py deleted file mode 100644 index 1ba3f127..00000000 --- a/aml_service/03-AttachAmlCluster.py +++ /dev/null @@ -1,66 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-""" - -from azureml.core import Workspace -from azureml.core.compute import ComputeTarget, AmlCompute -from azureml.core.compute_target import ComputeTargetException -from azureml.core.authentication import AzureCliAuthentication -import os, json - -cli_auth = AzureCliAuthentication() -# Get workspace -ws = Workspace.from_config(auth=cli_auth) - -# Read the New VM Config -with open("aml_config/security_config.json") as f: - config = json.load(f) - -aml_cluster_name = config["aml_cluster_name"] - -# un-comment the below lines if you want to put AML Compute under Vnet. Also update /aml_config/security_config.json -# vnet_resourcegroup_name = config['vnet_resourcegroup_name'] -# vnet_name = config['vnet_name'] -# subnet_name = config['subnet_name'] - -# Verify that cluster does not exist already -try: - cpu_cluster = ComputeTarget(workspace=ws, name=aml_cluster_name) - print("Found existing cluster, use it.") -except ComputeTargetException: - compute_config = AmlCompute.provisioning_configuration( - vm_size="STANDARD_D2_V2", - vm_priority="dedicated", - min_nodes=1, - max_nodes=3, - idle_seconds_before_scaledown="300", - # #Uncomment the below lines for VNet support - # vnet_resourcegroup_name=vnet_resourcegroup_name, - # vnet_name=vnet_name, - # subnet_name=subnet_name - ) - cpu_cluster = ComputeTarget.create(ws, aml_cluster_name, compute_config) - -cpu_cluster.wait_for_completion(show_output=True) diff --git a/aml_service/04-AmlPipelines.py b/aml_service/04-AmlPipelines.py deleted file mode 100644 index 520c8d95..00000000 --- a/aml_service/04-AmlPipelines.py +++ /dev/null @@ -1,196 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-""" - -import os, json, requests, datetime -import argparse -from azureml.core import Workspace, Experiment, Datastore -from azureml.core.runconfig import RunConfiguration, CondaDependencies -from azureml.data.data_reference import DataReference -from azureml.pipeline.core import Pipeline, PipelineData, StepSequence -from azureml.pipeline.steps import PythonScriptStep -from azureml.pipeline.core import PublishedPipeline -from azureml.pipeline.core.graph import PipelineParameter -from azureml.core.compute import ComputeTarget - -# from azureml.widgets import RunDetails -from azureml.core.authentication import AzureCliAuthentication - -print("Pipeline SDK-specific imports completed") - -cli_auth = AzureCliAuthentication() - - -parser = argparse.ArgumentParser("Pipeline") -parser.add_argument( - "--pipeline_action", - type=str, - choices=["pipeline-test", "publish"], - help="Determines if pipeline needs to run on small data set \ - or pipeline needs to be republished", - #default="pipeline-test", -) - -args = parser.parse_args() - - -# Get workspace -ws = Workspace.from_config(path="aml_config/config.json", auth=cli_auth) -def_blob_store = Datastore(ws, "workspaceblobstore") - -# Get AML Compute name and Experiment Name -with open("aml_config/security_config.json") as f: - config = json.load(f) - -experiment_name = config["experiment_name"] -aml_cluster_name = config["aml_cluster_name"] -aml_pipeline_name = "training-pipeline" - -source_directory = "code" - -# Run Config -# Declare packages dependencies required in the pipeline (these can also be expressed as a YML file) -# cd = CondaDependencies.create(pip_packages=["azureml-defaults", 'tensorflow==1.8.0']) -cd = CondaDependencies("aml_config/conda_dependencies.yml") - -run_config = RunConfiguration(conda_dependencies=cd) - -aml_compute = ws.compute_targets[aml_cluster_name] - -jsonconfigs = PipelineData("jsonconfigs", datastore=def_blob_store) - -# Suffix for all the config files -config_suffix = datetime.datetime.now().strftime("%Y%m%d%H") -print("PipelineData object created") - -# Create python script step to run the training/scoring main script -train = PythonScriptStep( - name="Train New Model", - script_name="training/train.py", - compute_target=aml_compute, - source_directory=source_directory, - arguments=["--config_suffix", config_suffix, "--json_config", jsonconfigs], - runconfig=run_config, - # inputs=[jsonconfigs], - outputs=[jsonconfigs], - allow_reuse=False, -) -print("Step Train created") - -evaluate = PythonScriptStep( - name="Evaluate New Model with Prod Model", - script_name="evaluate/evaluate_model.py", - compute_target=aml_compute, - source_directory=source_directory, - arguments=["--config_suffix", config_suffix, "--json_config", jsonconfigs], - runconfig=run_config, - inputs=[jsonconfigs], - # outputs=[jsonconfigs], - allow_reuse=False, -) -print("Step Evaluate created") - -register_model = PythonScriptStep( - name="Register New Trained Model", - script_name="register/register_model.py", - compute_target=aml_compute, - source_directory=source_directory, - arguments=["--config_suffix", config_suffix, "--json_config", jsonconfigs], - runconfig=run_config, - inputs=[jsonconfigs], - # outputs=[jsonconfigs], - allow_reuse=False, -) -print("Step register model created") - -package_model = PythonScriptStep( - name="Package Model as Scoring Image", - script_name="scoring/create_scoring_image.py", - compute_target=aml_compute, - source_directory=source_directory, - arguments=["--config_suffix", config_suffix, "--json_config", 
jsonconfigs], - runconfig=run_config, - inputs=[jsonconfigs], - # outputs=[jsonconfigs], - allow_reuse=False, -) -print("Packed the model into a Scoring Image") - -# Create Steps dependency such that they run in sequence -evaluate.run_after(train) -register_model.run_after(evaluate) -package_model.run_after(register_model) - -steps = [package_model] - - -# Build Pipeline -pipeline1 = Pipeline(workspace=ws, steps=steps) -print("Pipeline is built") - -# Validate Pipeline -pipeline1.validate() -print("Pipeline validation complete") - - -# Submit unpublished pipeline with small data set for test -if args.pipeline_action == "pipeline-test": - pipeline_run1 = Experiment(ws, experiment_name).submit( - pipeline1, regenerate_outputs=True - ) - print("Pipeline is submitted for execution") - pipeline_run1.wait_for_completion(show_output=True) - - -# RunDetails(pipeline_run1).show() - - -# Define pipeline parameters -# run_env = PipelineParameter( -# name="dev_flag", -# default_value=True) - -# dbname = PipelineParameter( -# name="dbname", -# default_value='opex') - - -# Publish Pipeline -if args.pipeline_action == "publish": - published_pipeline1 = pipeline1.publish( - name=aml_pipeline_name, description="Model training/retraining pipeline" - ) - print( - "Pipeline is published as rest_endpoint {} ".format( - published_pipeline1.endpoint - ) - ) - # write published pipeline details as build artifact - pipeline_config = {} - pipeline_config["pipeline_name"] = published_pipeline1.name - pipeline_config["rest_endpoint"] = published_pipeline1.endpoint - pipeline_config["experiment_name"] = "published-pipeline-exp" # experiment_name - with open("aml_config/pipeline_config.json", "w") as outfile: - json.dump(pipeline_config, outfile) diff --git a/aml_service/05-TriggerAmlPipeline.py b/aml_service/05-TriggerAmlPipeline.py deleted file mode 100644 index 0a2dc64e..00000000 --- a/aml_service/05-TriggerAmlPipeline.py +++ /dev/null @@ -1,51 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
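Note that 04-AmlPipelines.py above imports StepSequence but never uses it; the chained run_after calls can be expressed with it more declaratively. A sketch using the same step objects (train, evaluate, register_model, package_model) and workspace:

from azureml.pipeline.core import Pipeline, StepSequence

# Steps execute in list order, equivalent to the run_after chain above.
step_sequence = StepSequence(steps=[train, evaluate, register_model, package_model])
pipeline1 = Pipeline(workspace=ws, steps=step_sequence)
pipeline1.validate()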
-""" - -import os, json, requests, datetime, sys -import argparse -from azureml.core.authentication import AzureCliAuthentication - -try: - with open("aml_config/pipeline_config.json") as f: - config = json.load(f) -except: - print("No pipeline config found") - sys.exit(0) - -# Run a published pipeline -cli_auth = AzureCliAuthentication() -aad_token = cli_auth.get_authentication_header() -rest_endpoint1 = config["rest_endpoint"] -experiment_name = config["experiment_name"] -print(rest_endpoint1) - -response = requests.post( - rest_endpoint1, headers=aad_token, json={"ExperimentName": experiment_name} -) - -run_id = response.json()["Id"] -print(run_id) -print("Pipeline run initiated") diff --git a/aml_service/10-TrainOnLocal.py b/aml_service/10-TrainOnLocal.py deleted file mode 100644 index d7c71b3b..00000000 --- a/aml_service/10-TrainOnLocal.py +++ /dev/null @@ -1,73 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -""" - -from azureml.core.runconfig import RunConfiguration -from azureml.core import Workspace -from azureml.core import Experiment -from azureml.core import ScriptRunConfig -import json -from azureml.core.authentication import AzureCliAuthentication - -cli_auth = AzureCliAuthentication() - -# Get workspace -ws = Workspace.from_config(auth=cli_auth) - -# Attach Experiment -experiment_name = "devops-ai-demo" -exp = Experiment(workspace=ws, name=experiment_name) -print(exp.name, exp.workspace.name, sep="\n") - -# Editing a run configuration property on-fly. -run_config_user_managed = RunConfiguration() -run_config_user_managed.environment.python.user_managed_dependencies = True - -print("Submitting an experiment.") -src = ScriptRunConfig( - source_directory="./code", - script="training/train.py", - run_config=run_config_user_managed, -) -run = exp.submit(src) - -# Shows output of the run on stdout. 
-run.wait_for_completion(show_output=True, wait_post_processing=True) - -# Raise exception if run fails -if run.get_status() == "Failed": - raise Exception( - "Training on local failed with following run status: {} and logs: \n {}".format( - run.get_status(), run.get_details_with_logs() - ) - ) - -# Writing the run id to /aml_config/run_id.json - -run_id = {} -run_id["run_id"] = run.id -run_id["experiment_name"] = run.experiment.name -with open("aml_config/run_id.json", "w") as outfile: - json.dump(run_id, outfile) diff --git a/aml_service/11-TrainOnLocalEnv.py b/aml_service/11-TrainOnLocalEnv.py deleted file mode 100644 index 544a9d93..00000000 --- a/aml_service/11-TrainOnLocalEnv.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -""" -## Create a new Conda environment on local and train the model -## System-managed environment - -from azureml.core.conda_dependencies import CondaDependencies -from azureml.core.runconfig import RunConfiguration -from azureml.core import Workspace -from azureml.core import Experiment -from azureml.core import ScriptRunConfig -import json - -from azureml.core.authentication import AzureCliAuthentication - -cli_auth = AzureCliAuthentication() - -# Get workspace -ws = Workspace.from_config(auth=cli_auth) - -# Attach Experiment -experiment_name = "devops-ai-demo" -exp = Experiment(workspace=ws, name=experiment_name) -print(exp.name, exp.workspace.name, sep="\n") - -# Editing a run configuration property on the fly. 
-run_config_system_managed = RunConfiguration() -# Use a new conda environment that is to be created from the conda_dependencies.yml file -run_config_system_managed.environment.python.user_managed_dependencies = False -# Automatically create the conda environment before the run -run_config_system_managed.prepare_environment = True - -# # add scikit-learn to the conda_dependencies.yml file -# Specify conda dependencies with scikit-learn -# run_config_system_managed.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn']) - -print("Submitting an experiment to new conda virtual env") -src = ScriptRunConfig( - source_directory="./code", - script="training/train.py", - run_config=run_config_system_managed, -) -run = exp.submit(src) - -# Shows output of the run on stdout. -run.wait_for_completion(show_output=True, wait_post_processing=True) - -# Raise exception if run fails -if run.get_status() == "Failed": - raise Exception( - "Training on local env failed with following run status: {} and logs: \n {}".format( - run.get_status(), run.get_details_with_logs() - ) - ) - -# Writing the run id to /aml_config/run_id.json -run_id = {} -run_id["run_id"] = run.id -run_id["experiment_name"] = run.experiment.name -with open("aml_config/run_id.json", "w") as outfile: - json.dump(run_id, outfile) diff --git a/aml_service/12-TrainOnVM.py b/aml_service/12-TrainOnVM.py deleted file mode 100644 index 788ffd15..00000000 --- a/aml_service/12-TrainOnVM.py +++ /dev/null @@ -1,80 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
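The commented-out CondaDependencies line in 11-TrainOnLocalEnv.py above is the piece that actually pins packages for a system-managed run; without it the run only gets the defaults. A runnable sketch, assuming scikit-learn is the only extra dependency the training script needs:

from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration

run_config = RunConfiguration()
run_config.environment.python.user_managed_dependencies = False
# Pin the packages the training script needs; AML materializes the conda env before the run.
run_config.environment.python.conda_dependencies = CondaDependencies.create(
    conda_packages=["scikit-learn"]
)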
-""" -import os, json -from azureml.core import Workspace -from azureml.core import Experiment -from azureml.core.compute import RemoteCompute -from azureml.core.runconfig import RunConfiguration -from azureml.core import ScriptRunConfig -import azureml.core -from azureml.core.authentication import AzureCliAuthentication - -cli_auth = AzureCliAuthentication() -# Get workspace -ws = Workspace.from_config(auth=cli_auth) - - -# Read the New VM Config -with open("aml_config/security_config.json") as f: - config = json.load(f) -remote_vm_name = config["remote_vm_name"] - - -# Attach Experiment -experiment_name = "devops-ai-demo" -exp = Experiment(workspace=ws, name=experiment_name) -print(exp.name, exp.workspace.name, sep="\n") - -run_config = RunConfiguration() -run_config.target = remote_vm_name - -# replace with your path to the python interpreter in the remote VM found earlier -run_config.environment.python.interpreter_path = "/anaconda/envs/myenv/bin/python" -run_config.environment.python.user_managed_dependencies = True - - -src = ScriptRunConfig( - source_directory="./code", script="training/train.py", run_config=run_config -) -run = exp.submit(src) - -# Shows output of the run on stdout. -run.wait_for_completion(show_output=True, wait_post_processing=True) - -# Raise exception if run fails -if run.get_status() == "Failed": - raise Exception( - "Training on local env failed with following run status: {} and logs: \n {}".format( - run.get_status(), run.get_details_with_logs() - ) - ) - -# Writing the run id to /aml_config/run_id.json -run_id = {} -run_id["run_id"] = run.id -run_id["experiment_name"] = run.experiment.name -with open("aml_config/run_id.json", "w") as outfile: - json.dump(run_id, outfile) diff --git a/aml_service/15-EvaluateModel.py b/aml_service/15-EvaluateModel.py deleted file mode 100644 index 4d266a98..00000000 --- a/aml_service/15-EvaluateModel.py +++ /dev/null @@ -1,93 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-""" -import os, json -from azureml.core import Workspace -from azureml.core import Experiment -from azureml.core.model import Model -import azureml.core -from azureml.core import Run -from azureml.core.authentication import AzureCliAuthentication - -cli_auth = AzureCliAuthentication() - -# Get workspace -ws = Workspace.from_config(auth=cli_auth) - -# Paramaterize the matrics on which the models should be compared - -# Add golden data set on which all the model performance can be evaluated - -# Get the latest run_id -with open("aml_config/run_id.json") as f: - config = json.load(f) - -new_model_run_id = config["run_id"] -experiment_name = config["experiment_name"] -exp = Experiment(workspace=ws, name=experiment_name) - - -try: - # Get most recently registered model, we assume that is the model in production. Download this model and compare it with the recently trained model by running test with same data set. - model_list = Model.list(ws) - production_model = next( - filter( - lambda x: x.created_time == max(model.created_time for model in model_list), - model_list, - ) - ) - production_model_run_id = production_model.tags.get("run_id") - run_list = exp.get_runs() - # production_model_run = next(filter(lambda x: x.id == production_model_run_id, run_list)) - - # Get the run history for both production model and newly trained model and compare mse - production_model_run = Run(exp, run_id=production_model_run_id) - new_model_run = Run(exp, run_id=new_model_run_id) - - production_model_mse = production_model_run.get_metrics().get("mse") - new_model_mse = new_model_run.get_metrics().get("mse") - print( - "Current Production model mse: {}, New trained model mse: {}".format( - production_model_mse, new_model_mse - ) - ) - - promote_new_model = False - if new_model_mse < production_model_mse: - promote_new_model = True - print("New trained model performs better, thus it will be registered") -except: - promote_new_model = True - print("This is the first model to be trained, thus nothing to evaluate for now") - -run_id = {} -run_id["run_id"] = "" -# Writing the run id to /aml_config/run_id.json -if promote_new_model: - run_id["run_id"] = new_model_run_id - -run_id["experiment_name"] = experiment_name -with open("aml_config/run_id.json", "w") as outfile: - json.dump(run_id, outfile) diff --git a/aml_service/20-RegisterModel.py b/aml_service/20-RegisterModel.py deleted file mode 100644 index bd9a7bbc..00000000 --- a/aml_service/20-RegisterModel.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -""" -import os, json, sys -from azureml.core import Workspace -from azureml.core import Run -from azureml.core import Experiment -from azureml.core.model import Model - -from azureml.core.runconfig import RunConfiguration -from azureml.core.authentication import AzureCliAuthentication - -cli_auth = AzureCliAuthentication() - -# Get workspace -ws = Workspace.from_config(auth=cli_auth) - -# Get the latest evaluation result -try: - with open("aml_config/run_id.json") as f: - config = json.load(f) - if not config["run_id"]: - raise Exception("No new model to register as production model perform better") -except: - print("No new model to register as production model perform better") - # raise Exception('No new model to register as production model perform better') - sys.exit(0) - -run_id = config["run_id"] -experiment_name = config["experiment_name"] -exp = Experiment(workspace=ws, name=experiment_name) - -run = Run(experiment=exp, run_id=run_id) -names = run.get_file_names -names() -print("Run ID for last run: {}".format(run_id)) -model_local_dir = "model" -os.makedirs(model_local_dir, exist_ok=True) - -# Download Model to Project root directory -model_name = "sklearn_regression_model.pkl" -run.download_file( - name="./outputs/" + model_name, output_file_path="./model/" + model_name -) -print("Downloaded model {} to Project root directory".format(model_name)) -os.chdir("./model") -model = Model.register( - model_path=model_name, # this points to a local file - model_name=model_name, # this is the name the model is registered as - tags={"area": "diabetes", "type": "regression", "run_id": run_id}, - description="Regression model for diabetes dataset", - workspace=ws, -) -os.chdir("..") -print( - "Model registered: {} \nModel Description: {} \nModel Version: {}".format( - model.name, model.description, model.version - ) -) - -# Remove the evaluate.json as we no longer need it -# os.remove("aml_config/evaluate.json") - -# Writing the registered model details to /aml_config/model.json -model_json = {} -model_json["model_name"] = model.name -model_json["model_version"] = model.version -model_json["run_id"] = run_id -with open("aml_config/model.json", "w") as outfile: - json.dump(model_json, outfile) diff --git a/aml_service/30-CreateScoringImage.py b/aml_service/30-CreateScoringImage.py deleted file mode 100644 index b94f3cb1..00000000 --- a/aml_service/30-CreateScoringImage.py +++ /dev/null @@ -1,99 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. 
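In 20-RegisterModel.py above, the download and os.chdir dance can be avoided: a run can register a model straight from its own outputs. A sketch, assuming the same run, model_name, and tag values:

# Registers outputs/sklearn_regression_model.pkl directly from the run's artifact store.
model = run.register_model(
    model_name=model_name,
    model_path="outputs/" + model_name,
    tags={"area": "diabetes", "type": "regression", "run_id": run_id},
)
print("Model registered: {} v{}".format(model.name, model.version))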
This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -""" -import os, json, sys -from azureml.core import Workspace -from azureml.core.image import ContainerImage, Image -from azureml.core.model import Model -from azureml.core.authentication import AzureCliAuthentication - -cli_auth = AzureCliAuthentication() - -# Get workspace -ws = Workspace.from_config(auth=cli_auth) - -# Get the latest model details - -try: - with open("aml_config/model.json") as f: - config = json.load(f) -except: - print("No new model to register, thus no need to create a new scoring image") - # raise Exception('No new model to register as production model perform better') - sys.exit(0) - -model_name = config["model_name"] -model_version = config["model_version"] - - -model_list = Model.list(workspace=ws) -model, = (m for m in model_list if m.version == model_version and m.name == model_name) -print( - "Model picked: {} \nModel Description: {} \nModel Version: {}".format( - model.name, model.description, model.version - ) -) - -os.chdir("./code/scoring") -image_name = "diabetes-model-score" - -image_config = ContainerImage.image_configuration( - execution_script="score.py", - runtime="python-slim", - conda_file="conda_dependencies.yml", - description="Image with ridge regression model", - tags={"area": "diabetes", "type": "regression"}, -) - -image = Image.create( - name=image_name, models=[model], image_config=image_config, workspace=ws -) - -image.wait_for_creation(show_output=True) -os.chdir("../..") - -if image.creation_state != "Succeeded": - raise Exception("Image creation status: {}".format(image.creation_state)) - -print( - "{}(v.{} [{}]) stored at {} with build log {}".format( - image.name, - image.version, - image.creation_state, - image.image_location, - image.image_build_log_uri, - ) -) - -# Writing the image details to /aml_config/image.json -image_json = {} -image_json["image_name"] = image.name -image_json["image_version"] = image.version -image_json["image_location"] = image.image_location -with open("aml_config/image.json", "w") as outfile: - json.dump(image_json, outfile) - - -# How to fix the schema for a model, e.g. if we have multiple models expecting different schemas? diff --git a/aml_service/34-GetScoringImageName.py b/aml_service/34-GetScoringImageName.py deleted file mode 100644 index b5f3a764..00000000 --- a/aml_service/34-GetScoringImageName.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. 
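On the trailing schema question in 30-CreateScoringImage.py above, one common answer is the inference-schema decorators inside score.py, so each scoring image carries its own input contract. A sketch, assuming the inference-schema package and a ten-feature numeric input:

import numpy as np
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType

input_sample = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
output_sample = np.array([5021.5])

@input_schema("data", NumpyParameterType(input_sample))
@output_schema(NumpyParameterType(output_sample))
def run(data):
    # 'model' is assumed to be loaded in init(), as in a standard AML score.py.
    return model.predict(data).tolist()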
All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -""" -import os, json, sys -from azureml.core import Workspace -from azureml.core.authentication import AzureCliAuthentication - -cli_auth = AzureCliAuthentication() - -# Get workspace -ws = Workspace.from_config(auth=cli_auth) - -# Get the latest image details -latest_image = ws.images -name, version = latest_image.get(list(latest_image)[0]).id.split(':') - -# Writing the image details to /aml_config/image.json -image_json = {} -image_json["image_name"] = name -image_json["image_version"] = int(version) -with open("aml_config/image.json", "w") as outfile: - json.dump(image_json, outfile) diff --git a/aml_service/50-deployOnAci.py b/aml_service/50-deployOnAci.py deleted file mode 100644 index 00313380..00000000 --- a/aml_service/50-deployOnAci.py +++ /dev/null @@ -1,88 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
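A note on 34-GetScoringImageName.py above: ws.images is a name-to-image mapping, so list(...)[0] picks an arbitrary entry rather than the latest image the comment promises. A sketch that actually selects the newest by version, assuming a single image name in the workspace:

from azureml.core.image import Image

images = Image.list(workspace=ws)
latest_image = max(images, key=lambda img: img.version)
name, version = latest_image.id.split(":")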
IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -""" -import os, json, datetime, sys -from operator import attrgetter -from azureml.core import Workspace -from azureml.core.model import Model -from azureml.core.image import Image -from azureml.core.webservice import Webservice -from azureml.core.webservice import AciWebservice -from azureml.core.authentication import AzureCliAuthentication - -cli_auth = AzureCliAuthentication() -# Get workspace -ws = Workspace.from_config(auth=cli_auth) # Get the Image to deploy details -try: - with open("aml_config/image.json") as f: - config = json.load(f) -except: - print("No new model, thus no deployment on ACI") - # raise Exception('No new model to register as production model perform better') - sys.exit(0) - - -image_name = config["image_name"] -image_version = config["image_version"] - -images = Image.list(workspace=ws) -image, = (m for m in images if m.version == image_version and m.name == image_name) -print( - "From image.json, Image used to deploy webservice on ACI: {}\nImage Version: {}\nImage Location = {}".format( - image.name, image.version, image.image_location - ) -) - -# image = max(images, key=attrgetter('version')) -# print('From Max Version, Image used to deploy webservice on ACI: {}\nImage Version: {}\nImage Location = {}'.format(image.name, image.version, image.image_location)) - - -aciconfig = AciWebservice.deploy_configuration( - cpu_cores=1, - memory_gb=1, - tags={"area": "diabetes", "type": "regression"}, - description="A sample description", -) - -aci_service_name = "aciwebservice" + datetime.datetime.now().strftime("%m%d%H") - -service = Webservice.deploy_from_image( - deployment_config=aciconfig, image=image, name=aci_service_name, workspace=ws -) - -service.wait_for_deployment() -print( - "Deployed ACI Webservice: {} \nWebservice Uri: {}".format( - service.name, service.scoring_uri - ) -) - -# service=Webservice(name ='aciws0622', workspace =ws) -# Writing the ACI details to /aml_config/aci_webservice.json -aci_webservice = {} -aci_webservice["aci_name"] = service.name -aci_webservice["aci_url"] = service.scoring_uri -with open("aml_config/aci_webservice.json", "w") as outfile: - json.dump(aci_webservice, outfile) diff --git a/aml_service/51-deployOnAks.py b/aml_service/51-deployOnAks.py deleted file mode 100644 index 379ea90c..00000000 --- a/aml_service/51-deployOnAks.py +++ /dev/null @@ -1,124 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. 
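A hardening note for 50-deployOnAci.py above: wait_for_deployment() is called without output, and a failed container is opaque without its logs. A sketch of a stricter rollout check on the same service object:

service.wait_for_deployment(show_output=True)
if service.state != "Healthy":
    # Surface the container logs before failing; ACI errors are hard to diagnose otherwise.
    print(service.get_logs())
    raise Exception("ACI deployment ended in state: {}".format(service.state))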
-Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -""" -import os, json, datetime, sys -from operator import attrgetter -from azureml.core import Workspace -from azureml.core.model import Model -from azureml.core.image import Image -from azureml.core.compute import AksCompute, ComputeTarget -from azureml.core.webservice import Webservice, AksWebservice -from azureml.core.authentication import AzureCliAuthentication - -cli_auth = AzureCliAuthentication() -# Get workspace -ws = Workspace.from_config(auth=cli_auth) - -# Get the Image to deploy details -try: - with open("aml_config/image.json") as f: - config = json.load(f) -except: - print("No new model, thus no deployment on ACI") - # raise Exception('No new model to register as production model perform better') - sys.exit(0) - -image_name = config["image_name"] -image_version = config["image_version"] - -images = Image.list(workspace=ws) -image, = (m for m in images if m.version == image_version and m.name == image_name) -print( - "From image.json, Image used to deploy webservice on ACI: {}\nImage Version: {}\nImage Location = {}".format( - image.name, image.version, image.image_location - ) -) - -# image = max(images, key=attrgetter('version')) -# print('From Max Version, Image used to deploy webservice on ACI: {}\nImage Version: {}\nImage Location = {}'.format(image.name, image.version, image.image_location)) - -# Check if AKS already Available -try: - with open("aml_config/aks_webservice.json") as f: - config = json.load(f) - aks_name = config["aks_name"] - aks_service_name = config["aks_service_name"] - compute_list = ws.compute_targets() - aks_target, = (c for c in compute_list if c.name == aks_name) - service = Webservice(name=aks_service_name, workspace=ws) - print( - "Updating AKS service {} with image: {}".format( - aks_service_name, image.image_location - ) - ) - service.update(image=image) -except: - aks_name = "aks" + datetime.datetime.now().strftime("%m%d%H") - aks_service_name = "akswebservice" + datetime.datetime.now().strftime("%m%d%H") - prov_config = AksCompute.provisioning_configuration( - agent_count=6, vm_size="Standard_F4s", location="eastus" - ) - print( - "No AKS found in aks_webservice.json. 
Creating new Aks: {} and AKS Webservice: {}".format( - aks_name, aks_service_name - ) - ) - # Create the cluster - aks_target = ComputeTarget.create( - workspace=ws, name=aks_name, provisioning_configuration=prov_config - ) - - aks_target.wait_for_completion(show_output=True) - print(aks_target.provisioning_state) - print(aks_target.provisioning_errors) - - # Use the default configuration (can also provide parameters to customize) - aks_config = AksWebservice.deploy_configuration(enable_app_insights=True) - - service = Webservice.deploy_from_image( - workspace=ws, - name=aks_service_name, - image=image, - deployment_config=aks_config, - deployment_target=aks_target, - ) - - service.wait_for_deployment(show_output=True) - print(service.state) - print( - "Deployed AKS Webservice: {} \nWebservice Uri: {}".format( - service.name, service.scoring_uri - ) - ) - - -# Writing the AKS details to /aml_config/aks_webservice.json -aks_webservice = {} -aks_webservice["aks_name"] = aks_name -aks_webservice["aks_service_name"] = service.name -aks_webservice["aks_url"] = service.scoring_uri -aks_webservice["aks_keys"] = service.get_keys() -with open("aml_config/aks_webservice.json", "w") as outfile: - json.dump(aks_webservice, outfile) diff --git a/aml_service/60-AciWebserviceTest.py b/aml_service/60-AciWebserviceTest.py deleted file mode 100644 index a8c40f69..00000000 --- a/aml_service/60-AciWebserviceTest.py +++ /dev/null @@ -1,63 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
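Two notes on 51-deployOnAks.py above. First, in azureml-core 1.x Workspace.compute_targets is a property returning a name-to-target dict, so the compute_list = ws.compute_targets() call in the update path raises a TypeError that the bare except swallows, silently provisioning a fresh cluster instead of updating the existing one. Second, the AKS endpoint is key-authenticated, so REST callers need the key persisted in aks_webservice.json. A minimal client sketch, assuming that file was written as above:

import json
import requests

with open("aml_config/aks_webservice.json") as f:
    aks_config = json.load(f)

scoring_uri = aks_config["aks_url"]
key = aks_config["aks_keys"][0]  # primary key from service.get_keys()

headers = {"Content-Type": "application/json", "Authorization": "Bearer " + key}
payload = json.dumps({"data": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]})
response = requests.post(scoring_uri, data=payload, headers=headers)
print(response.status_code, response.json())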
-""" -import numpy -import os, json, datetime, sys -from operator import attrgetter -from azureml.core import Workspace -from azureml.core.model import Model -from azureml.core.image import Image -from azureml.core.webservice import Webservice -from azureml.core.webservice import AciWebservice -from azureml.core.authentication import AzureCliAuthentication - -cli_auth = AzureCliAuthentication() -# Get workspace -ws = Workspace.from_config(auth=cli_auth) -# Get the ACI Details -try: - with open("aml_config/aci_webservice.json") as f: - config = json.load(f) -except: - print("No new model, thus no deployment on ACI") - # raise Exception('No new model to register as production model perform better') - sys.exit(0) - -service_name = config["aci_name"] -# Get the hosted web service -service = Webservice(name=service_name, workspace=ws) - -# Input for Model with all features -input_j = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]] -print(input_j) -test_sample = json.dumps({"data": input_j}) -test_sample = bytes(test_sample, encoding="utf8") -try: - prediction = service.run(input_data=test_sample) - print(prediction) -except Exception as e: - result = str(e) - print(result) - raise Exception("ACI service is not working as expected") diff --git a/aml_service/61-AksWebserviceTest.py b/aml_service/61-AksWebserviceTest.py deleted file mode 100644 index f22982e0..00000000 --- a/aml_service/61-AksWebserviceTest.py +++ /dev/null @@ -1,66 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-""" -import numpy -import os, json, datetime, sys -from operator import attrgetter -from azureml.core import Workspace -from azureml.core.model import Model -from azureml.core.image import Image -from azureml.core.webservice import Webservice -from azureml.core.authentication import AzureCliAuthentication - -cli_auth = AzureCliAuthentication() -# Get workspace -ws = Workspace.from_config(auth=cli_auth) - -# Get the AKS Details -try: - with open("aml_config/aks_webservice.json") as f: - config = json.load(f) -except: - print("No new model, thus no deployment on ACI") - # raise Exception('No new model to register as production model perform better') - sys.exit(0) - -service_name = config["aks_service_name"] -# Get the hosted web service -service = Webservice(workspace=ws, name=service_name) - -# Input for Model with all features -input_j = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]] -print(input_j) -test_sample = json.dumps({"data": input_j}) -test_sample = bytes(test_sample, encoding="utf8") -try: - prediction = service.run(input_data=test_sample) - print(prediction) -except Exception as e: - result = str(e) - print(result) - raise Exception("AKS service is not working as expected") - -# Delete aci after test -service.delete() diff --git a/aml_service/helper/azcli.py b/aml_service/helper/azcli.py deleted file mode 100644 index 4affc1b3..00000000 --- a/aml_service/helper/azcli.py +++ /dev/null @@ -1,73 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -""" -import subprocess - - -def az_login(sp_user: str, sp_password: str, sp_tenant_id: str): - """ - Uses the provided service principal credentials to log into the azure cli. - This should always be the first step in executing az cli commands. - """ - cmd = "az login --service-principal --username {} --password {} --tenant {}" - out, err = run_cmd(cmd.format(sp_user, sp_password, sp_tenant_id)) - return out, err - - -def run_cmd(cmd: str): - """ - Runs an arbitrary command line command. Works for Linux or Windows. - Returns a tuple of output and error. 
- """ - proc = subprocess.Popen( - cmd, shell=True, stdout=subprocess.PIPE, universal_newlines=True - ) - output, error = proc.communicate() - if proc.returncode != 0: - print("Following command execution failed: {}".format(cmd)) - raise Exception("Operation Failed. Look at console logs for error info") - return output, error - - -def az_account_set(subscription_id: str): - """ - Sets the correct azure subscription. - This should always be run after the az_login. - """ - cmd = "az account set -s {}" - out, err = run_cmd(cmd.format(subscription_id)) - return out, err - - -def az_acr_create(resource_group: str, acr_name: str): - cmd = "az acr create --resource-group {} --name {} --sku Basic" - out, err = run_cmd(cmd.format(resource_group, acr_name)) - return out, err - - -def az_acr_login(acr_name: str): - cmd = "az acr login --name {}" - out, err = run_cmd(cmd.format(acr_name)) - return out, err diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index c3815408..00000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,68 +0,0 @@ -pool: - vmImage: 'Ubuntu 16.04' -#Your build pipeline references a secret variable named ‘sp_username’. Create or edit the build pipeline for this YAML file, define the variable on the Variables tab, and then select the option to make it secret. See https://go.microsoft.com/fwlink/?linkid=865972 -#Your build pipeline references a secret variable named ‘sp_password’. Create or edit the build pipeline for this YAML file, define the variable on the Variables tab, and then select the option to make it secret. See https://go.microsoft.com/fwlink/?linkid=865972 -#Your build pipeline references a secret variable named ‘sp_tenantid’. Create or edit the build pipeline for this YAML file, define the variable on the Variables tab, and then select the option to make it secret. See https://go.microsoft.com/fwlink/?linkid=865972 -#Your build pipeline references a secret variable named ‘subscription_id’. Create or edit the build pipeline for this YAML file, define the variable on the Variables tab, and then select the option to make it secret. 
See https://go.microsoft.com/fwlink/?linkid=865972 - -variables: -- group: AzureKeyVaultSecrets - -trigger: -- master -- releases/* -- develop - -steps: -- task: UsePythonVersion@0 - inputs: - versionSpec: '3.6' - architecture: 'x64' - -- task: Bash@3 - displayName: 'Install Requirements' - inputs: - targetType: filePath - filePath: 'environment_setup/install_requirements.sh' - workingDirectory: 'environment_setup' - -- script: | - az login --service-principal -u $(spidentity) -p $(spsecret) --tenant $(sptenant) - - displayName: 'Login to Azure' - -- script: | - sed -i 's#"subscription_id": "<>"#"subscription_id": "$(subscriptionid)"#g' aml_config/config.json - - displayName: 'replace subscription value' - -- script: 'pytest tests/unit/data_test.py' - displayName: 'Data Quality Check' - -- script: 'python aml_service/00-WorkSpace.py' - displayName: 'Get or Create Workspace' - -- script: 'python aml_service/03-AttachAmlCluster.py' - displayName: 'Create AML Compute Cluster' - -- script: 'python aml_service/04-AmlPipelines.py' - displayName: 'Create and Test AML Pipeline' - -- script: 'python aml_service/04-AmlPipelines.py --pipeline_action publish' - displayName: 'Publish AML Pipeline as Endpoint' - -- task: CopyFiles@2 - displayName: 'Copy Files to: $(Build.ArtifactStagingDirectory)' - inputs: - SourceFolder: '$(Build.SourcesDirectory)' - TargetFolder: '$(Build.ArtifactStagingDirectory)' - Contents: '**' - -- task: PublishBuildArtifacts@1 - displayName: 'Publish Artifact: devops-for-ai' - inputs: - ArtifactName: 'devops-for-ai' - publishLocation: 'container' - pathtoPublish: '$(Build.ArtifactStagingDirectory)' - TargetPath: '$(Build.ArtifactStagingDirectory)' - diff --git a/bootstrap/README.md b/bootstrap/README.md new file mode 100644 index 00000000..0841cc30 --- /dev/null +++ b/bootstrap/README.md @@ -0,0 +1,3 @@ +# Bootstrap from MLOpsPython repository + +For steps on how to use the bootstrap script, please see the "Bootstrap the project" section of the [custom model guide](../docs/custom_model.md#bootstrap-the-project). 
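For quick reference, the bootstrap script added below is typically invoked as follows (directory and project name are example values; the guide linked above has the full steps):

# Run from a fresh clone of MLOpsPython; -d must be an absolute path.
python bootstrap/bootstrap.py -d /home/user/myproject -n myproject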
diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py new file mode 100644 index 00000000..02f51bbc --- /dev/null +++ b/bootstrap/bootstrap.py @@ -0,0 +1,155 @@ +import os +import sys +import platform +import argparse +import re + + +class Helper: + + def __init__(self, project_directory, project_name): + self._project_directory = project_directory + self._project_name = project_name + self._git_repo = "https://github.com/microsoft/MLOpsPython.git" + + @property + def project_directory(self): + return self._project_directory + + @property + def project_name(self): + return self._project_name + + @property + def git_repo(self): + return self._git_repo + + def rename_files(self): + # Rename all files starting with diabetes_regression with project name + strtoreplace = "diabetes_regression" + dirs = [".pipelines", r"ml_service/pipelines"] + for dir in dirs: + normDir = os.path.normpath(dir) + dirpath = os.path.join(self._project_directory, normDir) + for filename in os.listdir(dirpath): + if(filename.find(strtoreplace) != -1): + src = os.path.join(self._project_directory, normDir, filename) # NOQA: E501 + dst = os.path.join(self._project_directory, + normDir, + filename.replace(strtoreplace, self._project_name, 1)) # NOQA: E501 + os.rename(src, dst) + + def rename_dir(self): + dir = "diabetes_regression" + src = os.path.join(self._project_directory, dir) + for path, subdirs, files in os.walk(src): + for name in files: + newPath = path.replace(dir, self._project_name) + if (not (os.path.exists(newPath))): + os.mkdir(newPath) + file_path = os.path.join(path, name) + new_name = os.path.join(newPath, name) + os.rename(file_path, new_name) + + def delete_dir(self): + # Delete unwanted directories + dirs = ["docs", r"diabetes_regression"] + if (platform.system() == "Windows"): + cmd = 'rmdir /S /Q "{}"' + else: + cmd = 'rm -r "{}"' + for dir in dirs: + os.system(cmd.format(os.path.join(self._project_directory, os.path.normpath(dir)))) # NOQA: E501 + + def clean_dir(self): + # Clean up directories + dirs = ["data", "experimentation"] + for dir in dirs: + for root, dirs, files in os.walk(os.path.join(self._project_directory, dir)): # NOQA: E501 + for file in files: + os.remove(os.path.join(root, file)) + + def validate_args(self): + # Validate arguments + if (os.path.isdir(self._project_directory) is False): + raise Exception("Not a valid directory. Please provide an absolute directory path.") # NOQA: E501 + if (len(self._project_name) < 3 or len(self._project_name) > 15): + raise Exception("Invalid project name length. Project name should be 3 to 15 chars long, letters and underscores only.") # NOQA: E501 + if (not re.search("^[\\w_]+$", self._project_name)): + raise Exception("Invalid characters in project name. 
Project name should be 3 to 15 chars long, letters and underscores only.") # NOQA: E501 + + +def replace_project_name(project_dir, project_name, rename_name): + # Replace instances of rename_name within files with project_name + files = [r".env.example", + r".pipelines/code-quality-template.yml", + r".pipelines/pr.yml", + r".pipelines/diabetes_regression-cd.yml", + r".pipelines/diabetes_regression-ci.yml", + r".pipelines/abtest.yml", + r".pipelines/diabetes_regression-ci-image.yml", + r".pipelines/diabetes_regression-publish-model-artifact-template.yml", # NOQA: E501 + r".pipelines/diabetes_regression-get-model-id-artifact-template.yml", # NOQA: E501 + r".pipelines/diabetes_regression-batchscoring-ci.yml", + r".pipelines/diabetes_regression-variables-template.yml", + r"environment_setup/Dockerfile", + r"environment_setup/install_requirements.sh", + r"ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py", # NOQA: E501 + r"ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py", # NOQA: E501 + r"ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py", # NOQA: E501 + r"ml_service/pipelines/diabetes_regression_build_train_pipeline.py", # NOQA: E501 + r"ml_service/util/create_scoring_image.py", + r"diabetes_regression/conda_dependencies.yml", + r"diabetes_regression/evaluate/evaluate_model.py", + r"diabetes_regression/register/register_model.py", + r"diabetes_regression/training/test_train.py"] + + for file in files: + path = os.path.join(project_dir, os.path.normpath(file)) + try: + with open(path, "rt", encoding="utf8") as f_in: + data = f_in.read() + data = data.replace(rename_name, project_name) + with open(os.path.join(project_dir, file), "wt", encoding="utf8") as f_out: # NOQA: E501 + f_out.write(data) + except IOError as e: + print("Could not modify \"%s\". Is the MLOpsPython repo already cloned at \"%s\"?" 
% (path, project_dir))  # NOQA: E501
+            raise e
+
+
+def main(args):
+    parser = argparse.ArgumentParser(description='New Template')
+    parser.add_argument("-d",
+                        "--directory",
+                        type=str,
+                        required=True,
+                        help="Absolute path to new project directory")
+    parser.add_argument("-n",
+                        "--name",
+                        type=str,
+                        required=True,
+                        help="Name of the project [3-15 chars, letters and underscores only]")  # NOQA: E501
+    try:
+        args = parser.parse_args()
+
+        project_directory = args.directory
+        project_name = args.name
+
+        helper = Helper(project_directory, project_name)
+        helper.validate_args()
+        helper.clean_dir()
+
+        replace_project_name(project_directory, project_name, "diabetes_regression")  # NOQA: E501
+        replace_project_name(project_directory, project_name, "diabetes")
+
+        helper.rename_files()
+        helper.rename_dir()
+        helper.delete_dir()
+    except Exception as e:
+        print(e)
+
+    return 0
+
+
+if '__main__' == __name__:
+    sys.exit(main(sys.argv))
diff --git a/charts/abtest-istio/Chart.yaml b/charts/abtest-istio/Chart.yaml
new file mode 100644
index 00000000..bfcf8584
--- /dev/null
+++ b/charts/abtest-istio/Chart.yaml
@@ -0,0 +1,5 @@
+apiVersion: v1
+appVersion: "1.0"
+description: A Helm chart for Kubernetes
+name: abtest-istio
+version: 0.1.0
diff --git a/charts/abtest-istio/templates/istio-canary.yaml b/charts/abtest-istio/templates/istio-canary.yaml
new file mode 100644
index 00000000..a030fd0d
--- /dev/null
+++ b/charts/abtest-istio/templates/istio-canary.yaml
@@ -0,0 +1,60 @@
+apiVersion: networking.istio.io/v1alpha3
+kind: Gateway
+metadata:
+  name: mlmodel-gateway
+  namespace: abtesting
+spec:
+  selector:
+    istio: ingressgateway
+  servers:
+  - port:
+      number: {{ .Values.ingress.port }}
+      name: http
+      protocol: HTTP
+    hosts:
+    - "*"
+---
+apiVersion: networking.istio.io/v1alpha3
+kind: VirtualService
+metadata:
+  name: mlmodel-virtualservice
+  namespace: abtesting
+spec:
+  gateways:
+  - mlmodel-gateway
+  hosts:
+  - '*'
+  http:
+  - match:
+    - uri:
+        prefix: /score
+      headers:
+        x-api-version:
+          exact: 'blue'
+    route:
+    - destination:
+        host: {{ .Values.svc.name }}-blue.abtesting.svc.cluster.local
+        port:
+          number: {{ .Values.svc.port }}
+  - match:
+    - uri:
+        prefix: /score
+      headers:
+        x-api-version:
+          exact: 'green'
+    route:
+    - destination:
+        host: {{ .Values.svc.name }}-green.abtesting.svc.cluster.local
+        port:
+          number: {{ .Values.svc.port }}
+  - route:
+    - destination:
+        host: {{ .Values.svc.name }}-green.abtesting.svc.cluster.local
+        port:
+          number: {{ .Values.svc.port }}
+      weight: {{ .Values.weight.green }}
+    - destination:
+        host: {{ .Values.svc.name }}-blue.abtesting.svc.cluster.local
+        port:
+          number: {{ .Values.svc.port }}
+      weight: {{ .Values.weight.blue }}
\ No newline at end of file
diff --git a/charts/abtest-istio/values.yaml b/charts/abtest-istio/values.yaml
new file mode 100644
index 00000000..014845bc
--- /dev/null
+++ b/charts/abtest-istio/values.yaml
@@ -0,0 +1,15 @@
+ingress:
+  port: 80
+
+svc:
+  port: 5001
+  name: model-svc
+
+
+weight:
+  green: 50
+  blue: 50
+
+uri:
+  prefix: /score
+
diff --git a/charts/abtest-model/Chart.yaml b/charts/abtest-model/Chart.yaml
new file mode 100644
index 00000000..eeaa24bf
--- /dev/null
+++ b/charts/abtest-model/Chart.yaml
@@ -0,0 +1,5 @@
+apiVersion: v1
+appVersion: "1.0"
+description: A Helm chart for Kubernetes
+name: abtest-model
+version: 0.1.0
diff --git a/charts/abtest-model/templates/deployment.yaml b/charts/abtest-model/templates/deployment.yaml
new file mode 100644
index 00000000..78d01cc4
--- /dev/null
+++ 
b/charts/abtest-model/templates/deployment.yaml @@ -0,0 +1,30 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Values.deployment.name }} + namespace: {{ .Values.namespace }} + labels: + app: {{ .Values.appname }} + model_version: {{ .Values.deployment.bluegreen }} +spec: + replicas: 1 + selector: + matchLabels: + app: {{ .Values.appname }} + model_version: {{ .Values.deployment.bluegreen }} + template: + metadata: + labels: + app: {{ .Values.appname }} + model_version: {{ .Values.deployment.bluegreen }} + spec: + containers: + - name: {{ .Values.deployment.container.name }} + image: "{{ .Values.deployment.image.name }}" + imagePullPolicy: Always + ports: + - name: http + containerPort: 5001 + - name: probe + containerPort: 8086 + diff --git a/charts/abtest-model/templates/service.yaml b/charts/abtest-model/templates/service.yaml new file mode 100644 index 00000000..a4a6ed8b --- /dev/null +++ b/charts/abtest-model/templates/service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: "{{ .Values.svc.name }}-{{ .Values.deployment.bluegreen }}" + namespace: {{ .Values.namespace }} +spec: + selector: + app: {{ .Values.appname }} + model_version: {{ .Values.deployment.bluegreen }} + ports: + - port: {{ .Values.svc.port }} + targetPort: {{ .Values.deployment.container.port }} + \ No newline at end of file diff --git a/charts/abtest-model/values.yaml b/charts/abtest-model/values.yaml new file mode 100644 index 00000000..c3ab1b60 --- /dev/null +++ b/charts/abtest-model/values.yaml @@ -0,0 +1,13 @@ +namespace: abtesting +appname: model + +deployment: + name: model-green + bluegreen: green + container: + name: model + port: 5001 + +svc: + name: model-svc + port: 5001 \ No newline at end of file diff --git a/charts/load_test.sh b/charts/load_test.sh new file mode 100755 index 00000000..25a06452 --- /dev/null +++ b/charts/load_test.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +for ((i=1;i<=$1;i++)) +do + curl --header "x-api-version: $3" $2 + echo + sleep .2 +done \ No newline at end of file diff --git a/code/evaluate/evaluate_model.py b/code/evaluate/evaluate_model.py deleted file mode 100644 index d3ba5af0..00000000 --- a/code/evaluate/evaluate_model.py +++ /dev/null @@ -1,126 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
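The load_test.sh script above exercises the Istio routing rules by stamping each request with an x-api-version header, which the VirtualService matches to pick the blue or green backend. Where bash is unavailable, a roughly equivalent Python sketch (assuming the third-party requests package and a gateway URL of your own) is:

import sys
import time

import requests  # third-party; pip install requests


def load_test(count: int, url: str, api_version: str) -> None:
    # Tag every request so the Istio VirtualService routes it to the
    # matching blue/green service; pause briefly between calls.
    for _ in range(count):
        resp = requests.get(url, headers={"x-api-version": api_version})
        print(resp.status_code, resp.text)
        time.sleep(0.2)


if __name__ == "__main__":
    # Usage mirrors load_test.sh: <count> <url> <version>
    # e.g. python load_test.py 50 http://<gateway-ip>/score green
    load_test(int(sys.argv[1]), sys.argv[2], sys.argv[3])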
IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -""" -import os, json -from azureml.core import Workspace -from azureml.core import Experiment -from azureml.core.model import Model -import azureml.core -from azureml.core import Run -import argparse - - -# Get workspace -# ws = Workspace.from_config() -run = Run.get_context() -exp = run.experiment -ws = run.experiment.workspace - - -parser = argparse.ArgumentParser("evaluate") -parser.add_argument( - "--config_suffix", type=str, help="Datetime suffix for json config files" -) -parser.add_argument( - "--json_config", - type=str, - help="Directory to write all the intermediate json configs", -) -args = parser.parse_args() - -print("Argument 1: %s" % args.config_suffix) -print("Argument 2: %s" % args.json_config) - -if not (args.json_config is None): - os.makedirs(args.json_config, exist_ok=True) - print("%s created" % args.json_config) -# Paramaterize the matrics on which the models should be compared -# Add golden data set on which all the model performance can be evaluated - -# Get the latest run_id -# with open("aml_config/run_id.json") as f: -# config = json.load(f) - -train_run_id_json = "run_id_{}.json".format(args.config_suffix) -train_output_path = os.path.join(args.json_config, train_run_id_json) -with open(train_output_path) as f: - config = json.load(f) - -# parser = argparse.ArgumentParser() -# parser.add_argument('--train_run_id',type=str,default='',help='Run id of the newly trained model') -# #parser.add_argument('--model_assets_path',type=str,default='outputs',help='Location of trained model.') - - -new_model_run_id = config["run_id"] # args.train_run_id -experiment_name = config["experiment_name"] -# exp = Experiment(workspace=ws, name=experiment_name) - - -try: - # Get most recently registered model, we assume that is the model in production. Download this model and compare it with the recently trained model by running test with same data set. 
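Stripped of the AML plumbing, the evaluation logic in this legacy script (and in its replacement under diabetes_regression/evaluate/ later in this diff) is a promote-if-better gate on the mse metric. A self-contained sketch of that decision, with hypothetical metric values:

def should_promote(new_mse, production_mse=None):
    # No production metric means this is the first trained model,
    # which is always promoted.
    if production_mse is None:
        return True
    # Lower mean squared error wins.
    return new_mse < production_mse


assert should_promote(new_mse=3500.0) is True
assert should_promote(new_mse=3500.0, production_mse=4000.0) is True
assert should_promote(new_mse=4200.0, production_mse=4000.0) is False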
- model_list = Model.list(ws) - production_model = next( - filter( - lambda x: x.created_time == max(model.created_time for model in model_list), - model_list, - ) - ) - production_model_run_id = production_model.tags.get("run_id") - run_list = exp.get_runs() - # production_model_run = next(filter(lambda x: x.id == production_model_run_id, run_list)) - - # Get the run history for both production model and newly trained model and compare mse - production_model_run = Run(exp, run_id=production_model_run_id) - new_model_run = Run(exp, run_id=new_model_run_id) - - production_model_mse = production_model_run.get_metrics().get("mse") - new_model_mse = new_model_run.get_metrics().get("mse") - print( - "Current Production model mse: {}, New trained model mse: {}".format( - production_model_mse, new_model_mse - ) - ) - - promote_new_model = False - if new_model_mse < production_model_mse: - promote_new_model = True - print("New trained model performs better, thus it will be registered") -except: - promote_new_model = True - print("This is the first model to be trained, thus nothing to evaluate for now") - -run_id = {} -run_id["run_id"] = "" -# Writing the run id to /aml_config/run_id.json -if promote_new_model: - run_id["run_id"] = new_model_run_id - # register new model - # new_model_run.register_model(model_name='',model_path='outputs/sklearn_regression_model.pkl') - -run_id["experiment_name"] = experiment_name -filename = "run_id_{}.json".format(args.config_suffix) -output_path = os.path.join(args.json_config, filename) -with open(output_path, "w") as outfile: - json.dump(run_id, outfile) diff --git a/code/register/register_model.py b/code/register/register_model.py deleted file mode 100644 index fe1ae9c5..00000000 --- a/code/register/register_model.py +++ /dev/null @@ -1,119 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
-""" -import os, json, sys -from azureml.core import Workspace -from azureml.core import Run -from azureml.core import Experiment -from azureml.core.model import Model -import argparse - -from azureml.core.runconfig import RunConfiguration -from azureml.core.authentication import AzureCliAuthentication - -cli_auth = AzureCliAuthentication() - -# Get workspace -# ws = Workspace.from_config(auth=cli_auth) -run = Run.get_context() -exp = run.experiment -ws = run.experiment.workspace - -parser = argparse.ArgumentParser("register") -parser.add_argument( - "--config_suffix", type=str, help="Datetime suffix for json config files" -) -parser.add_argument( - "--json_config", - type=str, - help="Directory to write all the intermediate json configs", -) -args = parser.parse_args() - -print("Argument 1: %s" % args.config_suffix) -print("Argument 2: %s" % args.json_config) - -if not (args.json_config is None): - os.makedirs(args.json_config, exist_ok=True) - print("%s created" % args.json_config) - -evaluate_run_id_json = "run_id_{}.json".format(args.config_suffix) -evaluate_output_path = os.path.join(args.json_config, evaluate_run_id_json) - -# Get the latest evaluation result -try: - with open(evaluate_output_path) as f: - config = json.load(f) - if not config["run_id"]: - raise Exception("No new model to register as production model perform better") -except: - print("No new model to register as production model perform better") - # raise Exception('No new model to register as production model perform better') - sys.exit(0) - -run_id = config["run_id"] -experiment_name = config["experiment_name"] -# exp = Experiment(workspace=ws, name=experiment_name) - -run = Run(experiment=exp, run_id=run_id) -names = run.get_file_names -names() -print("Run ID for last run: {}".format(run_id)) -model_local_dir = "model" -os.makedirs(model_local_dir, exist_ok=True) - -# Download Model to Project root directory -model_name = "sklearn_regression_model.pkl" -run.download_file( - name="./outputs/" + model_name, output_file_path="./model/" + model_name -) -print("Downloaded model {} to Project root directory".format(model_name)) -os.chdir("./model") -model = Model.register( - model_path=model_name, # this points to a local file - model_name=model_name, # this is the name the model is registered as - tags={"area": "diabetes", "type": "regression", "run_id": run_id}, - description="Regression model for diabetes dataset", - workspace=ws, -) -os.chdir("..") -print( - "Model registered: {} \nModel Description: {} \nModel Version: {}".format( - model.name, model.description, model.version - ) -) - -# Remove the evaluate.json as we no longer need it -# os.remove("aml_config/evaluate.json") - -# Writing the registered model details to /aml_config/model.json -model_json = {} -model_json["model_name"] = model.name -model_json["model_version"] = model.version -model_json["run_id"] = run_id -filename = "model_{}.json".format(args.config_suffix) -output_path = os.path.join(args.json_config, filename) -with open(output_path, "w") as outfile: - json.dump(model_json, outfile) diff --git a/code/scoring/create_scoring_image.py b/code/scoring/create_scoring_image.py deleted file mode 100644 index 1aafade1..00000000 --- a/code/scoring/create_scoring_image.py +++ /dev/null @@ -1,124 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). 
You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -""" -import os, json, sys -import argparse -from azureml.core import Workspace -from azureml.core.image import ContainerImage, Image -from azureml.core import Run -from azureml.core.model import Model -from azureml.core.authentication import AzureCliAuthentication - -cli_auth = AzureCliAuthentication() - -run = Run.get_context() -if "OfflineRun" in run.id: - print("offline run") - # Get workspace - ws = Workspace.from_config(auth=cli_auth) -else: - exp = run.experiment - ws = run.experiment.workspace - -# Get the latest model details - -parser = argparse.ArgumentParser("scoring_image") -parser.add_argument( - "--config_suffix", type=str, help="Datetime suffix for json config files" -) -parser.add_argument( - "--json_config", - type=str, - help="Directory to write all the intermediate json configs", -) -args = parser.parse_args() - -register_model_json = "model_{}.json".format(args.config_suffix) -register_output_path = os.path.join(args.json_config, register_model_json) - - -try: - with open(register_output_path) as f: - config = json.load(f) -except: - print("No new model to register thus no need to create new scoring image") - # raise Exception('No new model to register as production model perform better') - sys.exit(0) - -model_name = config["model_name"] -model_version = config["model_version"] - -model_list = Model.list(workspace=ws) -model, = (m for m in model_list if m.version == model_version and m.name == model_name) -print( - "Model picked: {} \nModel Description: {} \nModel Version: {}".format( - model.name, model.description, model.version - ) -) - -os.chdir("scoring") -image_name = "diabetes-model-score" - -image_config = ContainerImage.image_configuration( - execution_script="score.py", - runtime="python-slim", - conda_file="conda_dependencies.yml", - description="Image with ridge regression model", - tags={"area": "diabetes", "type": "regression"}, -) - -image = Image.create( - name=image_name, models=[model], image_config=image_config, workspace=ws -) - -image.wait_for_creation(show_output=True) -os.chdir("..") - -if image.creation_state != "Succeeded": - raise Exception("Image creation status: {image.creation_state}") - -print( - "{}(v.{} [{}]) stored at {} with build log {}".format( - image.name, - image.version, - 
image.creation_state, - image.image_location, - image.image_build_log_uri, - ) -) - -# Writing the image details to /aml_config/image.json -image_json = {} -image_json["image_name"] = image.name -image_json["image_version"] = image.version -image_json["image_location"] = image.image_location -# with open("aml_config/image.json", "w") as outfile: -# json.dump(image_json, outfile) -filename = "image_{}.json".format(args.config_suffix) -output_path = os.path.join(args.json_config, filename) -with open(output_path, "w") as outfile: - json.dump(image_json, outfile) - -# How to fix the schema for a model, like if we have multiple models expecting different schema, diff --git a/code/scoring/score.py b/code/scoring/score.py deleted file mode 100644 index 994ca24a..00000000 --- a/code/scoring/score.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -""" -import pickle -import json -import numpy -from sklearn.ensemble import RandomForestClassifier -from azureml.core.model import Model - - -def init(): - global model - from sklearn.externals import joblib - - # load the model from file into a global object - model_path = Model.get_model_path(model_name="sklearn_regression_model.pkl") - model = joblib.load(model_path) - - -def run(raw_data): - try: - data = json.loads(raw_data)["data"] - data = numpy.array(data) - result = model.predict(data) - return json.dumps({"result": result.tolist()}) - except Exception as e: - result = str(e) - return json.dumps({"error": result}) - - -if __name__ == "__main__": - # Test scoring - init() - test_row = '{"data":[[1,2,3,4,5,6,7,8,9,10],[10,9,8,7,6,5,4,3,2,1]]}' - prediction = run(test_row) - print("Test result: ", prediction) diff --git a/code/training/train.py b/code/training/train.py deleted file mode 100644 index 19f24877..00000000 --- a/code/training/train.py +++ /dev/null @@ -1,109 +0,0 @@ -""" -Copyright (C) Microsoft Corporation. 
All rights reserved.​ - ​ -Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, -royalty-free right to use, copy, and modify the software code provided by us -("Software Code"). You may not sublicense the Software Code or any use of it -(except to your affiliates and to vendors to perform work on your behalf) -through distribution, network access, service agreement, lease, rental, or -otherwise. This license does not purport to express any claim of ownership over -data you may have shared with Microsoft in the creation of the Software Code. -Unless applicable law gives you more rights, Microsoft reserves all other -rights not expressly granted herein, whether by implication, estoppel or -otherwise. ​ - ​ -THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR -BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -""" -import pickle -from azureml.core import Workspace -from azureml.core.run import Run -import os -import argparse -from sklearn.datasets import load_diabetes -from sklearn.linear_model import Ridge -from sklearn.metrics import mean_squared_error -from sklearn.model_selection import train_test_split -from sklearn.externals import joblib -import numpy as np -import json -import subprocess -from typing import Tuple, List - - -parser = argparse.ArgumentParser("train") -parser.add_argument( - "--config_suffix", type=str, help="Datetime suffix for json config files" -) -parser.add_argument( - "--json_config", - type=str, - help="Directory to write all the intermediate json configs", -) -args = parser.parse_args() - -print("Argument 1: %s" % args.config_suffix) -print("Argument 2: %s" % args.json_config) - -if not (args.json_config is None): - os.makedirs(args.json_config, exist_ok=True) - print("%s created" % args.json_config) - -run = Run.get_context() -exp = run.experiment -ws = run.experiment.workspace - -X, y = load_diabetes(return_X_y=True) -columns = ["age", "gender", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"] -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) -data = {"train": {"X": X_train, "y": y_train}, "test": {"X": X_test, "y": y_test}} - -print("Running train.py") - -# Randomly pic alpha -alphas = np.arange(0.0, 1.0, 0.05) -alpha = alphas[np.random.choice(alphas.shape[0], 1, replace=False)][0] -print(alpha) -run.log("alpha", alpha) -reg = Ridge(alpha=alpha) -reg.fit(data["train"]["X"], data["train"]["y"]) -preds = reg.predict(data["test"]["X"]) -run.log("mse", mean_squared_error(preds, data["test"]["y"])) - - -# Save model as part of the run history -model_name = "sklearn_regression_model.pkl" -# model_name = "." 
- -with open(model_name, "wb") as file: - joblib.dump(value=reg, filename=model_name) - -# upload the model file explicitly into artifacts -run.upload_file(name="./outputs/" + model_name, path_or_stream=model_name) -print("Uploaded the model {} to experiment {}".format(model_name, run.experiment.name)) -dirpath = os.getcwd() -print(dirpath) -print("Following files are uploaded ") -print(run.get_file_names()) - -# register the model -# run.log_model(file_name = model_name) -# print('Registered the model {} to run history {}'.format(model_name, run.history.name)) - -run_id = {} -run_id["run_id"] = run.id -run_id["experiment_name"] = run.experiment.name -filename = "run_id_{}.json".format(args.config_suffix) -output_path = os.path.join(args.json_config, filename) -with open(output_path, "w") as outfile: - json.dump(run_id, outfile) - -run.complete() \ No newline at end of file diff --git a/data/README.md b/data/README.md new file mode 100644 index 00000000..d43d139c --- /dev/null +++ b/data/README.md @@ -0,0 +1,3 @@ +This folder is used for example data, and it is not meant to be used for storing training data. + +Follow steps to [Configure Training Data](../docs/custom_model.md#Configure-Custom-Training) to use your own data for training. \ No newline at end of file diff --git a/tests/unit/data_test.py b/data/data_test.py similarity index 92% rename from tests/unit/data_test.py rename to data/data_test.py index ad5c28ba..6d7d2ddf 100644 --- a/tests/unit/data_test.py +++ b/data/data_test.py @@ -34,7 +34,8 @@ def get_absPath(filename): """Returns the path of the notebooks folder""" path = os.path.abspath( os.path.join( - os.path.dirname(__file__), os.path.pardir, os.path.pardir, "data", filename + os.path.dirname( + __file__), os.path.pardir, "data", filename ) ) return path @@ -119,6 +120,8 @@ def test_check_distribution(): mean = np.mean(dataset.values, axis=0) std = np.mean(dataset.values, axis=0) assert ( - np.sum(abs(mean - historical_mean) > shift_tolerance * abs(historical_mean)) - or np.sum(abs(std - historical_std) > shift_tolerance * abs(historical_std)) > 0 + np.sum(abs(mean - historical_mean) + > shift_tolerance * abs(historical_mean)) + or np.sum(abs(std - historical_std) + > shift_tolerance * abs(historical_std)) > 0 ) diff --git a/diabetes_regression/.amlignore b/diabetes_regression/.amlignore new file mode 100644 index 00000000..e8705e07 --- /dev/null +++ b/diabetes_regression/.amlignore @@ -0,0 +1,10 @@ +# To prevent unnecessary files from being included in +# the snapshot, make an ignore file (.gitignore or .amlignore). +# Place this file in the Snapshot directory and add the +# filenames to ignore in it. The .amlignore file uses +# the same syntax and patterns as the .gitignore file. +# If both files exist, the .amlignore file takes precedence. + +# We use yml files to configure deployment, +# but we are not deploying them to compute +*.yml diff --git a/diabetes_regression/ci_dependencies.yml b/diabetes_regression/ci_dependencies.yml new file mode 100644 index 00000000..73086471 --- /dev/null +++ b/diabetes_regression/ci_dependencies.yml @@ -0,0 +1,29 @@ +name: mlopspython_ci + +dependencies: + # The python interpreter version. + - python=3.7.* + + # dependencies with versions aligned with conda_dependencies.yml. + - numpy=1.18.* + - pandas=1.0.* + - scikit-learn=0.22.* + # dependencies for MLOps with R. + - r=3.6.0 + - r-essentials=3.6.0 + + - conda-forge::jq + - pip=20.0.* + + - pip: + # dependencies with versions aligned with conda_dependencies.yml. 
+ - azureml-sdk==1.27.* + + # Additional pip dependencies for the CI environment. + - pytest==5.4.* + - pytest-cov==2.8.* + - requests==2.23.* + - python-dotenv==0.12.* + - flake8==3.7.* + - flake8_formatter_junit_xml==0.0.* + - azure-cli==2.3.* diff --git a/code/scoring/conda_dependencies.yml b/diabetes_regression/conda_dependencies.yml similarity index 67% rename from code/scoring/conda_dependencies.yml rename to diabetes_regression/conda_dependencies.yml index 9bca0710..e214c7b2 100644 --- a/code/scoring/conda_dependencies.yml +++ b/diabetes_regression/conda_dependencies.yml @@ -1,49 +1,39 @@ # Conda environment specification. The dependencies defined in this file will - # be automatically provisioned for managed runs. These include runs against - # the localdocker, remotedocker, and cluster compute targets. - # Note that this file is NOT used to automatically manage dependencies for the - # local compute target. To provision these dependencies locally, run: - # conda env update --file conda_dependencies.yml - # Details about the Conda environment file format: - # https://conda.io/docs/using/envs.html#create-environment-file-by-hand - # For managing Spark packages and configuration, see spark_dependencies.yml. - - # Version of this configuration file's structure and semantics in AzureML. - # This directive is stored in a comment to preserve the Conda file structure. - # [AzureMlVersion] = 2 - -name: project_environment +name: diabetes_regression_training_env dependencies: # The python interpreter version. - # Currently Azure ML Workbench only supports 3.5.2 and later. + - python=3.7.* + - pip + + - pip: + # Base AzureML SDK + - azureml-sdk==1.27.* -- python=3.6.2 - # Required by azureml-defaults, installed separately through Conda to + # Must match AzureML SDK version. + # https://docs.microsoft.com/en-us/azure/machine-learning/concept-environments + - azureml-defaults==1.27.* - # get a prebuilt version and not require build tools for the install. + # Training deps + - scikit-learn -- psutil=5.3 + # Scoring deps + - inference-schema[numpy-support] -- pip: - # Required packages for AzureML execution, history, and data preparation. - - azureml-sdk[notebooks] # add the version to lock it ==0.1.74 - - scipy==1.0.0 - - scikit-learn==0.19.1 - - pandas==0.23.1 - - numpy==1.14.5 \ No newline at end of file + # MLOps with R + - azure-storage-blob diff --git a/aml_config/conda_dependencies.yml b/diabetes_regression/conda_dependencies_scorecopy.yml similarity index 68% rename from aml_config/conda_dependencies.yml rename to diabetes_regression/conda_dependencies_scorecopy.yml index 48505e28..9ed22ccd 100644 --- a/aml_config/conda_dependencies.yml +++ b/diabetes_regression/conda_dependencies_scorecopy.yml @@ -1,50 +1,31 @@ # Conda environment specification. The dependencies defined in this file will - # be automatically provisioned for managed runs. These include runs against - # the localdocker, remotedocker, and cluster compute targets. - # Note that this file is NOT used to automatically manage dependencies for the - # local compute target. To provision these dependencies locally, run: - # conda env update --file conda_dependencies.yml - # Details about the Conda environment file format: - # https://conda.io/docs/using/envs.html#create-environment-file-by-hand - # For managing Spark packages and configuration, see spark_dependencies.yml. - - # Version of this configuration file's structure and semantics in AzureML. - # This directive is stored in a comment to preserve the Conda file structure. 
- # [AzureMlVersion] = 2 - -name: project_environment +# These dependencies are used to create the environment used by the batch score +# copy pipeline step +name: diabetes_regression_score_copy_env dependencies: # The python interpreter version. - # Currently Azure ML Workbench only supports 3.5.2 and later. - -- python=3.6.2 - # Required by azureml-defaults, installed separately through Conda to - - # get a prebuilt version and not require build tools for the install. - -- psutil=5.3 - -- pip: - # Required packages for AzureML execution, history, and data preparation. - - azureml-sdk[notebooks] - - pynacl==1.2.1 - - scipy==1.0.0 - - scikit-learn==0.19.1 - - pandas==0.23.1 - - numpy==1.14.5 \ No newline at end of file + - python=3.7.* + - pip + + - pip: + # Base AzureML SDK + - azureml-sdk==1.27.* + + # Score copying deps + - azure-storage-blob diff --git a/diabetes_regression/conda_dependencies_scoring.yml b/diabetes_regression/conda_dependencies_scoring.yml new file mode 100644 index 00000000..e744b369 --- /dev/null +++ b/diabetes_regression/conda_dependencies_scoring.yml @@ -0,0 +1,32 @@ +# Conda environment specification. The dependencies defined in this file will +# be automatically provisioned for managed runs. These include runs against +# the localdocker, remotedocker, and cluster compute targets. + +# Note that this file is NOT used to automatically manage dependencies for the +# local compute target. To provision these dependencies locally, run: +# conda env update --file conda_dependencies.yml + +# Details about the Conda environment file format: +# https://conda.io/docs/using/envs.html#create-environment-file-by-hand + +# For managing Spark packages and configuration, see spark_dependencies.yml. +# Version of this configuration file's structure and semantics in AzureML. +# This directive is stored in a comment to preserve the Conda file structure. +# [AzureMlVersion] = 2 + +# These dependencies are used to create the environment used by the batch score +# pipeline step +name: diabetes_regression_scoring_env +dependencies: + # The python interpreter version. + # Currently Azure ML Workbench only supports 3.5.2 and later. + - python=3.7.* + - pip + + - pip: + # Base AzureML SDK + - azureml-sdk==1.27.* + + # Scoring deps + - scikit-learn + - pandas diff --git a/diabetes_regression/evaluate/evaluate_model.py b/diabetes_regression/evaluate/evaluate_model.py new file mode 100644 index 00000000..d1ff3c6a --- /dev/null +++ b/diabetes_regression/evaluate/evaluate_model.py @@ -0,0 +1,154 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
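Elsewhere in the repository these conda specifications are turned into AML Environments. A minimal sketch of that conversion using the SDK's standard entry point (file path shown relative to the repository root):

from azureml.core import Environment

# The environment name matches the `name:` field inside the YAML spec.
scoring_env = Environment.from_conda_specification(
    name="diabetes_regression_scoring_env",
    file_path="diabetes_regression/conda_dependencies_scoring.yml",
)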
IN NO EVENT SHALL
+MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+"""
+from azureml.core import Run
+import argparse
+import traceback
+from util.model_helper import get_model
+
+run = Run.get_context()
+
+# If you would like to run this script on a local computer,
+# the following code is a good starting point for you.
+# Use
+# python -m evaluate.evaluate_model
+# in the diabetes_regression folder context
+
+# if (run.id.startswith('OfflineRun')):
+#     from dotenv import load_dotenv
+#     # For local development, set values in this section
+#     load_dotenv()
+#     sources_dir = os.environ.get("SOURCES_DIR_TRAIN")
+#     if (sources_dir is None):
+#         sources_dir = 'diabetes_regression'
+#     path_to_util = os.path.join(".", sources_dir, "util")
+#     sys.path.append(os.path.abspath(path_to_util))  # NOQA: E402
+#     from model_helper import get_model
+#     workspace_name = os.environ.get("WORKSPACE_NAME")
+#     experiment_name = os.environ.get("EXPERIMENT_NAME")
+#     resource_group = os.environ.get("RESOURCE_GROUP")
+#     subscription_id = os.environ.get("SUBSCRIPTION_ID")
+#     tenant_id = os.environ.get("TENANT_ID")
+#     model_name = os.environ.get("MODEL_NAME")
+#     app_id = os.environ.get('SP_APP_ID')
+#     app_secret = os.environ.get('SP_APP_SECRET')
+#     build_id = os.environ.get('BUILD_BUILDID')
+#     # run_id useful to query previous runs
+#     run_id = "57fee47f-5ae8-441c-bc0c-d4c371f32d70"
+
+#     aml_workspace = Workspace.get(
+#         name=workspace_name,
+#         subscription_id=subscription_id,
+#         resource_group=resource_group
+#     )
+#     ws = aml_workspace
+#     exp = Experiment(ws, experiment_name)
+
+# Comment out the following three lines
+# if you would like to use Offline mode
+exp = run.experiment
+ws = run.experiment.workspace
+run_id = 'amlcompute'
+
+parser = argparse.ArgumentParser("evaluate")
+
+parser.add_argument(
+    "--run_id",
+    type=str,
+    help="Training run ID",
+)
+parser.add_argument(
+    "--model_name",
+    type=str,
+    help="Name of the Model",
+    default="diabetes_model.pkl",
+)
+
+parser.add_argument(
+    "--allow_run_cancel",
+    type=str,
+    help="Set this to false to prevent the evaluation step from cancelling the run after an unsuccessful evaluation",  # NOQA: E501
+    default="true",
+)
+
+args = parser.parse_args()
+if (args.run_id is not None):
+    run_id = args.run_id
+if (run_id == 'amlcompute'):
+    run_id = run.parent.id
+model_name = args.model_name
+metric_eval = "mse"
+
+allow_run_cancel = args.allow_run_cancel
+# Parameterize the metrics on which the models should be compared
+# Add a golden data set on which all the model performance can be evaluated
+try:
+    firstRegistration = False
+    tag_name = 'experiment_name'
+
+    model = get_model(
+        model_name=model_name,
+        tag_name=tag_name,
+        tag_value=exp.name,
+        aml_workspace=ws)
+
+    if (model is not None):
+        production_model_mse = 10000
+        if (metric_eval in model.tags):
+            production_model_mse = float(model.tags[metric_eval])
+        try:
+            new_model_mse = float(run.parent.get_metrics().get(metric_eval))
+        except TypeError:
+            new_model_mse = None
+        if (production_model_mse is None or new_model_mse is None):
+            print("Unable to find ", metric_eval, " metrics, "
+                  "exiting 
evaluation") + if((allow_run_cancel).lower() == 'true'): + run.parent.cancel() + else: + print( + "Current Production model {}: {}, ".format( + metric_eval, production_model_mse) + + "New trained model {}: {}".format( + metric_eval, new_model_mse + ) + ) + + if (new_model_mse < production_model_mse): + print("New trained model performs better, " + "thus it should be registered") + else: + print("New trained model metric is worse than or equal to " + "production model so skipping model registration.") + if((allow_run_cancel).lower() == 'true'): + run.parent.cancel() + else: + print("This is the first model, " + "thus it should be registered") + +except Exception: + traceback.print_exc(limit=None, file=None, chain=True) + print("Something went wrong trying to evaluate. Exiting.") + raise diff --git a/diabetes_regression/parameters.json b/diabetes_regression/parameters.json new file mode 100644 index 00000000..48f7227d --- /dev/null +++ b/diabetes_regression/parameters.json @@ -0,0 +1,18 @@ +{ + "training": + { + "alpha": 0.4 + }, + "evaluation": + { + + }, + "registration": + { + "tags": ["mse"] + }, + "scoring": + { + + } +} diff --git a/diabetes_regression/register/register_model.py b/diabetes_regression/register/register_model.py new file mode 100644 index 00000000..bca55a83 --- /dev/null +++ b/diabetes_regression/register/register_model.py @@ -0,0 +1,214 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
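The parameters.json file above is how hyperparameters reach the training code. A minimal sketch of reading it (the fallback value here is illustrative, not prescribed by the file):

import json

with open("parameters.json") as f:
    pars = json.load(f)

try:
    # "alpha" is the ridge regularization strength, set to 0.4 above.
    alpha = pars["training"]["alpha"]
except KeyError:
    alpha = 0.5  # illustrative default when the key is absent

print("Training ridge regression with alpha={}".format(alpha))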
+""" +import json +import os +import sys +import argparse +import traceback +import joblib +from azureml.core import Run, Experiment, Workspace, Dataset +from azureml.core.model import Model as AMLModel + + +def main(): + + run = Run.get_context() + if (run.id.startswith('OfflineRun')): + from dotenv import load_dotenv + # For local development, set values in this section + load_dotenv() + workspace_name = os.environ.get("WORKSPACE_NAME") + experiment_name = os.environ.get("EXPERIMENT_NAME") + resource_group = os.environ.get("RESOURCE_GROUP") + subscription_id = os.environ.get("SUBSCRIPTION_ID") + # run_id useful to query previous runs + run_id = "bd184a18-2ac8-4951-8e78-e290bef3b012" + aml_workspace = Workspace.get( + name=workspace_name, + subscription_id=subscription_id, + resource_group=resource_group + ) + ws = aml_workspace + exp = Experiment(ws, experiment_name) + else: + ws = run.experiment.workspace + exp = run.experiment + run_id = 'amlcompute' + + parser = argparse.ArgumentParser("register") + + parser.add_argument( + "--run_id", + type=str, + help="Training run ID", + ) + + parser.add_argument( + "--model_name", + type=str, + help="Name of the Model", + default="diabetes_model.pkl", + ) + + parser.add_argument( + "--step_input", + type=str, + help=("input from previous steps") + ) + + args = parser.parse_args() + if (args.run_id is not None): + run_id = args.run_id + if (run_id == 'amlcompute'): + run_id = run.parent.id + model_name = args.model_name + model_path = args.step_input + + print("Getting registration parameters") + + # Load the registration parameters from the parameters file + with open("parameters.json") as f: + pars = json.load(f) + try: + register_args = pars["registration"] + except KeyError: + print("Could not load registration values from file") + register_args = {"tags": []} + + model_tags = {} + for tag in register_args["tags"]: + try: + mtag = run.parent.get_metrics()[tag] + model_tags[tag] = mtag + except KeyError: + print(f"Could not find {tag} metric on parent run.") + + # load the model + print("Loading model from " + model_path) + model_file = os.path.join(model_path, model_name) + model = joblib.load(model_file) + parent_tags = run.parent.get_tags() + try: + build_id = parent_tags["BuildId"] + except KeyError: + build_id = None + print("BuildId tag not found on parent run.") + print(f"Tags present: {parent_tags}") + try: + build_uri = parent_tags["BuildUri"] + except KeyError: + build_uri = None + print("BuildUri tag not found on parent run.") + print(f"Tags present: {parent_tags}") + + if (model is not None): + dataset_id = parent_tags["dataset_id"] + if (build_id is None): + register_aml_model( + model_file, + model_name, + model_tags, + exp, + run_id, + dataset_id) + elif (build_uri is None): + register_aml_model( + model_file, + model_name, + model_tags, + exp, + run_id, + dataset_id, + build_id) + else: + register_aml_model( + model_file, + model_name, + model_tags, + exp, + run_id, + dataset_id, + build_id, + build_uri) + else: + print("Model not found. 
Skipping model registration.") + sys.exit(0) + + +def model_already_registered(model_name, exp, run_id): + model_list = AMLModel.list(exp.workspace, name=model_name, run_id=run_id) + if len(model_list) >= 1: + e = ("Model name:", model_name, "in workspace", + exp.workspace, "with run_id ", run_id, "is already registered.") + print(e) + raise Exception(e) + else: + print("Model is not registered for this run.") + + +def register_aml_model( + model_path, + model_name, + model_tags, + exp, + run_id, + dataset_id, + build_id: str = 'none', + build_uri=None +): + try: + tagsValue = {"area": "diabetes_regression", + "run_id": run_id, + "experiment_name": exp.name} + tagsValue.update(model_tags) + if (build_id != 'none'): + model_already_registered(model_name, exp, run_id) + tagsValue["BuildId"] = build_id + if (build_uri is not None): + tagsValue["BuildUri"] = build_uri + + model = AMLModel.register( + workspace=exp.workspace, + model_name=model_name, + model_path=model_path, + tags=tagsValue, + datasets=[('training data', + Dataset.get_by_id(exp.workspace, dataset_id))]) + os.chdir("..") + print( + "Model registered: {} \nModel Description: {} " + "\nModel Version: {}".format( + model.name, model.description, model.version + ) + ) + except Exception: + traceback.print_exc(limit=None, file=None, chain=True) + print("Model registration failed") + raise + + +if __name__ == '__main__': + main() diff --git a/diabetes_regression/scoring/deployment_config_aci.yml b/diabetes_regression/scoring/deployment_config_aci.yml new file mode 100644 index 00000000..d2e0ba12 --- /dev/null +++ b/diabetes_regression/scoring/deployment_config_aci.yml @@ -0,0 +1,4 @@ +computeType: ACI +containerResourceRequirements: + cpu: 1 + memoryInGB: 4 diff --git a/diabetes_regression/scoring/deployment_config_aks.yml b/diabetes_regression/scoring/deployment_config_aks.yml new file mode 100644 index 00000000..cd81009d --- /dev/null +++ b/diabetes_regression/scoring/deployment_config_aks.yml @@ -0,0 +1,16 @@ +computeType: AKS +autoScaler: + autoscaleEnabled: True + minReplicas: 1 + maxReplicas: 3 + refreshPeriodInSeconds: 10 + targetUtilization: 70 +authEnabled: True +containerResourceRequirements: + cpu: 0.5 + memoryInGB: 2 +appInsightsEnabled: True +scoringTimeoutMs: 5000 +maxConcurrentRequestsPerContainer: 2 +maxQueueWaitMs: 5000 +sslEnabled: True diff --git a/diabetes_regression/scoring/inference_config.yml b/diabetes_regression/scoring/inference_config.yml new file mode 100644 index 00000000..3fc86686 --- /dev/null +++ b/diabetes_regression/scoring/inference_config.yml @@ -0,0 +1,9 @@ +entryScript: score.py +runtime: python +condaFile: ../conda_dependencies.yml +extraDockerfileSteps: +schemaFile: +sourceDirectory: +enableGpu: False +baseImage: +baseImageRegistry: \ No newline at end of file diff --git a/diabetes_regression/scoring/parallel_batchscore.py b/diabetes_regression/scoring/parallel_batchscore.py new file mode 100644 index 00000000..cd42c79c --- /dev/null +++ b/diabetes_regression/scoring/parallel_batchscore.py @@ -0,0 +1,157 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. 
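deployment_config_aci.yml above maps directly onto the SDK's deployment configuration objects, so the same resources can be requested from Python when deploying without the CLI. A sketch of the ACI case (deployment_config_aks.yml maps onto AksWebservice.deploy_configuration in the same way):

from azureml.core.webservice import AciWebservice

# Mirrors deployment_config_aci.yml: 1 CPU core, 4 GB of memory.
aci_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=4,
)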
This license does not purport to express any claim of ownership over
+data you may have shared with Microsoft in the creation of the Software Code.
+Unless applicable law gives you more rights, Microsoft reserves all other
+rights not expressly granted herein, whether by implication, estoppel or
+otherwise. ​
+ ​
+THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+"""
+
+import numpy as np
+import pandas as pd
+import joblib
+import sys
+from typing import List
+from util.model_helper import get_model
+from azureml.core import Model
+
+model = None
+
+
+def parse_args() -> List[str]:
+    """
+    The AML pipeline calls this file with a set of additional command
+    line arguments whose names are not documented. As such, using the
+    ArgumentParser, which necessitates that we supply the names of the
+    arguments, is risky should those undocumented names change. Hence
+    we parse the arguments manually.
+
+    :returns: List of model filters
+
+    :raises: ValueError
+    """
+    model_name_param = [
+        (sys.argv[idx], sys.argv[idx + 1])
+        for idx, itm in enumerate(sys.argv)
+        if itm == "--model_name"
+    ]
+
+    if len(model_name_param) == 0:
+        raise ValueError(
+            "Model name is required but no model name parameter was passed to the script"  # NOQA: E501
+        )
+
+    model_name = model_name_param[0][1]
+
+    model_version_param = [
+        (sys.argv[idx], sys.argv[idx + 1])
+        for idx, itm in enumerate(sys.argv)
+        if itm == "--model_version"
+    ]
+    model_version = (
+        None
+        if len(model_version_param) < 1
+        or len(model_version_param[0][1].strip()) == 0  # NOQA: E501
+        else model_version_param[0][1]
+    )
+
+    model_tag_name_param = [
+        (sys.argv[idx], sys.argv[idx + 1])
+        for idx, itm in enumerate(sys.argv)
+        if itm == "--model_tag_name"
+    ]
+    model_tag_name = (
+        None
+        if len(model_tag_name_param) < 1
+        or len(model_tag_name_param[0][1].strip()) == 0  # NOQA: E501
+        else model_tag_name_param[0][1]
+    )
+
+    model_tag_value_param = [
+        (sys.argv[idx], sys.argv[idx + 1])
+        for idx, itm in enumerate(sys.argv)
+        if itm == "--model_tag_value"
+    ]
+    model_tag_value = (
+        None
+        if len(model_tag_value_param) < 1
+        or len(model_tag_value_param[0][1].strip()) == 0
+        else model_tag_value_param[0][1]
+    )
+
+    return [model_name, model_version, model_tag_name, model_tag_value]
+
+
+def init():
+    """
+    Initializer called once per node that runs the scoring job. Parse command
+    line arguments and get the right model to use for scoring. 
+ """ + try: + print("Initializing batch scoring script...") + + # Get the model using name/version/tags filter + model_filter = parse_args() + amlmodel = get_model( + model_name=model_filter[0], + model_version=model_filter[1], + tag_name=model_filter[2], + tag_value=model_filter[3]) + + # Load the model using name/version found + global model + modelpath = Model.get_model_path( + model_name=amlmodel.name, version=amlmodel.version) + model = joblib.load(modelpath) + print("Loaded model {}".format(model_filter[0])) + except Exception as ex: + print("Error: {}".format(ex)) + + +def run(mini_batch: pd.DataFrame) -> pd.DataFrame: + """ + The run method is called multiple times by the runtime. Each time + a mini-batch consisting of a portion of the input data is passed + in as a pandas DataFrame. The run method should return the scoring + results as a List or a pandas DataFrame. + + :param mini_batch: Dataframe containing a portion of the scoring data + + :returns: array containing the scores. + """ + + try: + result = None + + for _, sample in mini_batch.iterrows(): + # prediction + pred = model.predict(sample.values.reshape(1, -1)) + result = ( + np.array(pred) if result is None else np.vstack((result, pred)) + ) # NOQA: E501 + + return ( + [] + if result is None + else mini_batch.join(pd.DataFrame(result, columns=["score"])) + ) + + except Exception as ex: + print(ex) diff --git a/diabetes_regression/scoring/parallel_batchscore_copyoutput.py b/diabetes_regression/scoring/parallel_batchscore_copyoutput.py new file mode 100644 index 00000000..1bcde4b6 --- /dev/null +++ b/diabetes_regression/scoring/parallel_batchscore_copyoutput.py @@ -0,0 +1,91 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
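Because run() above only needs a fitted estimator and a pandas DataFrame, the scoring loop can be exercised locally with a stub model before it is wired into a ParallelRunStep. A hypothetical harness (it assumes the module's own imports, such as util.model_helper, resolve on your path):

import pandas as pd
from sklearn.dummy import DummyRegressor

import parallel_batchscore  # the module shown above

# Stand in for the registered AML model with a constant-output estimator
# and bypass init(), which would otherwise fetch a model from the workspace.
stub = DummyRegressor(strategy="constant", constant=100.0)
stub.fit([[0.0] * 10], [100.0])
parallel_batchscore.model = stub

batch = pd.DataFrame([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                      [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]])
print(parallel_batchscore.run(batch))  # input columns plus a "score" column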
+""" + +from azure.storage.blob import ContainerClient +from datetime import datetime, date, timezone +import argparse +import os + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--output_path", type=str, default=None) + parser.add_argument("--scoring_datastore", type=str, default=None) + parser.add_argument("--score_container", type=str, default=None) + parser.add_argument("--scoring_datastore_key", type=str, default=None) + parser.add_argument("--scoring_output_filename", type=str, default=None) + + return parser.parse_args() + + +def copy_output(args): + print("Output : {}".format(args.output_path)) + + accounturl = "https://{}.blob.core.windows.net".format( + args.scoring_datastore + ) # NOQA E501 + + containerclient = ContainerClient( + accounturl, args.score_container, args.scoring_datastore_key + ) + + destfolder = date.today().isoformat() + filetime = ( + datetime.now(timezone.utc) + .time() + .isoformat("milliseconds") + .replace(":", "_") + .replace(".", "_") + ) # noqa E501 + destfilenameparts = args.scoring_output_filename.split(".") + destblobname = "{}/{}_{}.{}".format( + destfolder, destfilenameparts[0], filetime, destfilenameparts[1] + ) + + destblobclient = containerclient.get_blob_client(destblobname) + with open( + os.path.join(args.output_path, "parallel_run_step.txt"), "rb" + ) as scorefile: # noqa E501 + destblobclient.upload_blob(scorefile, blob_type="BlockBlob") + + +if __name__ == "__main__": + args = parse_args() + if ( + args.scoring_datastore is None + or args.scoring_datastore.strip() == "" + or args.score_container is None + or args.score_container.strip() == "" + or args.scoring_datastore_key is None + or args.scoring_datastore_key.strip() == "" + or args.scoring_output_filename is None + or args.scoring_output_filename.strip() == "" + or args.output_path is None + or args.output_path.strip() == "" + ): + print("Missing parameters in parallel_batchscore_copyoutput.py -- Not going to copy inferences to an output datastore") # NOQA E501 + else: + copy_output(args) diff --git a/diabetes_regression/scoring/score.py b/diabetes_regression/scoring/score.py new file mode 100644 index 00000000..4acd5c8d --- /dev/null +++ b/diabetes_regression/scoring/score.py @@ -0,0 +1,90 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
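copy_output() above derives a collision-resistant destination blob name from the current date and time. Run standalone, the naming logic produces paths like the one printed by this sketch:

from datetime import datetime, date, timezone

scoring_output_filename = "diabetes_scoring_output.csv"  # value from .env

destfolder = date.today().isoformat()
filetime = (
    datetime.now(timezone.utc)
    .time()
    .isoformat("milliseconds")
    .replace(":", "_")
    .replace(".", "_")
)
name, ext = scoring_output_filename.split(".")
# e.g. 2020-06-15/diabetes_scoring_output_10_30_15_123.csv
print("{}/{}_{}.{}".format(destfolder, name, filetime, ext))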
IN NO EVENT SHALL
+MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+"""
+import numpy
+import joblib
+import os
+from azureml.core.model import Model
+from inference_schema.schema_decorators \
+    import input_schema, output_schema
+from inference_schema.parameter_types.numpy_parameter_type \
+    import NumpyParameterType
+
+
+def init():
+    # load the model from file into a global object
+    global model
+
+    # we assume that we have just one model
+    # AZUREML_MODEL_DIR is an environment variable created during deployment.
+    # It is the path to the model folder
+    # (./azureml-models/$MODEL_NAME/$VERSION)
+    model_path = Model.get_model_path(
+        os.getenv("AZUREML_MODEL_DIR").split('/')[-2])
+
+    model = joblib.load(model_path)
+
+
+input_sample = numpy.array([
+    [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
+    [10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0]])
+output_sample = numpy.array([
+    5021.509689995557,
+    3693.645386402646])
+
+
+# Inference_schema generates a schema for your web service.
+# It then creates an OpenAPI (Swagger) specification for the web service
+# at http://<service_endpoint>/swagger.json
+@input_schema('data', NumpyParameterType(input_sample))
+@output_schema(NumpyParameterType(output_sample))
+def run(data, request_headers):
+    result = model.predict(data)
+
+    # Demonstrate how we can log custom data into the Application Insights
+    # traces collection.
+    # The 'X-Ms-Request-id' value is generated internally and can be used to
+    # correlate a log entry with the Application Insights requests collection.
+    # The HTTP 'traceparent' header may be set by the caller to implement
+    # distributed tracing (per the W3C Trace Context proposed specification)
+    # and can be used to correlate the request to external systems.
+    print(('{{"RequestId":"{0}", '
+           '"TraceParent":"{1}", '
+           '"NumberOfPredictions":{2}}}'
+           ).format(
+               request_headers.get("X-Ms-Request-Id", ""),
+               request_headers.get("Traceparent", ""),
+               len(result)
+    ))
+
+    return {"result": result.tolist()}
+
+
+if __name__ == "__main__":
+    # Test scoring locally. Note that run() expects data matching the input
+    # schema (a numpy array); unwrapping a raw JSON request payload is done
+    # by the scoring server, not by run() itself.
+    init()
+    test_data = numpy.array([
+        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+        [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]])
+    prediction = run(test_data, {})
+    print("Test result: ", prediction)
diff --git a/diabetes_regression/scoring/scoreA.py b/diabetes_regression/scoring/scoreA.py
new file mode 100644
index 00000000..ac4a6100
--- /dev/null
+++ b/diabetes_regression/scoring/scoreA.py
@@ -0,0 +1,6 @@
+def init():
+    global model
+
+
+def run(raw_data):
+    return "New Model A"
diff --git a/diabetes_regression/scoring/scoreB.py b/diabetes_regression/scoring/scoreB.py
new file mode 100644
index 00000000..c0865269
--- /dev/null
+++ b/diabetes_regression/scoring/scoreB.py
@@ -0,0 +1,6 @@
+def init():
+    global model
+
+
+def run(raw_data):
+    return "New Model B"
diff --git a/diabetes_regression/training/R/r_train.r b/diabetes_regression/training/R/r_train.r
new file mode 100644
index 00000000..c19a58be
--- /dev/null
+++ b/diabetes_regression/training/R/r_train.r
@@ -0,0 +1,41 @@
+print(R.version.string)
+
+# COMMAND ----------
+
+path="weight_data.csv"
+print(paste("Reading file from",path))
+
+routes<-read.csv(path, header=TRUE)
+
+# The predictor vector (height).
+x <- routes$height
+# The response vector (weight).
+y <- routes$weight
+# Apply the lm() function.
+model <- lm(y~x)
+
+# COMMAND ----------
+
+routes
+
+# COMMAND ----------
+
+# Make Predictions
+df_test_heights <- data.frame(x = as.numeric(c(115,20)))
+result <- predict(model,df_test_heights)
+print(result)
+
+# COMMAND ----------
+
+# Save the model to an .rds file (on Databricks this lands in DBFS/blob storage)
+model_path="model.rds"
+saveRDS(model, model_path)
+
+# COMMAND ----------
+
+# View model details
+print(model)
+
+# COMMAND ----------
+
+print('Completed')
\ No newline at end of file
diff --git a/diabetes_regression/training/R/train_with_r.py b/diabetes_regression/training/R/train_with_r.py
new file mode 100644
index 00000000..b8a0a2c3
--- /dev/null
+++ b/diabetes_regression/training/R/train_with_r.py
@@ -0,0 +1,3 @@
+import subprocess
+
+subprocess.check_call(["bash", "-c", "Rscript r_train.r && ls -ltr model.rds"])
diff --git a/diabetes_regression/training/R/train_with_r_on_databricks.py b/diabetes_regression/training/R/train_with_r_on_databricks.py
new file mode 100644
index 00000000..c571d609
--- /dev/null
+++ b/diabetes_regression/training/R/train_with_r_on_databricks.py
@@ -0,0 +1,15 @@
+import os
+import argparse
+
+parser = argparse.ArgumentParser("train")
+parser.add_argument(
+    "--AZUREML_SCRIPT_DIRECTORY_NAME",
+    type=str,
+    help="folder",
+)
+
+args, unknown = parser.parse_known_args()
+folder = args.AZUREML_SCRIPT_DIRECTORY_NAME
+
+os.system("cd " + "/dbfs/" + folder
+          + " && Rscript r_train.r && ls -ltr model.rds")
diff --git a/diabetes_regression/training/R/weight_data.csv b/diabetes_regression/training/R/weight_data.csv
new file mode 100644
index 00000000..cc441ee9
--- /dev/null
+++ b/diabetes_regression/training/R/weight_data.csv
@@ -0,0 +1,30 @@
+height,weight
+79,174
+63,250
+75,223
+75,130
+70,120
+76,239
+63,129
+64,185
+59,246
+80,241
+79,217
+65,212
+74,242
+71,223
+61,167
+78,148
+75,229
+75,116
+75,182
+72,237
+72,160
+79,169
+67,219
+61,202
+65,168
+79,181
+81,214
+78,216
+59,245
diff --git a/diabetes_regression/training/test_train.py b/diabetes_regression/training/test_train.py
new file
mode 100644 index 00000000..e1a79781 --- /dev/null +++ b/diabetes_regression/training/test_train.py @@ -0,0 +1,32 @@ +import numpy as np +from diabetes_regression.training.train import train_model, get_model_metrics + + +def test_train_model(): + X_train = np.array([1, 2, 3, 4, 5, 6]).reshape(-1, 1) + y_train = np.array([10, 9, 8, 8, 6, 5]) + data = {"train": {"X": X_train, "y": y_train}} + + reg_model = train_model(data, {"alpha": 1.2}) + + preds = reg_model.predict([[1], [2]]) + np.testing.assert_almost_equal(preds, [9.93939393939394, 9.03030303030303]) + + +def test_get_model_metrics(): + + class MockModel: + + @staticmethod + def predict(data): + return ([8.12121212, 7.21212121]) + + X_test = np.array([3, 4]).reshape(-1, 1) + y_test = np.array([8, 7]) + data = {"test": {"X": X_test, "y": y_test}} + + metrics = get_model_metrics(MockModel(), data) + + assert 'mse' in metrics + mse = metrics['mse'] + np.testing.assert_almost_equal(mse, 0.029843893480257067) diff --git a/aml_service/01-Experiment.py b/diabetes_regression/training/train.py similarity index 50% rename from aml_service/01-Experiment.py rename to diabetes_regression/training/train.py index b3543e1c..22258042 100644 --- a/aml_service/01-Experiment.py +++ b/diabetes_regression/training/train.py @@ -23,22 +23,62 @@ ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ + import os -from azureml.core import Experiment -from azureml.core import Workspace -from azureml.core.authentication import AzureCliAuthentication +import pandas as pd +from sklearn.linear_model import Ridge +from sklearn.metrics import mean_squared_error +from sklearn.model_selection import train_test_split + + +# Split the dataframe into test and train data +def split_data(df): + X = df.drop('Y', axis=1).values + y = df['Y'].values + + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=0) + data = {"train": {"X": X_train, "y": y_train}, + "test": {"X": X_test, "y": y_test}} + return data + + +# Train the model, return the model +def train_model(data, ridge_args): + reg_model = Ridge(**ridge_args) + reg_model.fit(data["train"]["X"], data["train"]["y"]) + return reg_model + + +# Evaluate the metrics for the model +def get_model_metrics(model, data): + preds = model.predict(data["test"]["X"]) + mse = mean_squared_error(preds, data["test"]["y"]) + metrics = {"mse": mse} + return metrics + + +def main(): + print("Running train.py") + + # Define training parameters + ridge_args = {"alpha": 0.5} + + # Load the training data as dataframe + data_dir = "data" + data_file = os.path.join(data_dir, 'diabetes.csv') + train_df = pd.read_csv(data_file) -cli_auth = AzureCliAuthentication() + data = split_data(train_df) + # Train the model + model = train_model(data, ridge_args) -def getExperiment(): - ws = Workspace.from_config(auth=cli_auth) - script_folder = "." 
- experiment_name = "devops-ai-demo" - exp = Experiment(workspace=ws, name=experiment_name) - print(exp.name, exp.workspace.name, sep="\n") - return exp + # Log the metrics for the model + metrics = get_model_metrics(model, data) + for (k, v) in metrics.items(): + print(f"{k}: {v}") -if __name__ == "__main__": - exp = getExperiment() +if __name__ == '__main__': + main() diff --git a/diabetes_regression/training/train_aml.py b/diabetes_regression/training/train_aml.py new file mode 100644 index 00000000..9303198b --- /dev/null +++ b/diabetes_regression/training/train_aml.py @@ -0,0 +1,176 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +""" +from azureml.core.run import Run +from azureml.core import Dataset, Datastore, Workspace +import os +import argparse +import joblib +import json +from train import split_data, train_model, get_model_metrics + + +def register_dataset( + aml_workspace: Workspace, + dataset_name: str, + datastore_name: str, + file_path: str +) -> Dataset: + datastore = Datastore.get(aml_workspace, datastore_name) + dataset = Dataset.Tabular.from_delimited_files(path=(datastore, file_path)) + dataset = dataset.register(workspace=aml_workspace, + name=dataset_name, + create_new_version=True) + + return dataset + + +def main(): + print("Running train_aml.py") + + parser = argparse.ArgumentParser("train") + parser.add_argument( + "--model_name", + type=str, + help="Name of the Model", + default="diabetes_model.pkl", + ) + + parser.add_argument( + "--step_output", + type=str, + help=("output for passing data to next step") + ) + + parser.add_argument( + "--dataset_version", + type=str, + help=("dataset version") + ) + + parser.add_argument( + "--data_file_path", + type=str, + help=("data file path, if specified,\ + a new version of the dataset will be registered") + ) + + parser.add_argument( + "--caller_run_id", + type=str, + help=("caller run id, for example ADF pipeline run id") + ) + + parser.add_argument( + "--dataset_name", + type=str, + help=("Dataset name. 
Dataset must be passed by name\
+              to always get the desired dataset version\
+              rather than the one used when the pipeline was created")
+    )
+
+    args = parser.parse_args()
+
+    print("Argument [model_name]: %s" % args.model_name)
+    print("Argument [step_output]: %s" % args.step_output)
+    print("Argument [dataset_version]: %s" % args.dataset_version)
+    print("Argument [data_file_path]: %s" % args.data_file_path)
+    print("Argument [caller_run_id]: %s" % args.caller_run_id)
+    print("Argument [dataset_name]: %s" % args.dataset_name)
+
+    model_name = args.model_name
+    step_output_path = args.step_output
+    dataset_version = args.dataset_version
+    data_file_path = args.data_file_path
+    dataset_name = args.dataset_name
+
+    run = Run.get_context()
+
+    print("Getting training parameters")
+
+    # Load the training parameters from the parameters file
+    with open("parameters.json") as f:
+        pars = json.load(f)
+    try:
+        train_args = pars["training"]
+    except KeyError:
+        print("Could not load training values from file")
+        train_args = {}
+
+    # Log the training parameters
+    print(f"Parameters: {train_args}")
+    for (k, v) in train_args.items():
+        run.log(k, v)
+        run.parent.log(k, v)
+
+    # Get the dataset
+    if (dataset_name):
+        if (data_file_path == 'none'):
+            dataset = Dataset.get_by_name(run.experiment.workspace, dataset_name, dataset_version)  # NOQA: E402, E501
+        else:
+            dataset = register_dataset(run.experiment.workspace,
+                                       dataset_name,
+                                       os.environ.get("DATASTORE_NAME"),
+                                       data_file_path)
+    else:
+        e = ("No dataset provided")
+        print(e)
+        raise Exception(e)
+
+    # Link dataset to the step run so it is trackable in the UI
+    run.input_datasets['training_data'] = dataset
+    run.parent.tag("dataset_id", value=dataset.id)
+
+    # Split the data into test/train
+    df = dataset.to_pandas_dataframe()
+    data = split_data(df)
+
+    # Train the model
+    model = train_model(data, train_args)
+
+    # Evaluate and log the metrics returned from the train function
+    metrics = get_model_metrics(model, data)
+    for (k, v) in metrics.items():
+        run.log(k, v)
+        run.parent.log(k, v)
+
+    # Pass model file to next step
+    os.makedirs(step_output_path, exist_ok=True)
+    model_output_path = os.path.join(step_output_path, model_name)
+    joblib.dump(value=model, filename=model_output_path)
+
+    # Also upload model file to run outputs for history
+    os.makedirs('outputs', exist_ok=True)
+    output_path = os.path.join('outputs', model_name)
+    joblib.dump(value=model, filename=output_path)
+
+    run.tag("run_type", value="train")
+    print(f"tags now present for run: {run.tags}")
+
+    run.complete()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/model/placeholder b/diabetes_regression/util/__init__.py
similarity index 100%
rename from model/placeholder
rename to diabetes_regression/util/__init__.py
diff --git a/diabetes_regression/util/model_helper.py b/diabetes_regression/util/model_helper.py
new file mode 100644
index 00000000..0fd20ef0
--- /dev/null
+++ b/diabetes_regression/util/model_helper.py
@@ -0,0 +1,79 @@
+"""
+model_helper.py
+"""
+from azureml.core import Run
+from azureml.core import Workspace
+from azureml.core.model import Model as AMLModel
+
+
+def get_current_workspace() -> Workspace:
+    """
+    Retrieves and returns the current workspace.
+    Will not work when run locally.
+
+    Parameters:
+    None
+
+    Return:
+    The current workspace.
+ """ + run = Run.get_context(allow_offline=False) + experiment = run.experiment + return experiment.workspace + + +def get_model( + model_name: str, + model_version: int = None, # If none, return latest model + tag_name: str = None, + tag_value: str = None, + aml_workspace: Workspace = None +) -> AMLModel: + """ + Retrieves and returns a model from the workspace by its name + and (optional) tag. + + Parameters: + aml_workspace (Workspace): aml.core Workspace that the model lives. + model_name (str): name of the model we are looking for + (optional) model_version (str): model version. Latest if not provided. + (optional) tag (str): the tag value & name the model was registered under. + + Return: + A single aml model from the workspace that matches the name and tag, or + None. + """ + if aml_workspace is None: + print("No workspace defined - using current experiment workspace.") + aml_workspace = get_current_workspace() + + tags = None + if tag_name is not None or tag_value is not None: + # Both a name and value must be specified to use tags. + if tag_name is None or tag_value is None: + raise ValueError( + "model_tag_name and model_tag_value should both be supplied" + + "or excluded" # NOQA: E501 + ) + tags = [[tag_name, tag_value]] + + model = None + if model_version is not None: + # TODO(tcare): Finding a specific version currently expects exceptions + # to propagate in the case we can't find the model. This call may + # result in a WebserviceException that may or may not be due to the + # model not existing. + model = AMLModel( + aml_workspace, + name=model_name, + version=model_version, + tags=tags) + else: + models = AMLModel.list( + aml_workspace, name=model_name, tags=tags, latest=True) + if len(models) == 1: + model = models[0] + elif len(models) > 1: + raise Exception("Expected only one model") + + return model diff --git a/docs/canary_ab_deployment.md b/docs/canary_ab_deployment.md new file mode 100644 index 00000000..49edb503 --- /dev/null +++ b/docs/canary_ab_deployment.md @@ -0,0 +1,124 @@ +# Model deployment to AKS cluster with Canary deployment + +[![Build Status](https://aidemos.visualstudio.com/MLOps/_apis/build/status/microsoft.MLOpsPython-Canary?branchName=master)](https://aidemos.visualstudio.com/MLOps/_build/latest?definitionId=133&branchName=master) + +If your target deployment environment is a Kubernetes cluster and you want to implement [Canary and/or A/B testing deployment strategies](http://adfpractice-fedor.blogspot.com/2019/04/deployment-strategies-with-kubernetes.html) you can follow this sample guide. + +- [Prerequisites](#prerequisites) +- [Install Istio on a K8s cluster](#install-istio-on-a-k8s-cluster) +- [Set up variables](#set-up-variables) +- [Configure a pipeline to build and deploy a scoring Image](#configure-a-pipeline-to-build-and-deploy-a-scoring-image) +- [Build a new Scoring Image](#build-a-new-scoring-image) + +## Prerequisites + +Before continuing with this guide, you will need: + +* An [Azure Kubernetes Service (AKS)](https://azure.microsoft.com/en-us/services/kubernetes-service) cluster + * This does **not** have to be the same cluster as the example in [Getting Started: Deploy the model to Azure Kubernetes Service](/docs/getting_started.md#deploy-the-model-to-azure-kubernetes-service) + * The cluster does not have to be connected to Azure Machine Learning. 
+  * If you want to deploy a new cluster, see [Quickstart: Deploy an Azure Kubernetes Service cluster using the Azure CLI](https://docs.microsoft.com/en-us/azure/aks/kubernetes-walkthrough)
+* An Azure Container Registry instance that is authenticated with your Azure Kubernetes Service cluster.
+  * The chart you will deploy assumes you are authenticated using a service principal.
+  * See [Authenticate with Azure Container Registry from Azure Kubernetes Service](https://docs.microsoft.com/en-us/azure/aks/cluster-container-registry-integration#configure-acr-integration-for-existing-aks-clusters) for an authentication guide.
+* In Azure DevOps, a service connection to your Kubernetes cluster.
+  * If you do not currently have a namespace, create one named 'abtesting'.
+
+## Install Istio on a K8s cluster
+
+You'll be using the [Istio](https://istio.io) service mesh implementation to control traffic routing between model versions. Follow the instructions at [Install and use Istio in Azure Kubernetes Service (AKS)](https://docs.microsoft.com/azure/aks/servicemesh-istio-install?pivots=client-operating-system-linux).
+
+After Istio is installed, determine the Istio ingress gateway endpoint on your K8s cluster:
+
+```bash
+GATEWAY_IP=$(kubectl get svc istio-ingressgateway -n istio-system -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+```
+
+You don't need to create any Istio resources (e.g. Gateway or VirtualService) at this point. That is handled by the AzDo pipeline that builds and deploys a scoring image.
+
+## Set up variables
+
+There are some extra variables that you need to set up in the ***devopsforai-aml-vg*** variable group (see [getting started](./getting_started.md)):
+
+| Variable Name             | Suggested Value       | Short Description                                          |
+|---------------------------|-----------------------|------------------------------------------------------------|
+| K8S_AB_SERVICE_CONNECTION | mlops-aks             | Name of the service connection to your Kubernetes cluster  |
+| K8S_AB_NAMESPACE          | abtesting             | Kubernetes namespace for model deployment                  |
+| IMAGE_REPO_NAME           | [Your ACR's DNS name] | Image repository name (e.g. mlopspyciamlcr.azurecr.io)     |
+
+## Configure a pipeline to build and deploy a scoring Image
+
+Import and run the [abtest.yml](./.pipelines/abtest.yml) multistage deployment pipeline.
+
+After the pipeline completes successfully, you will see a registered Docker image in the ACR repository attached to the Azure ML Service:
+
+![scoring image](./images/scoring_image.png)
+
+The pipeline creates an Istio Gateway and VirtualService and deploys the scoring image to the Kubernetes cluster.
+
+```bash
+kubectl get deployments --namespace abtesting
+NAME          READY   UP-TO-DATE   AVAILABLE   AGE
+model-green   1/1     1            1           19h
+```
+
+## Build a new Scoring Image
+
+Change the value of the ***SCORE_SCRIPT*** variable in [abtest.yml](./.pipelines/abtest.yml) to point to ***scoring/scoreA.py*** and merge it into the master branch.
+
+**Note:** The ***scoreA.py*** and ***scoreB.py*** files used in this tutorial are just mockups returning either "New Model A" or "New Model B" respectively. They are used to demonstrate the concept of testing two scoring images with different models or scoring code. In real life you would implement a scoring file similar to [score.py](./../code/scoring/score.py) (see the [Getting Started](./getting_started.md) guide).
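+
+Once traffic starts shifting through the stages listed below, you can also estimate the live split programmatically rather than eyeballing curl output. A minimal sketch (illustrative only, not part of the repository; it assumes the `requests` package is installed and that `GATEWAY_IP` was exported during the Istio setup above):
+
+```python
+import collections
+import os
+
+import requests
+
+# Istio ingress gateway IP discovered via kubectl in the setup step.
+gateway_ip = os.environ["GATEWAY_IP"]
+
+# Send a batch of requests and tally the responses per model version.
+counts = collections.Counter()
+for _ in range(100):
+    response = requests.get(f"http://{gateway_ip}/score", timeout=10)
+    counts[response.text.strip()] += 1
+
+# With the Blue_50 stage active, both versions should appear roughly 50/50.
+for version, count in counts.items():
+    print(f"{version}: {count}")
+```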
+
+The merge will automatically trigger the pipeline and deploy a new scoring image through the following stages, implementing the ***Canary*** deployment strategy:
+
+| Stage      | Green Weight | Blue Weight | Description |
+|------------|--------------|-------------|-------------|
+| Blue_0     | 100          | 0           | New image (blue) is deployed, but all traffic (100%) is still routed to the old (green) image. |
+| Blue_50    | 50           | 50          | Traffic is split between the old (green) and new (blue) images 50/50. |
+| Blue_100   | 0            | 100         | All traffic (100%) is routed to the blue image. |
+| Blue_Green | 0            | 100         | The old green image is removed and the new blue image is copied as green, so the blue and green images are equal. All traffic (100%) is routed to the blue image. |
+| Green_100  | 100          | 0           | All traffic (100%) is routed to the green image. The blue image is removed. |
+
+**Note:** The pipeline performs the rollout without any pausing. You may want to configure [Approvals and Checks](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/approvals?view=azure-devops&tabs=check-pass) for the stages in your environment for a better model testing experience. The ***abtestenv*** environment will be added automatically to your AzDo project after the first pipeline run.
+
+At each stage you can verify how the traffic is routed by sending requests to $GATEWAY_IP/score with ***Postman*** or with ***curl***:
+
+```bash
+curl $GATEWAY_IP/score
+```
+
+You can also emulate a simple load test on the gateway with the ***load_test.sh*** script:
+
+```bash
+./charts/load_test.sh 10 $GATEWAY_IP/score
+```
+
+The command above sends 10 requests to the gateway, so if the pipeline has completed stage Blue_50, the result will look like this:
+
+```bash
+"New Model A"
+"New Model A"
+"New Model A"
+"New Model B"
+"New Model A"
+"New Model B"
+"New Model B"
+"New Model A"
+"New Model A"
+"New Model A"
+```
+
+Regardless of the blue/green weight values set on the cluster, you can perform ***A/B testing*** and send requests directly to either the blue or green image:
+
+```bash
+curl --header "x-api-version: blue" $GATEWAY_IP/score
+curl --header "x-api-version: green" $GATEWAY_IP/score
+```
+
+or with the load_test.sh script:
+
+```bash
+./charts/load_test.sh 10 $GATEWAY_IP/score blue
+./charts/load_test.sh 10 $GATEWAY_IP/score green
+```
+
+In this case the Istio VirtualService inspects the request header and routes the traffic directly to the specified model version.
diff --git a/docs/code_description.md b/docs/code_description.md
index 45fb7bb7..81abc78f 100644
--- a/docs/code_description.md
+++ b/docs/code_description.md
@@ -1,60 +1,97 @@
 ## Repo Details
-### Environment Setup
-
-- requirements.txt : It consist of list of python packages which are needed by the train.py to run successfully on host agent (locally).
-
-- install_requirements.sh : This script prepare the python environment i.e. install the Azure ML SDK and the packages specified in requirements.txt
-
-### Config Files
-All the scripts inside the ./aml_config are config files. These are the files where you need to provide details about the subscription, resource group, workspace, conda dependencies, remote vm, AKS etc.
-
-- config.json : This is a mandatory config file. Provide the subscription id, resource group name, workspace name and location where you want to create Azure ML services workspace. If you have already created the workspace, provide the existing workspace details in here.
+### Directory Structure
+
+High level directory structure for this repository:
+
+```bash
+├── .pipelines <- Azure DevOps YAML pipelines for CI, PR and model training and deployment.
+├── bootstrap <- Python script to initialize this repository with a custom project name.
+├── charts <- Helm charts to deploy resources on Azure Kubernetes Service(AKS).
+├── data <- Initial set of data to train and evaluate model. Not intended for data storage.
+├── diabetes_regression <- The top-level folder for the ML project.
+│ ├── evaluate <- Python script to evaluate trained ML model.
+│ ├── register <- Python script to register trained ML model with Azure Machine Learning Service.
+│ ├── scoring <- Python score.py to deploy trained ML model.
+│ ├── training <- Python script to train ML model.
+│ ├── R <- R script to train an R-based ML model.
+│ ├── util <- Python script for various utility operations specific to this ML project.
+├── docs <- Extensive markdown documentation for entire project.
+├── environment_setup <- The top-level folder for everything related to infrastructure.
+│ ├── arm-templates <- Azure Resource Manager(ARM) templates to build infrastructure needed for this project.
+│ ├── tf-templates <- Terraform templates to build infrastructure needed for this project.
+├── experimentation <- Jupyter notebooks with ML experimentation code.
+├── ml_service <- The top-level folder for all Azure Machine Learning resources.
+│ ├── pipelines <- Python script that builds Azure Machine Learning pipelines.
+│ ├── util <- Python script for various utility operations specific to Azure Machine Learning.
+├── .env.example <- Example .env file with environment for local development experience.
+├── .gitignore <- A gitignore file specifies intentionally un-tracked files that Git should ignore.
+├── LICENSE <- License document for this project.
+├── README.md <- The top-level README for developers using this project.
+```
+
+The repository provides a template with a folder structure suitable for maintaining multiple ML projects. There are common folders such as ***.pipelines***, ***environment_setup*** and ***ml_service***, plus folders containing the code base for each ML project. This repository contains a single sample ML project in the ***diabetes_regression*** folder. This folder is automatically renamed to your project name if you follow the [bootstrap procedure](../bootstrap/README.md).
 
-- conda_dependencies.yml : This is a mandatory file. This files contains the list of dependencies which are needed by the training/scoring script to run. This file is used to prepare environment for the local run(user managed/system managed) and docker run(local/remote).
-
-- security_config.json : This file contains the credentials to the remove vm where we want to train the model. This config is used by the script 02-AttachTrainingVM.py to attach remote vm as a compute to the workspace. Attaching remote vm to workspace is one time operation. It is recommended not to publish this file with credentials populated in it. You can put the credentials, run the 02-AttachTrainingVM.py manually and clear the credentials before pushing it to git.
-
-- aks_webservice.json : This is an optional config. If you already have an AKS attached to your workspace, then provide the details in this file. If not, you do not have to check in this file to git.
-
-### Build Pipeline Scripts
+### Environment Setup
 
-The script under ./aml_service are used in build pipeline. All the scripts starting with 0 are the one time run scripts. These are the scripts which need to be run only once. There is no harm of running these scripts every time in build pipeline.
+- `environment_setup/install_requirements.sh` : This script prepares a local conda environment, i.e. installs the Azure ML SDK and the packages specified in the environment definitions.
 
-- 00-WorkSpace.py : This is a onetime run script. It reads the workspace details from ./aml_config/config.json file and create (if workspace not available) or get (existing workspace).
+- `environment_setup/iac-*-arm.yml, arm-templates` : Infrastructure as Code pipelines to create required resources using ARM, along with corresponding arm-templates. Infrastructure as Code can be deployed with this template or with the Terraform template.
 
-- 01-Experiment.py : This is a onetime run script. It registers the root directory as project. It is not included as a step in build pipeline.
+- `environment_setup/iac-*-tf.yml, tf-templates` : Infrastructure as Code pipelines to create required resources using Terraform, along with corresponding tf-templates. Infrastructure as Code can be deployed with this template or with the ARM template.
 
-- 02-AttachTrainingVM.py : This is a onetime run script. It attaches a remote VM to the workspace. It reads the config from ./aml_config/security_config.json. It is not included as a step in build pipeline.
+- `environment_setup/iac-remove-environment.yml` : Infrastructure as Code pipeline to delete the created required resources.
 
-- 10-TrainOnLocal.py : This scripts triggers the run of ./training/train.py script on the local compute(Host agent in case of build pipeline). If you are training on remote vm, you do not need this script in build pipeline. All the training scripts (1x) generates an output file aml_config/run_id.json which records the run_id and run history name of the training run. run_id.json is used by 20-RegisterModel.py to get the trained model.
+- `environment_setup/Dockerfile` : Dockerfile of a build agent containing Python 3.6 and all required packages.
 
-- 11-TrainOnLocalEnv.py : Its functionality is same as 10-TrainOnLocal.py, the only difference is that it creates a virtual environment on local compute and run training script on virtual env.
+- `environment_setup/docker-image-pipeline.yml` : An AzDo pipeline for building and pushing the [microsoft/mlopspython](https://hub.docker.com/_/microsoft-mlops-python) image.
 
-- 12-TrainOnVM.py : As we want to train the model on remote VM, this script is included as a task in build pipeline. It submits the training job on remote vm.
+### Pipelines
 
-- 15.EvaluateModel.py : It gets the metrics of latest model trained and compares it with the model in production. If the production model still performs better, all below scripts are skipped.
+- `.pipelines/abtest.yml` : a pipeline demonstrating the [Canary deployment strategy](./docs/canary_ab_deployment.md).
+- `.pipelines/code-quality-template.yml` : a pipeline template used by the CI and PR pipelines. It contains steps performing linting, data and unit testing.
+- `.pipelines/diabetes_regression-ci-image.yml` : a pipeline building a scoring image for the diabetes regression model.
+- `.pipelines/diabetes_regression-ci.yml` : a pipeline triggered when the code is merged into **master**. It performs linting, data integrity testing, unit testing, building and publishing an ML pipeline.
+- `.pipelines/diabetes_regression-cd.yml` : a pipeline triggered when the code is merged into **master** and the `.pipelines/diabetes_regression-ci.yml` completes. Deploys the model to ACI, AKS or Webapp.
+- `.pipelines/diabetes_regression-package-model-template.yml` : a pipeline template that creates a model package and adds the package location to the environment for subsequent tasks to use.
+- `.pipelines/diabetes_regression-get-model-id-artifact-template.yml` : a pipeline template used by the `.pipelines/diabetes_regression-cd.yml` pipeline. It takes the model metadata artifact published by the previous pipeline and gets the model ID.
+- `.pipelines/diabetes_regression-publish-model-artifact-template.yml` : a pipeline template used by the `.pipelines/diabetes_regression-ci.yml` pipeline. It determines whether a new model was registered and publishes a pipeline artifact containing the model metadata.
+- `.pipelines/helm-*.yml` : pipeline templates used by the `.pipelines/abtest.yml` pipeline.
+- `.pipelines/pr.yml` : a pipeline triggered when a **pull request** to the **master** branch is created. It performs linting, data integrity testing and unit testing only.
 
-- 20-RegisterModel.py : It gets the run id from training steps output json and registers the model associated with that run along with tags. This scripts outputs a model.json file which contains model name and version. This script included as build task.
+### ML Services
 
-- 30-CreateScoringImage.py : This takes the model details from last step, creates a scoring webservice docker image and publish the image to ACR. This script included as build task. It writes the image name and version to image.json file.
+- `ml_service/pipelines/diabetes_regression_build_train_pipeline.py` : builds and publishes an ML training pipeline. It uses Python on ML Compute.
+- `ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py` : builds and publishes an ML training pipeline. It uses R on ML Compute.
+- `ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py` : builds and publishes an ML training pipeline. It uses R on Databricks Compute.
+- `ml_service/pipelines/run_train_pipeline.py` : invokes a published ML training pipeline (Python on ML Compute) via REST API.
+- `ml_service/util` : contains common utility functions used to build and publish an ML training pipeline.
 
-### Deployment/Release Scripts
-File under the directory ./aml_service starting with 5x and 6x are used in release pipeline. They are basically to deploy the docker image on AKS and ACI and publish webservice on them.
+### Environment Definitions
 
-- 50-deployOnAci.py : This script reads the image.json which is published as an artifact from build pipeline, create aci cluster and deploy the scoring web service on it. It writes the scoring service details to aci_webservice.json
+- `diabetes_regression/conda_dependencies.yml` : Conda environment definition for the environment used for both training and scoring (the Docker image in which train.py and score.py are run).
+- `diabetes_regression/ci_dependencies.yml` : Conda environment definition for the CI environment.
 
-- 51-deployOnAks.py : This script reads the image.json which is published as an artifact from build pipeline, create aks cluster and deploy the scoring web service on it. If the aks_webservice.json file was checked in with existing aks details, it will update the existing webservice with new Image. It writes the scoring service details to aks_webservice.json
+### Training Step
 
-- 60-AciWebServiceTest.py : Reads the ACI info from aci_webservice.json and test it with sample data.
+- `diabetes_regression/training/train_aml.py` : the training step of an ML training pipeline.
+- `diabetes_regression/training/train.py` : ML functionality called by train_aml.py.
+- `diabetes_regression/training/R/r_train.r` : trains a model with R based on a sample dataset (weight_data.csv).
+- `diabetes_regression/training/R/train_with_r.py` : a Python wrapper (ML Pipeline Step) invoking the R training script on ML Compute.
+- `diabetes_regression/training/R/train_with_r_on_databricks.py` : a Python wrapper (ML Pipeline Step) invoking the R training script on Databricks Compute.
+- `diabetes_regression/training/R/weight_data.csv` : a sample dataset used by the R script (r_train.r) to train a model.
+- `diabetes_regression/training/test_train.py` : a unit test for the training script(s).
 
-- 61-AksWebServiceTest.py : Reads the AKS info from aks_webservice.json and test it with sample data.
+### Evaluation Step
 
-### Training/Scoring Scripts
+- `diabetes_regression/evaluate/evaluate_model.py` : an evaluation step that cancels the pipeline if the new model does not improve on the production model.
 
-- /code/training/train.py : This is the model training code. It uploads the model file to AML Service run id once the training is successful. This script is submitted as run job by all the 1x scripts.
+### Registering Step
 
-- /code/scoring/score.py : This is the score file used to create the webservice docker image. There is a conda_dependencies.yml in this directory which is exactly same as the one in aml_config. These two files are needed by the 30-CreateScoringImage.py scripts to be in same root directory while creating the image.
+- `diabetes_regression/register/register_model.py` : registers a newly trained model if evaluation shows it is more performant than the previous one.
 
-**Note: In CICD Pipeline, please make sure that the working directory is the root directory of the repo.**
+### Scoring
+- `diabetes_regression/scoring/score.py` : a scoring script that is packaged into a Docker image together with the model when deploying to the QA/Prod environment.
+- `diabetes_regression/scoring/inference_config.yml`, `deployment_config_aci.yml`, `deployment_config_aks.yml` : configuration files for the [AML Model Deploy](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.private-vss-services-azureml&ssr=false#overview) pipeline task for ACI and AKS deployment targets.
+- `diabetes_regression/scoring/scoreA.py`, `diabetes_regression/scoring/scoreB.py` : simplified scoring files for the [Canary deployment sample](./docs/canary_ab_deployment.md).
diff --git a/docs/custom_container.md b/docs/custom_container.md
new file mode 100644
index 00000000..46e692f9
--- /dev/null
+++ b/docs/custom_container.md
@@ -0,0 +1,113 @@
+# Customizing the Azure DevOps job container
+
+The model training and deployment pipeline uses a Docker container
+on the Azure Pipelines agents to provide a reproducible environment
+to run test and deployment code. The image of the container,
+`mcr.microsoft.com/mlops/python:latest`, is built with this
+[Dockerfile](../environment_setup/Dockerfile).
+
+Additionally, the mcr.microsoft.com/mlops/python image is also tagged with the tags below.
+
+| Image Tags                                      | Description                                                                                   |
+| ----------------------------------------------- | :--------------------------------------------------------------------------------------------- |
+| mcr.microsoft.com/mlops/python:latest           | latest image                                                                                  |
+| mcr.microsoft.com/mlops/python:build-[id]       | where [id] is the Azure DevOps build id, e.g. mcr.microsoft.com/mlops/python:build-20200325.1 |
+| mcr.microsoft.com/mlops/python:amlsdk-[version] | where [version] is the AML SDK version, e.g. mcr.microsoft.com/mlops/python:amlsdk-1.1.5.1    |
+| mcr.microsoft.com/mlops/python:release-[id]     | where [id] is the GitHub release id, e.g. mcr.microsoft.com/mlops/python:release-3.0.0        |
+
+In your project you will want to build your own
+Docker image that only contains the dependencies and tools required for your
+use case. This image will likely be smaller and therefore faster, and it
+will be fully maintained by your team.
+
+## Provision an Azure Container Registry
+
+An Azure Container Registry is deployed along with your Azure ML Workspace to manage models.
+You can use that registry instance to store your MLOps container image as well, or
+provision a separate instance.
+
+## Create a Registry Service Connection
+
+[Create a service connection](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/service-endpoints?view=azure-devops&tabs=yaml#sep-docreg) to your Azure Container Registry:
+
+- As *Connection type*, select *Docker Registry*
+- As *Registry type*, select *Azure Container Registry*
+- As *Azure container registry*, select your Container registry instance
+- As *Service connection name*, enter `acrconnection`
+
+## Update the environment definition
+
+Modify the [Dockerfile](../environment_setup/Dockerfile) and/or the
+[ci_dependencies.yml](../diabetes_regression/ci_dependencies.yml) CI Conda
+environment definition to tailor your environment.
+Conda provides a [reusable environment for training and deployment with Azure Machine Learning](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-use-environments).
+The Conda environment used for CI should use the same package versions as the Conda environment
+used for the Azure ML training and scoring environments (defined in [conda_dependencies.yml](../diabetes_regression/conda_dependencies.yml)).
+This enables you to run unit and integration tests using the exact same dependencies as used in the ML pipeline.
+
+If a package is available in a Conda package repository, then we recommend that
+you use the Conda installation rather than the pip installation. Conda packages
+typically come with prebuilt binaries that make installation more reliable.
+
+## Create a container build pipeline
+
+In your [Azure DevOps](https://dev.azure.com) project, create a new build
+pipeline referring to the
+[environment_setup/docker-image-pipeline.yml](../environment_setup/docker-image-pipeline.yml)
+pipeline definition in your forked repository.
+
+Edit the [environment_setup/docker-image-pipeline.yml](../environment_setup/docker-image-pipeline.yml) file
+and replace the string `'public/mlops/python'` with a name suitable to describe your environment,
+e.g. `'mlops/diabetes_regression'`.
+
+Save and run the pipeline, making sure to set these runtime variables: `amlsdkversion` and `githubrelease`. The values are up to you to set depending on your environment. These will show as tags on your image.
+
+![Custom Container Vars](./images/custom-container-variables.png)
+
+This will build and push a container image to your Azure Container Registry with
+the name you have just edited. The next step is to modify the build pipeline to run the CI job on a container
+run from that image.
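+
+As a quick guard against the CI/training environment drift described in the "Update the environment definition" section above, you can compare the two Conda definitions programmatically. A rough sketch (illustrative only, not part of the repository; it assumes PyYAML is installed and that the script runs from the repository root):
+
+```python
+import yaml  # PyYAML
+
+
+def pinned_deps(path):
+    """Collect the conda and pip package pins from a Conda environment file."""
+    with open(path) as f:
+        env = yaml.safe_load(f)
+    deps = set()
+    for dep in env.get("dependencies", []):
+        if isinstance(dep, str):
+            deps.add(dep)
+        elif isinstance(dep, dict):  # the nested "pip:" section
+            deps.update(dep.get("pip", []))
+    return deps
+
+
+ci = pinned_deps("diabetes_regression/ci_dependencies.yml")
+ml = pinned_deps("diabetes_regression/conda_dependencies.yml")
+
+# Pins present in the ML environment but absent from CI indicate drift
+# between the dependencies you test with and those you run with.
+for pin in sorted(ml - ci):
+    print("missing from CI environment:", pin)
+```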
+
+## Modify the model pipeline
+
+Modify the model pipeline file [diabetes_regression-ci.yml](../.pipelines/diabetes_regression-ci.yml) by replacing this section:
+
+```
+resources:
+  containers:
+  - container: mlops
+    image: mcr.microsoft.com/mlops/python:latest
+```
+
+with (using the image name previously defined):
+
+```
+resources:
+  containers:
+  - container: mlops
+    image: mlops/diabetes_regression
+    endpoint: acrconnection
+```
+
+Run the pipeline and ensure your container has been used.
+
+## Addressing conflicting dependencies
+
+Especially when working in a team, it's possible for environment changes across branches to interfere with one another.
+
+For example, if the master branch is using scikit-learn and you create a branch to use Tensorflow instead, and you
+decide to remove scikit-learn from the
+[ci_dependencies.yml](../diabetes_regression/ci_dependencies.yml) Conda environment definition
+and rebuild the Docker image by running the [docker-image-pipeline.yml](../environment_setup/docker-image-pipeline.yml) pipeline,
+then the master branch will stop building.
+
+You could leave scikit-learn in addition to Tensorflow in the environment, but that is not ideal, as you would have to take an extra step to remove scikit-learn after merging your branch to master.
+
+A better approach is to use a distinct name for your modified environment, such as `mlops/diabetes_regression/tensorflow`.
+By changing the name of the image in your branch in both the container build pipeline
+[environment_setup/docker-image-pipeline.yml](../environment_setup/docker-image-pipeline.yml)
+and the model pipeline file
+[diabetes_regression-ci.yml](../.pipelines/diabetes_regression-ci.yml),
+and running both pipelines in sequence on your branch,
+you avoid any branch conflicts, and the name does not have to be changed after merging to master.
diff --git a/docs/custom_model.md b/docs/custom_model.md
new file mode 100644
index 00000000..28a15d78
--- /dev/null
+++ b/docs/custom_model.md
@@ -0,0 +1,124 @@
+# Bring your own code with the MLOpsPython repository template
+
+This document provides steps to follow when using this repository as a template to train your own models and deploy them with real-time inference in Azure ML, using your own scripts and data.
+
+1. Follow the MLOpsPython [Getting Started](getting_started.md) guide
+1. Bootstrap the project
+1. Configure training data
+1. [If necessary] Convert your ML experimental code into production-ready code
+1. Replace the training code
+1. [Optional] Update the evaluation code
+1. Customize the build agent environment
+1. [If appropriate] Replace the score code
+1. [If appropriate] Configure batch scoring data
+
+## Follow the Getting Started guide
+
+Follow the [Getting Started](getting_started.md) guide to set up the infrastructure and pipelines to execute MLOpsPython.
+
+Take a look at the [Repo Details](code_description.md) document for a description of the structure of this repository.
+
+## Bootstrap the project
+
+Bootstrapping will prepare the directory structure to be used for your project name, which includes:
+
+* renaming files and folders from the base project name `diabetes_regression` to your project name
+* fixing imports and absolute paths based on your project name
+* deleting and cleaning up some directories
+
+**Note:** Since the bootstrap script will rename the `diabetes_regression` folder to the project name of your choice, we'll refer to your project as `[project name]` when paths are involved.
+
+To bootstrap from the existing MLOpsPython repository:
+
+1. Ensure Python 3 is installed locally
+1. From a local copy of the code, run the `bootstrap.py` script in the `bootstrap` folder:
+`python bootstrap.py -d [dirpath] -n [projectname]`
+   * `[dirpath]` is the absolute path to the root of the directory where MLOpsPython is cloned
+   * `[projectname]` is the name of your ML project
+
+# Configure Custom Training
+
+## Configure training data
+
+The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data.
+
+**Important** Convert the template to use your own Azure ML Dataset for model training via these steps:
+
+1. [Create a Dataset](https://docs.microsoft.com/azure/machine-learning/how-to-create-register-datasets) in your Azure ML workspace
+1. Update the `DATASET_NAME` and `DATASTORE_NAME` variables in `.pipelines/[project name]-variables-template.yml`
+
+## Convert your ML experimental code into production-ready code
+
+The MLOpsPython template creates an Azure Machine Learning (ML) pipeline that invokes a set of [Azure ML pipeline steps](https://docs.microsoft.com/python/api/azureml-pipeline-steps/azureml.pipeline.steps) (see `ml_service/pipelines/[project name]_build_train_pipeline.py`). If your experiment is currently in a Jupyter notebook, it will need to be refactored into scripts that can be run independently and dropped into the template, where the existing Azure ML pipeline steps can utilize them.
+
+1. Refactor your experiment code into scripts
+1. [Recommended] Prepare unit tests
+
+Examples of all these scripts are provided in this repository.
+See the [Convert ML experimental code to production code tutorial](https://docs.microsoft.com/azure/machine-learning/tutorial-convert-ml-experiment-to-production) for a step-by-step guide and additional details.
+
+## Replace training code
+
+The template contains three scripts in the `[project name]/training` folder. Update these scripts for your experiment code.
+
+* `train.py` contains the platform-agnostic logic required to do basic data preparation and train the model. This script can be invoked against a static data file for local development.
+* `train_aml.py` is the entry script for the ML pipeline step. It invokes the functions in `train.py` in an Azure ML context and adds logging. `train_aml.py` loads parameters for training from `[project name]/parameters.json` and passes them to the training function in `train.py`. If your experiment code can be refactored to match the function signatures in `train.py`, this file shouldn't need many changes.
+* `test_train.py` contains tests that guard against functional regressions in `train.py`. Remove this file if you have no tests for your own code.
+
+Add any dependencies required by training to `[project name]/conda_dependencies.yml`. This file will be used to generate the environment that the pipeline steps will run in.
+
+## Update evaluation code
+
+The MLOpsPython template uses the evaluate_model script to compare the performance of the newly trained model and the current production model based on Mean Squared Error. If the performance of the newly trained model is better than the current production model, then the pipelines continue. Otherwise, the pipelines are canceled.
+
+To keep the evaluation step, replace all instances of `mse` in `[project name]/evaluate/evaluate_model.py` with the metric that you want.
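+
+For example, switching from MSE to mean absolute error would also touch the metric produced by `get_model_metrics` in `[project name]/training/train.py`. A rough sketch of that side of the change (illustrative only; the function shape follows the template's `train.py`):
+
+```python
+from sklearn.metrics import mean_absolute_error
+
+
+# Variant of get_model_metrics from train.py reporting MAE instead of MSE.
+# The comparison logic in evaluate_model.py must be updated to match the
+# new metric name.
+def get_model_metrics(model, data):
+    preds = model.predict(data["test"]["X"])
+    mae = mean_absolute_error(data["test"]["y"], preds)
+    return {"mae": mae}
+```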
+
+To disable the evaluation step, either:
+
+* set the DevOps pipeline variable `RUN_EVALUATION` to `false`
+* uncomment `RUN_EVALUATION` in `.pipelines/[project name]-variables-template.yml` and set the value to `false`
+
+## Customize the build agent environment
+
+The DevOps pipeline definitions in the MLOpsPython template run several steps in a Docker container that contains the dependencies required to work through the Getting Started guide. These dependencies may change over time and may not suit your project's needs. To manage your own dependencies, there are a few options:
+
+* Add a pipeline step to install dependencies required by unit tests to `.pipelines/code-quality-template.yml`. Recommended if you only have a small number of test dependencies.
+* Create a new Docker image containing your dependencies. See [docs/custom_container.md](custom_container.md). Recommended if you have a larger number of dependencies, or if the overhead of installing additional dependencies on each run is too high.
+* Remove the container references from the pipeline definition files and run the pipelines on self-hosted agents with dependencies pre-installed.
+
+# Configure Custom Scoring
+
+## Replace score code
+
+For the model to provide real-time inference capabilities, the score code needs to be replaced. The MLOpsPython template uses the score code to deploy the model for real-time scoring on ACI, AKS, or Web Apps.
+
+If you want to keep scoring:
+
+1. Update or replace `[project name]/scoring/score.py`
+1. Add any dependencies required by scoring to `[project name]/conda_dependencies.yml`
+1. Modify the test cases in the `ml_service/util/smoke_test_scoring_service.py` script to match the schema of the training features in your data
+1. Check and modify `[project name]/scoring/deployment_config_aks.yml` if AKS deployment is planned. The deployment configuration should suit your custom model as well as your AKS cluster size.
+
+# Configure Custom Batch Scoring
+
+## Configure input and output data
+
+The batch scoring pipeline is configured to use the default datastore for input and output. It will use sample data for scoring.
+
+In order to configure your own input and output datastores, you will need to specify an Azure Blob Storage Account and set up input and output containers.
+
+Configure the variables below in your variable group.
+
+**Note: The datastore storage resource, input/output containers, and scoring data are not created automatically. Make sure that you have manually provisioned these resources and placed your scoring data in your input container with the proper name.**
+
+| Variable Name                      | Suggested Value | Short description                                                                                                             |
+| ---------------------------------- | --------------- | ----------------------------------------------------------------------------------------------------------------------------- |
+| SCORING_DATASTORE_STORAGE_NAME     |                 | [Azure Blob Storage Account](https://docs.microsoft.com/en-us/azure/storage/blobs/) name.                                      |
+| SCORING_DATASTORE_ACCESS_KEY       |                 | [Azure Storage Account Key](https://docs.microsoft.com/en-us/rest/api/storageservices/authorize-requests-to-azure-storage). You may want to consider linking this variable to Azure KeyVault to avoid storing the access key in plain text. |
+| SCORING_DATASTORE_INPUT_CONTAINER  |                 | The name of the container for input data. Defaults to `input` if not set.                                                      |
+| SCORING_DATASTORE_OUTPUT_CONTAINER |                 | The name of the container for output data. Defaults to `output` if not set.                                                    |
+| SCORING_DATASTORE_INPUT_FILENAME   |                 | The filename of the input data in your container. Defaults to `diabetes_scoring_input.csv` if not set.                         |
+| SCORING_DATASET_NAME               |                 | The AzureML Dataset name to use. Defaults to `diabetes_scoring_ds` if not set (optional).                                      |
+| SCORING_DATASTORE_OUTPUT_FILENAME  |                 | The filename to use for the output data. The pipeline will create this file. Defaults to `diabetes_scoring_output.csv` if not set (optional). |
+
diff --git a/docs/development_setup.md b/docs/development_setup.md
new file mode 100644
index 00000000..1c8c2479
--- /dev/null
+++ b/docs/development_setup.md
@@ -0,0 +1,33 @@
+## Development environment setup
+
+### Setup
+
+Please be aware that the local environment also needs access to the Azure subscription, so you must have Contributor access on the Azure ML Workspace.
+
+In order to configure the project locally, create a copy of `.env.example` in the root directory and name it `.env`. Fill out all missing values and adjust the existing ones to suit your requirements.
+
+### Installation
+
+[Install the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli). The Azure CLI will be used to log you in interactively.
+
+Install [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html).
+
+Install the required Python modules. [`install_requirements.sh`](https://github.com/microsoft/MLOpsPython/blob/master/environment_setup/install_requirements.sh) creates and activates a new conda environment with the required Python modules.
+
+```
+. environment_setup/install_requirements.sh
+```
+
+### Running local code
+
+To run your local ML pipeline code on Azure ML, run a command such as the following (in bash, all on one line):
+
+```
+export BUILD_BUILDID=$(uuidgen); python ml_service/pipelines/diabetes_regression_build_train_pipeline.py && python ml_service/pipelines/run_train_pipeline.py
+```
+
+BUILD_BUILDID is a variable used to uniquely identify the ML pipeline between the
+`diabetes_regression_build_train_pipeline.py` and `run_train_pipeline.py` scripts. In Azure DevOps it is
+set to the current build number. In a local environment, we can use a command such as
+`uuidgen` to set a different random identifier on each run, ensuring there are
+no collisions.
diff --git a/docs/getting_started.md b/docs/getting_started.md
index b0e16ff5..4ba694d7 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -1,207 +1,464 @@
-## Getting Started with this Repo
-
-### 1. Get the source code
-- Either clone the repository to your workspace and create your own repo with the code in GitHub.
-- An easier way is to just fork the project, so you have the repository under your username on GitHub itself.
-
-
-### 2. Create Azure DevOps account
-We use Azure DevOps for running our build(CI), retraining trigger and release (CD) pipelines. If you don't already have Azure DevOps account, create one by following the instructions [here](https://docs.microsoft.com/en-us/azure/devops/organizations/accounts/create-organization?view=azure-devops)
-
-If you already have Azure DevOps account, create a [new project](https://docs.microsoft.com/en-us/azure/devops/organizations/projects/create-project?view=azure-devops).
-
-#### Enable Azure DevOps Preview
-The steps below uses the latest DevOps features. Thus, please enable the feature **New YAML pipeline creation experience** by following the instructions [here](https://docs.microsoft.com/en-us/azure/devops/project/navigation/preview-features?view=azure-devops).
- -**Note:** Make sure you have the right permissions in Azure DevOps to do so. - -### 3. Create Service Principal to Login to Azure and create resources - -To create service principal, register an application entity in Azure Active Directory (Azure AD) and grant it the Contributor or Owner role of the subscription or the resource group where the web service belongs to. See [how to create service principal](https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal) and assign permissions to manage Azure resource. -Please make note the following values after creating a service principal, we will need them in subsequent steps -- Azure subscription id (subscriptionid) -- Service principal username (spidentity)([application id](https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal#get-application-id-and-authentication-key)) -- Service principal password (spsecret) ([auth_key](https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal#get-application-id-and-authentication-key)) -- Service principal [tenant id](https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal#get-tenant-id) (sptenant) - -**Note:** You must have sufficient permissions to register an application with your Azure AD tenant, and assign the application to a role in your Azure subscription. Contact your subscription adminstator if you don't have the permissions. Normally a subscription admin can create a Service principal and can provide you the details. - - -### 4. Store secret in Key Vault and link it as variable group in Azure DevOps to be used by piplines. -Our pipeline require the following variables to autheticate with Azure. -- spidentity -- spsecret -- sptenant -- subscriptionid - -We noted the value of these variables in previous steps. - -**NOTE:** These values should be treated as secret as they allow access to your subscription. - -We make use of variable group inside Azure DevOps to store variables and their values that we want to make available across multiple pipelines. You can either store the values directly in [Azure DevOps](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/variable-groups?view=azure-devops&tabs=designer#create-a-variable-group) or connect to an Azure Key Vault in your subscription. Please refer to the documentation [here](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/variable-groups?view=azure-devops&tabs=designer#create-a-variable-group) to learn more about how to create a variable group and [link](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/variable-groups?view=azure-devops&tabs=designer#use-a-variable-group) it to your pipeline. - -Please name your variable group **``AzureKeyVaultSecrets``**, we are using this name within our build yaml file. 
- -Up until now you shouls have -- Forked (or cloned) the repo -- Created a devops account or use an existing one -- Got service principal details and subscription id -- Set them as variable group within devops - -We now have 3 pipelines that we would set up -- **Build Pipeline (azure-pipelines.yml)**: Runs tests and sets up infrastructure -- **Retraining trigger pipeline(/template/retraining-template.json)**: This pipeline triggers Azure ML Pipeline (training/retraining) which trains a new model and publishes model image, if new model performs better -- **Release pipeline(/template/release-template.json)**: This pipeline deploys and tests model image as web service in QA and Prod environment - - - -### 5. Set up Build Pipeline -1. Select your devops organization and project by clicking dev.azure.com -2. Once you are in the right devops project, click Pipelines on the left hand menu and select Builds -3. Click **New pipeline** to create new pipeline - ![new build pipeline](./images/new-build-pipeline1.png) -4. On the Connect option page, select **GitHub** - ![build connnect step](./images/build-connect.png) - -5. On the Select option page, select the GitHub repository where you forked the code. -![select repo](./images/build-selectrepo.png) - -6. Authorize Azure Pipelines to access your git account -![select repo](./images/Install_Azure_pipeline.png) - -7. Since the repository contains azure-pipelines.yml at the root level, Azure DevOps recognizes it and auto imports it. Click **Run** and this will start the build pipeline. -![select repo](./images/build-createpipeline1.png) - -8. Your build run would look similar to the following image -![select repo](./images/build-run.png) - -Great, you now have the build pipeline setup, you can either manually trigger it or it gets automatically triggered everytime there is a change in the master branch. - - -**Note:** The build pipeline will perform basic test on the code and provision infrastructure on azure. This can take around 10 mins to complete. - -### 6. Set up Retraining trigger release pipeline - -**Note:** For setting up release pipelines, first download the [release-pipelines](../release-pipelines) to your local filesystem so you can import it. - -**Also Note:** If this is the first time you are creating a release pipeline, you would see the following option, click on **New Pipeline** -![import release pipeline](./images/release-new-pipeline.png) - -To enable the option to **Import release pipeline**, we must have atleast one release pipeline so let's create one with an empty job. -![import release pipeline](./images/release-empty-job.png) - -On the next screen, click on **Save** and then click **Ok** to save the empty release pipeline. -![import release pipeline](./images/release-save-empty.png) - -**Steps** - -1. Select the Release tab from the menu on the left, then click the New dropdown on top and click on **Import Release pipeline** -![import release pipeline](./images/release-import.png) - -1. On the next screen, navigate to **release-pipelines** folder and select **retrainingtrigger.json** pipeline file, click import. You should now see the following screen. Under Stages click on the Retrain stage, where it shows the red error sign. -![release retraining triggger](./images/release-retrainingtrigger.png) - - Click on agent job and then from the drop down for Agent Pool on the right side select **Hosted Ubuntu 1604** agent to execute your run and click **Save** button on top right. 
-![release retraining agent](./images/release-retrainingagent.png) - -1. We would now link the variable group we created earlier to this release pipeline. To do so click on the **Variables** tab, then click on **Variable** groups and then select **Link variable group** and select the variable group that we created in previous step and click **Link** followed by **Save** button. -![release retraining artifact](./images/release-link-vg.png) -1. We want the retraining pipeline to be triggered every time build pipeline is complete. To create this dependency, we will link the artifact from build pipeline as a trigger for retraining trigger release pipeline. To do so, click on the **pipeline** tab and then select **Add an artifact** option under Artifacts. -![release pipeline view](./images/release-retrainingpipeline.png) - -1. This will open up a pop up window, on this screen: - - for source type, select **Build** - - for project, select your project in Azure DevOps that you created in previous steps. - - For Source select the source build pipeline. If you have forked the git repo, the build pipeline may named ``yourgitusername.DevOpsForAI`` - - In the Source alias, replace the auto-populated value with - **``DevOpsForAI``** - - Field **Devault version** will get auto populated **Latest**, you can leave them as it is. - - Click on **Add**, and then **Save** the pipeline - ![release retraining artifact](./images/release-retrainingartifact.png) - -1. Artifact is now added for retraining trigger pipeline, hit the **save** button on top right and then click **ok**. - -1. To trigger this pipeline every time build pipeline executes, click on the lighting sign to enable the **Continous Deployment Trigger**, click **Save**. - ![release retraining artifact](./images/release-retrainingtrigger1.png) - -2. If you want to run this pipeline on a schedule, you can set one by clicking on **Schedule set** in Artifacts section. -![release retraining artifact](./images/release-retrainingartifactsuccess.png) - -1. For the first time, we will manually trigger this pipeline. - - Click Releases option on the left hand side and navigate to the release pipeline you just created. - ![release retraining artifact](./images/release-createarelease.png) - - Click **Create Release** - ![release create ](./images/release-create.png) - - On the next screen click on **Create** button, this creates a manual release for you. - - **Note**: This release pipeline will call the published AML pipeline. The AML pipeline will train the model and package it into image. It will take around 10 mins to complete. The next steps need this pipeline to complete successfully. - -### 7. Set up release (Deployment) pipeline - -**Note:** For setting up release pipelines, first download the [release-pipelines](../release-pipelines) to your local filesystem so you can import it. - -**Also Note:** Before creating this pipeline, make sure that the build pipeline, retraining trigger release pipeline and AML retraining pipeline have been executed, as they will be creating resources during their run like docker images that we will deploy as part of this pipeline. So it is important for them to have successful runs before the setup here. - -Let's set up the release deployment pipeline now. -1. As done in previous step, Select the Release tab from the menu on the left, then click the New dropdown on top and click on **Import Release pipeline** -![import release pipeline](./images/release-import.png) - -1. 
On the next screen, navigate to **release-pipelines** folder and select **releasedeployment.json** pipeline file, click import. You should now see the following screen. Under Stages click on the QA environment's **view stage task", where it shows the red error sign. -![release retraining triggger](./images/release-deployment.png) - - Click on agent job and then from the drop down for Agent Pool on the right side select **Hosted Ubuntu 1604** agent to execute your run and click **Save** button on top right. -![release retraining agent](./images/release-deploymentqaagent.png) - - Follow the same steps for **Prod Environment** and select **Hosted Ubuntu 1604** for agent pool and save the pipeline. - ![release retraining agent](./images/release-deploymentprodagent.png) - -1. We would now link the variable group we created earlier to this release pipeline. To do so click on the **Variables** tab, then click on **Variable** groups and then select **Link variable group** and select the variable group that we created in previous step and click **Link** followed by **Save** button. -![release retraining artifact](./images/release-link-vg.png) - -1. We now need to add artifact that will trigger this pipeline. We will add two artifacts: - - Build pipeline output as artifact since that contains our configuration and code files that we require in this pipeline. - - ACR artifact to trigger this pipeline everytime there is a new image that gets published to Azure container registry (ACR) as part of retraining pipeline. - - Here are the steps to add build output as artifact - - - Click on pipeline tab to go back to pipeline view and click **Add an artifact**. This will open a pop up window - - for source type, select **Build** - - for project, select your project in Azure DevOps that you created in previous steps. - - For Source select the source build pipeline. If you have forked the git repo, the build pipeline may named ``yourgitusername.DevOpsForAI`` - - In the Source alias, replace the auto-populated value with - **``DevOpsForAI``** - - Field **Devault version** will get auto populated **Latest**, you can leave them as it is. - - Click on **Add**, and then **Save** the pipeline - ![release retraining artifact](./images/release-retrainingartifact.png) - - Here are the steps to add ACR as an artifact - - ![release retraining agent](./images/release-deployment-service-conn.png) - - - - Click on pipeline tab to go back to pipeline view and click **Add an artifact**. This will open a pop up window - - For Source type, click on **more artifact types** dropdown and select **Azure Container Registry** - - For **service connection**, select an existing service connection to Azure, if you don't see anything in the dropdown, click on **Manage** and [create new **Azure Resource Manager**](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/service-endpoints?view=azure-devops#create-a-service-connection) service connection for your subscription. - **Note:** You must have sufficient privileges to create a service connection, if not contact your subscription adminstrator. - - For Resource Group, select **DevOps_AzureML_Demo**, this is the default resource group name that we are using and if the previous pipelines executed properly you will see this resource group in the drop down. - - Under Azure container registry dropdown, select the container registry, there should be only one container registry entry. - - For repository, select **diabetes-model-score** repository. 
- - For Default version, keep it to **latest** - - For Source alias, keep the default generated name. - - Click Add - - Click on lighting sign to enable the **Continous Deployment Trigger**, click Save. - ![release retraining artifact](./images/release-deploymentcitrigger.png) - - -1. We now have QA environment continously deployed each time there is a new image available in container registry. You can select pre-deployment conditions for prod environment, normally you don't want it to be auto deployed, so select manual only trigger here. - - ![release retraining artifact](./images/release-deploymentprodtrigger.png) - - To deploy a release manually, follow the document [here](https://docs.microsoft.com/en-us/azure/devops/pipelines/get-started-designer?view=azure-devops&tabs=new-nav#deploy-a-release) - - -Congratulations, you now have three pipelines set up end to end. - - Build pipeline: triggered on code change to master branch on GitHub. - - Release Trigger pipeline: triggered on build pipeline execution and produces a new model image if better than previous one. - - Release Deployment pipeline: QA environment is auto triggered when there is a new image. - Prod is manual only and user decides when to release to this environment. +# Getting Started with MLOpsPython + +This guide shows how to get MLOpsPython working with a sample ML project **_diabetes_regression_**. The project creates a linear regression model to predict diabetes. Once you complete the steps in this guide, the project has CI/CD DevOps practices enabled for model training and serving. + +If you would like to bring your own model code to use this template structure, follow the [custom model](custom_model.md) guide. We recommend completing this getting started guide with the diabetes model through ACI deployment first to ensure everything is working in your environment before converting the template to use your own model code.
+ +- [Setting up Azure DevOps](#setting-up-azure-devops) + - [Install the Azure Machine Learning extension](#install-the-azure-machine-learning-extension) +- [Get the code](#get-the-code) +- [Create a Variable Group for your Pipeline](#create-a-variable-group-for-your-pipeline) + - [Variable Descriptions](#variable-descriptions) +- [Provisioning resources using Azure Pipelines](#provisioning-resources-using-azure-pipelines) + - [Create an Azure DevOps Service Connection for the Azure Resource Manager](#create-an-azure-devops-service-connection-for-the-azure-resource-manager) + - [Create the IaC Pipeline](#create-the-iac-pipeline) +- [Create an Azure DevOps Service Connection for the Azure ML Workspace](#create-an-azure-devops-service-connection-for-the-azure-ml-workspace) +- [Set up Build, Release Trigger, and Release Multi-Stage Pipelines](#set-up-build-release-trigger-and-release-multi-stage-pipelines) + - [Set up the Model CI, Training, Evaluation, and Registration Pipeline](#set-up-the-model-ci-training-evaluation-and-registration-pipeline) + - [Set up the Release Deployment and/or Batch Scoring Pipelines](#set-up-the-release-deployment-andor-batch-scoring-pipelines) +- [Further Exploration](#further-exploration) + - [Deploy the model to Azure Kubernetes Service](#deploy-the-model-to-azure-kubernetes-service) + - [Web Service Authentication on Azure Kubernetes Service](#web-service-authentication-on-azure-kubernetes-service) + - [Deploy the model to Azure App Service (Azure Web App for containers)](#deploy-the-model-to-azure-app-service-azure-web-app-for-containers) + - [Example pipelines using R](#example-pipelines-using-r) + - [Observability and Monitoring](#observability-and-monitoring) + - [Clean up the example resources](#clean-up-the-example-resources) +- [Next Steps: Integrating your project](#next-steps-integrating-your-project) + - [Additional Variables and Configuration](#additional-variables-and-configuration) + - [More variable options](#more-variable-options) + - [Local configuration](#local-configuration) + +## Setting up Azure DevOps + +You'll use Azure DevOps for running the multi-stage pipeline with build, model training, and scoring service release stages. If you don't already have an Azure DevOps organization, create one by following the instructions at [Quickstart: Create an organization or project collection](https://docs.microsoft.com/en-us/azure/devops/organizations/accounts/create-organization?view=azure-devops). + +If you already have an Azure DevOps organization, create a new project using the guide at [Create a project in Azure DevOps and TFS](https://docs.microsoft.com/en-us/azure/devops/organizations/projects/create-project?view=azure-devops). + +### Install the Azure Machine Learning extension + +Install the **Azure Machine Learning** extension to your Azure DevOps organization from the [Visual Studio Marketplace](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml) by clicking "Get it free" and following the steps. If the extension is already installed, the UI will tell you when you try to add it. + +This extension contains the Azure ML pipeline tasks and adds the ability to create Azure ML Workspace service connections. The documentation page on the marketplace provides detailed instructions, with screenshots, on the capabilities the extension includes.
+ +## Get the code + +We recommend using the [repository template](https://github.com/microsoft/MLOpsPython/generate), which effectively forks this repository to your own GitHub location and squashes the history. You can use the resulting repository for this guide and for your own experimentation. + +## Create a Variable Group for your Pipeline + +MLOpsPython requires some variables to be set before you can run any pipelines. You'll need to create a _variable group_ in Azure DevOps to store values that are reused across multiple pipelines or pipeline stages. Either store the values directly in [Azure DevOps](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/variable-groups?view=azure-devops&tabs=designer#create-a-variable-group) or connect to an Azure Key Vault in your subscription. Check out the [Add & use variable groups](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/variable-groups?view=azure-devops&tabs=yaml#use-a-variable-group) documentation to learn more about how to create a variable group and link it to your pipeline. + +Navigate to **Library** in the **Pipelines** section as indicated below: + +![Library Variable Groups](./images/library_variable_groups.png) + +Create a variable group named **`devopsforai-aml-vg`**. The YAML pipeline definitions in this repository refer to this variable group by name. + +The variable group should contain the following required variables. **Azure resources that don't exist yet will be created in the [Provisioning resources using Azure Pipelines](#provisioning-resources-using-azure-pipelines) step below.** + +| Variable Name | Suggested Value | Short description | +| ------------------------ | ------------------------- | --------------------------------------------------------------------------------------------------------------------------- | +| BASE_NAME | [your project name] | Unique naming prefix for created resources - max 10 chars, letters and numbers only | +| LOCATION | centralus | [Azure location](https://azure.microsoft.com/en-us/global-infrastructure/locations/), no spaces. You can list all the region codes by running `az account list-locations -o table` in the Azure CLI | +| RESOURCE_GROUP | mlops-RG | Azure Resource Group name | +| WORKSPACE_NAME | mlops-AML-WS | Azure ML Workspace name | +| AZURE_RM_SVC_CONNECTION | azure-resource-connection | [Azure Resource Manager Service Connection](#create-an-azure-devops-service-connection-for-the-azure-resource-manager) name | +| WORKSPACE_SVC_CONNECTION | aml-workspace-connection | [Azure ML Workspace Service Connection](#create-an-azure-devops-service-connection-for-the-azure-ml-workspace) name | +| ACI_DEPLOYMENT_NAME | mlops-aci | [Azure Container Instances](https://azure.microsoft.com/en-us/services/container-instances/) name | + +Make sure you select the **Allow access to all pipelines** checkbox in the variable group configuration. To do this, first **Save** the variable group, then click **Pipeline Permissions**, then the button with the 3 vertical dots, and then the **Open access** button. + +More variables are available for further tweaking, but the above variables are all you need to get started with this example. For more information, see the [Additional Variables and Configuration](#additional-variables-and-configuration) section. + +### Variable Descriptions + +**BASE_NAME** is used as a prefix for naming Azure resources and should be unique.
When sharing an Azure subscription, the prefix allows you to avoid naming collisions for resources that require unique names, for example, Azure Blob Storage and Registry DNS. Make sure to set BASE_NAME to a unique name so that created resources will have unique names, for example, MyUniqueMLamlcr, MyUniqueML-AML-KV, and so on. The length of the BASE_NAME value shouldn't exceed 10 characters and must contain letters and numbers only. + +**LOCATION** is the name of the [Azure location](https://azure.microsoft.com/en-us/global-infrastructure/locations/) for your resources. There should be no spaces in the name. For example, centralus, westus, northeurope. You can list all the region codes by running `az account list-locations -o table` in the Azure CLI. + +**RESOURCE_GROUP** is used as the name for the resource group that will hold the Azure resources for the solution. If providing an existing Azure ML Workspace, set this value to the corresponding resource group name. + +**WORKSPACE_NAME** is used for creating the Azure Machine Learning Workspace. *While you should be able to provide an existing Azure ML Workspace if you have one, you will run into problems if it was provisioned manually and the naming of the associated storage account doesn't follow the convention used in this repo -- the environment provisioning will try to associate the workspace with a new Storage Account, which is not supported. To avoid these problems, specify a new, unique workspace name.* + +**AZURE_RM_SVC_CONNECTION** is used by the [Azure Pipeline](../environment_setup/iac-create-environment-pipeline.yml) in Azure DevOps that creates the Azure ML workspace and associated resources through Azure Resource Manager. You'll create the connection in a [step below](#create-an-azure-devops-service-connection-for-the-azure-resource-manager). + +**WORKSPACE_SVC_CONNECTION** is used to reference a [service connection for the Azure ML workspace](#create-an-azure-devops-service-connection-for-the-azure-ml-workspace). You'll create the connection after [provisioning the workspace](#provisioning-resources-using-azure-pipelines) in the [Create an Azure DevOps Service Connection for the Azure ML Workspace](#create-an-azure-devops-service-connection-for-the-azure-ml-workspace) section below. + +**ACI_DEPLOYMENT_NAME** is used for naming the scoring service during deployment to [Azure Container Instances](https://azure.microsoft.com/en-us/services/container-instances/). + + +## Provisioning resources using Azure Pipelines + +The easiest way to create all required Azure resources (Resource Group, Azure ML Workspace, Container Registry, and others) is to use the **Infrastructure as Code (IaC)** [pipeline with ARM templates](../environment_setup/iac-create-environment-pipeline-arm.yml) or the [pipeline with Terraform templates](../environment_setup/iac-create-environment-pipeline-tf.yml). The pipeline takes care of setting up all required resources based on these [Azure Resource Manager templates](../environment_setup/arm-templates/cloud-environment.json), or based on these [Terraform templates](../environment_setup/tf-templates). + +**Note:** Since the Azure Blob storage account required for batch scoring is optional, the resource provisioning pipelines mentioned above do not create this resource automatically; manual creation is required before use.
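One way to create this storage account and its input and output blob containers manually is with the Azure CLI, as sketched below. This is an illustrative sketch rather than part of the provisioning pipelines: the account name is a hypothetical placeholder (storage account names must be globally unique), and the container names follow the scoring defaults in this repo's configuration; adjust all values to your environment.

```
# Placeholder names -- replace with your own values.
az storage account create --name mybatchscoringsa --resource-group mlops-RG --location centralus --sku Standard_LRS

# Blob containers for the scoring input data and the output inferences.
az storage container create --account-name mybatchscoringsa --name input --auth-mode login
az storage container create --account-name mybatchscoringsa --name output --auth-mode login
```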
+ +### Create an Azure DevOps Service Connection for the Azure Resource Manager + +The [IaC provisioning pipeline](../environment_setup/iac-create-environment-pipeline.yml) requires an **Azure Resource Manager** [service connection](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/service-endpoints?view=azure-devops&tabs=yaml#create-a-service-connection). To create one, in Azure DevOps select **Project Settings**, then **Service Connections**, and create a new one, where: + +- Type is **Azure Resource Manager** +- Authentication method is **Service principal (automatic)** +- Scope level is **Subscription** +- Leave **`Resource Group`** empty after selecting your subscription in the dropdown +- Use the same **`Service Connection Name`** that you used in the variable group you created +- Select **Grant access permission to all pipelines** + +![Create service connection](./images/create-rm-service-connection.png) + +**Note:** Creating a service connection with Azure Resource Manager scope requires 'Owner' or 'User Access Administrator' permissions on the subscription. +You'll also need sufficient permissions to register an application with your Azure AD tenant, or you can get the ID and secret of a service principal from your Azure AD Administrator. That principal must have 'Contributor' permissions on the subscription. + +### Create the IaC Pipeline + +In your Azure DevOps project, create a build pipeline from your forked repository: + +![Build connect step](./images/build-connect.png) + +If you are using GitHub, after picking the option above, you'll be asked to authorize with GitHub and select the repo you forked. Then you'll have to select your forked repository on GitHub under the **Repository Access** section, and click **Approve and Install**. + +After the above, when you're redirected back to Azure DevOps, select the **Existing Azure Pipelines YAML file** option and set the path to [/environment_setup/iac-create-environment-pipeline-arm.yml](../environment_setup/iac-create-environment-pipeline-arm.yml) or to [/environment_setup/iac-create-environment-pipeline-tf.yml](../environment_setup/iac-create-environment-pipeline-tf.yml), depending on whether you want to deploy your infrastructure using ARM templates or Terraform: + +![Configure step](./images/select-iac-pipeline.png) + +If you decide to use Terraform, make sure the ['Terraform Build & Release Tasks' extension from Charles Zipp](https://marketplace.visualstudio.com/items?itemName=charleszipp.azure-pipelines-tasks-terraform) is installed. + +Having done that, run the pipeline: + +![IaC run](./images/run-iac-pipeline.png) + +Check that the newly created resources appear in the [Azure Portal](https://portal.azure.com): + +![Created resources](./images/created-resources.png) + +**Note**: If you run into errors, one good thing to check is the values you used for the variables. If you end up running the pipeline multiple times, you may also run into errors and need to delete the created Azure services and re-run the pipeline -- the resources to delete include a resource group, a KeyVault, a Storage Account, a Container Registry, an Application Insights instance, and a Machine Learning workspace. + +## Create an Azure DevOps Service Connection for the Azure ML Workspace + +At this point, you should have an Azure ML Workspace created. Similar to the Azure Resource Manager service connection, you need to create an additional one for the Azure ML Workspace.
+ +Create a new service connection to your Azure ML Workspace using the [Machine Learning Extension](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml) instructions to enable executing the Azure ML training pipeline. The connection name needs to match the `WORKSPACE_SVC_CONNECTION` value that you set in the variable group above (e.g., 'aml-workspace-connection'). + +![Created resources](./images/ml-ws-svc-connection.png) + +**Note:** Similar to the Azure Resource Manager service connection you created earlier, creating a service connection with Azure Machine Learning workspace scope requires 'Owner' or 'User Access Administrator' permissions on the Workspace. +You'll need sufficient permissions to register an application with your Azure AD tenant, or you can get the ID and secret of a service principal from your Azure AD Administrator. That principal must have Contributor permissions on the Azure ML Workspace. + +## Set up Build, Release Trigger, and Release Multi-Stage Pipelines + +Now that you've provisioned all the required Azure resources and service connections, you can set up the pipelines for training (Continuous Integration - **CI**) and deploying (Continuous Deployment - **CD**) your machine learning model to production. Additionally, you can set up a pipeline for batch scoring. + +1. **Model CI, training, evaluation, and registration** - triggered on code changes to the master branch on GitHub. Runs linting, unit tests, code coverage, and publishes and runs the training pipeline. If a new model is registered after evaluation, it creates a build artifact containing the JSON metadata of the model. Definition: [diabetes_regression-ci.yml](../.pipelines/diabetes_regression-ci.yml). +1. **Release deployment** - consumes the artifact of the previous pipeline and deploys a model to either [Azure Container Instances (ACI)](https://azure.microsoft.com/en-us/services/container-instances/), [Azure Kubernetes Service (AKS)](https://azure.microsoft.com/en-us/services/kubernetes-service), or [Azure App Service](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-app-service) environments. See [Further Exploration](#further-exploration) for other deployment types. Definition: [diabetes_regression-cd.yml](../.pipelines/diabetes_regression-cd.yml). + 1. **Note:** Edit the pipeline definition to remove unused stages. For example, if you're deploying to Azure Container Instances and Azure Kubernetes Service only, you'll need to delete the unused `Deploy_Webapp` stage. +1. **Batch Scoring Code Continuous Integration** - consumes the artifact of the model training pipeline. Runs linting, unit tests, code coverage, publishes a batch scoring pipeline, and invokes the published batch scoring pipeline to score a model. + +These pipelines use a Docker container on the Azure Pipelines agents to accomplish the pipeline steps. The container image ***mcr.microsoft.com/mlops/python:latest*** is built with [this Dockerfile](../environment_setup/Dockerfile) and has all the necessary dependencies installed for MLOpsPython and ***diabetes_regression***. This image is an example of a custom Docker image with a pre-baked environment. The environment is guaranteed to be the same on any build agent, VM, or local machine. **In your project, you'll want to build your own Docker image that only contains the dependencies and tools required for your use case.
Your image will probably be smaller and faster, and it will be maintained by your team.** + +### Set up the Model CI, training, evaluation, and registration pipeline + +In your Azure DevOps project, create and run a new build pipeline based on the [.pipelines/diabetes_regression-ci.yml](../.pipelines/diabetes_regression-ci.yml) +pipeline definition in your forked repository. + +If you plan to use the release deployment pipeline (in the next section), you will need to rename this pipeline to `Model-Train-Register-CI`. + +**Note**: *To rename your pipeline, after you save it, click **Pipelines** on the left menu on Azure DevOps, then **All** to see all the pipelines, then click the menu with the 3 vertical dots that appears when you hover over the name of the new pipeline, and pick **"Rename/move pipeline"**.* + +Start a run of the pipeline if you haven't already, and once the pipeline is finished, check the execution result. Note that the run can take 20 minutes, with time mostly spent in the **Trigger ML Training Pipeline > Invoke ML Pipeline** step. You can track the execution of the AML pipeline by opening the AML Workspace user interface. Screenshots are below: + +![Build](./images/model-train-register.png) + +And the pipeline artifacts: + +![Build](./images/model-train-register-artifacts.png) + +Also check the published training pipeline in your newly created AML workspace in [Azure Machine Learning Studio](https://ml.azure.com/): + +![Training pipeline](./images/training-pipeline.png) + +Great, you now have the build pipeline for training set up, which automatically triggers every time there's a change in the master branch! + +After the pipeline is finished, you'll also see a new model in the **AML Workspace** model registry section: + +![Trained model](./images/trained-model.png) + +To disable the automatic trigger of the training pipeline, change the `auto-trigger-training` variable in the `.pipelines/diabetes_regression-ci.yml` pipeline to `false`. You can also override the variable when running the pipeline manually. + +The pipeline stages are summarized below: + +#### Model CI + +- Linting (code quality analysis) +- Unit tests and code coverage analysis +- Build and publish _ML Training Pipeline_ in an _ML Workspace_ + +#### Train model + +- Determine the ID of the _ML Training Pipeline_ published in the previous stage. +- Trigger the _ML Training Pipeline_ and wait for it to complete. + - This is an **agentless** job. The CI pipeline can wait for ML pipeline completion for hours or even days without using agent resources. +- Determine if a new model was registered by the _ML Training Pipeline_. + - If the model evaluation step of the AML Pipeline determines that the new model doesn't perform any better than the previous one, the new model won't register and the _ML Training Pipeline_ will be **canceled**. In this case, you'll see a message in the 'Train Model' job under the 'Determine if evaluation succeeded and new model is registered' step saying '**Model was not registered for this run.**' + - See [evaluate_model.py](../diabetes_regression/evaluate/evaluate_model.py#L118) for the evaluation logic. This is a simplified test that just looks at MSE to decide whether or not to register a new model. A more realistic verification would also do some error analysis and verify the inferences/error distribution against a test dataset, for example.
+ - **Note**: *While it's possible to do an evaluation step as part of the ADO pipeline, this evaluation is logically part of the work done by Data Scientists, and as such the recommendation is that this step be done as part of the AML Pipeline rather than the ADO pipeline.* + - See [Additional Variables and Configuration](#additional-variables-and-configuration) for configuring this and other behavior. + +#### Create pipeline artifact + +- Get the information about the registered model +- Create an Azure DevOps pipeline artifact called `model` that contains a `model.json` file containing the model information, for example: + +```json +{ "createdTime": "2021-12-14T13:03:24.494748+00:00", "framework": "Custom", "frameworkVersion": null, "id": "diabetes_regression_model.pkl:1", "name": "diabetes_regression_model.pkl", "version": 1 } +``` + +- Here's [more information on Azure DevOps Artifacts](https://docs.microsoft.com/en-us/azure/devops/pipelines/artifacts/build-artifacts?view=azure-devops&tabs=yaml#explore-download-and-deploy-your-artifacts) and where to find them on the ADO user interface. + +### Set up the Release Deployment and/or Batch Scoring pipelines + +--- +**PRE-REQUISITES** + +In order to use these pipelines: + +1. Follow the steps to set up the Model CI, training, evaluation, and registration pipeline. +1. You **must** rename your model CI/train/eval/register pipeline to `Model-Train-Register-CI`. + +These pipelines rely on the model CI pipeline and reference it by name. + +If you would like to change the name of your model CI pipeline, you must edit this section of YAML in the CD and batch scoring pipelines, changing `source: Model-Train-Register-CI` to use your own name. +``` +trigger: none +resources: + containers: + - container: mlops + image: mcr.microsoft.com/mlops/python:latest + pipelines: + - pipeline: model-train-ci + source: Model-Train-Register-CI # Name of the triggering pipeline + trigger: + branches: + include: + - master +``` + +--- + +The release deployment and batch scoring pipelines have the following behaviors: + +- The pipeline will **automatically trigger** on completion of the `Model-Train-Register-CI` pipeline for the master branch. + +- The pipeline will default to using the latest successful build of the `Model-Train-Register-CI` pipeline. It will deploy the model produced by that build. +- You can specify a `Model-Train-Register-CI` build ID when running the pipeline manually. You can find this in the URL of the build, and the model registered from that build will also be tagged with the build ID. This is useful to skip model training and registration, and deploy/score a model successfully registered by a `Model-Train-Register-CI` build. + - For example, if you navigate to a specific run of your CI pipeline, the URL should be something like `https://dev.azure.com/yourOrgName/yourProjectName/_build/results?buildId=653&view=results`. **653** is the build ID in this case. See the second screenshot below to verify where this number would be used. + +### Set up the Release Deployment pipeline + +In your Azure DevOps project, create and run a new **build** pipeline based on the [.pipelines/diabetes_regression-cd.yml](../.pipelines/diabetes_regression-cd.yml) +pipeline definition in your forked repository. It is recommended you rename this pipeline to something like `Model-Deploy-CD` for clarity. + +**Note**: *While Azure DevOps supports both Build and Release pipelines, when using YAML you don't usually need to use Release pipelines.
This repository assumes the usage only of Build pipelines.* + +Your first run will use the latest model created by the `Model-Train-Register-CI` pipeline. + +Once the pipeline is finished, check the execution result: + +![Build](./images/model-deploy-result.png) + +To specify a particular build's model, set the `Model Train CI Build Id` parameter to the build ID you would like to use: + +![Build](./images/model-deploy-configure.png) + +Once your pipeline run begins, you can see the model name and version downloaded from the `Model-Train-Register-CI` pipeline. The run time will typically be 5-10 minutes. + +![Build](./images/model-deploy-get-artifact-logs.png) + +The pipeline has the following stage: + +#### Deploy to ACI + +- Deploy the model to the QA environment in [Azure Container Instances](https://azure.microsoft.com/en-us/services/container-instances/). +- Smoke test + - The test sends a sample query to the scoring web service and verifies that it returns the expected response. Have a look at the [smoke test code](../ml_service/util/smoke_test_scoring_service.py) for an example. + +- You can verify that an ACI instance was created in the same resource group you specified: + +![Created Resources](./images/aci-in-azure-portal.png) + +### Set up the Batch Scoring pipeline + +In your Azure DevOps project, create and run a new build pipeline based on the [.pipelines/diabetes_regression-batchscoring-ci.yml](../.pipelines/diabetes_regression-batchscoring-ci.yml) +pipeline definition in your forked repository. Rename this pipeline to `Batch-Scoring`. + +Once the pipeline is finished, check the execution result: + +![Build](./images/batchscoring-ci-result.png) + +Also check the published batch scoring pipeline in your AML workspace in the [Azure Portal](https://portal.azure.com/): + +![Batch scoring pipeline](./images/batchscoring-pipeline.png) + +Great, you now have the build pipeline set up for batch scoring, which automatically triggers every time there's a change in the master branch! + +The pipeline stages are described below in detail -- and you must do further configuration to actually see the batch inferences: + +#### Batch Scoring CI + +- Linting (code quality analysis) +- Unit tests and code coverage analysis +- Build and publish *ML Batch Scoring Pipeline* in an *AML Workspace* + +#### Batch Score model + +- Determine the model to be used, based on the bound pipeline parameters: model name (required), model version, model tag name, and model tag value. + - If run via the Azure DevOps pipeline, the batch scoring pipeline will take the model name and version from the `Model-Train-Register-CI` build used as input. + - If run locally without the model version, the batch scoring pipeline will use the model's latest version. +- Trigger the *ML Batch Scoring Pipeline* and wait for it to complete. + - This is an **agentless** job. The CI pipeline can wait for ML pipeline completion for hours or even days without using agent resources. +- Create an Azure ML pipeline. The pipeline is created by the code in `ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py` and has two steps: + - `scoringstep` - this step is a **`ParallelRunStep`** that executes the code in `diabetes_regression/scoring/parallel_batchscore.py` with several different batches of the data to be scored. + - `scorecopystep` - this is a **`PythonScriptStep`** that copies the output inferences from Azure ML's internal storage into a target location in another storage account.
+ - If you run the instructions as defined above with no changes to variables, this step will **not** be executed. You'll see a message in the logs for the corresponding step saying `Missing Parameters`. In this case, you'll be able to find the file with the inferences in the same Storage Account associated with Azure ML, in a location similar to `azureml-blobstore-SomeGuid\azureml\SomeOtherGuid\defaultoutput\parallel_run_step.txt`. One way to find the right path is this: + - Open your experiment in Azure ML (by default called `mlopspython`). + - Open the run that you want to look at (named something like `neat_morning_qc10dzjy` or similar). + - In the graphical pipeline view with 2 steps, click the button to open the details tab: `Show run overview`. + - You'll see two steps (corresponding to `scoringstep` and `scorecopystep` as described above). + - Click the step with the older "Submitted time". + - Click "Output + logs" at the top, and you'll see something like the following: + ![Outputs of `scoringstep`](./images/batch-child-run-scoringstep.png) + - The `defaultoutput` file will have JSON content with the path to a file called `parallel_run_step.txt` containing the inferences. + +To properly configure this step for your own custom scoring data, you must follow the instructions in [Configure Custom Batch Scoring](custom_model.md#Configure-Custom-Batch-Scoring), which let you specify both the location of the files to score (via the `SCORING_DATASTORE_INPUT_*` configuration variables) and where to store the inferences (via the `SCORING_DATASTORE_OUTPUT_*` configuration variables). + +## Further Exploration + +You should now have a working set of pipelines that can get you started with MLOpsPython. Below are some additional features offered that might suit your scenario. + +### Deploy the model to Azure Kubernetes Service + +MLOpsPython can also deploy to [Azure Kubernetes Service](https://azure.microsoft.com/en-us/services/kubernetes-service). + +Creating a cluster on Azure Kubernetes Service is out of scope for this tutorial, but you can find setup information on the [Quickstart: Deploy an Azure Kubernetes Service (AKS) cluster using the Azure portal](https://docs.microsoft.com/en-us/azure/aks/kubernetes-walkthrough-portal#create-an-aks-cluster) page. + +> **_Note_** +> +> If your target deployment environment is a Kubernetes cluster and you want to implement Canary and/or A/B testing deployment strategies, check out this [tutorial](./canary_ab_deployment.md). + +Keep the Azure Container Instances deployment active because it's a lightweight way to validate changes before deploying to Azure Kubernetes Service. + +In the Variables tab, edit your variable group (`devopsforai-aml-vg`). In the variable group definition, add these variables: + +| Variable Name | Suggested Value | Description | +| ------------------- | --------------- | ----------- | +| AKS_COMPUTE_NAME | aks | The Compute name of the inference cluster, created in the Azure ML Workspace (ml.azure.com). This connection has to be created manually before setting the value! | +| AKS_DEPLOYMENT_NAME | mlops-aks | The name of the deployed AKS cluster in your subscription. | + +After successfully deploying to Azure Container Instances, the next stage will deploy the model to Kubernetes and run a smoke test. + +Set **AKS_COMPUTE_NAME** to the _Compute name_ of the Inference Cluster that references the Azure Kubernetes Service cluster in your Azure ML Workspace.
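If you'd rather script the Inference Cluster connection than create it through ml.azure.com, the sketch below shows one way to attach an existing AKS cluster, assuming the legacy v1 `azure-cli-ml` extension (`az extension add -n azure-cli-ml`) is installed. This is an editor's sketch based on the v1 CLI documentation rather than part of the MLOpsPython pipelines; the resource ID and names are placeholders, and you should verify the flags against your installed extension version.

```
# -n must match the AKS_COMPUTE_NAME variable above; -i is the ARM resource ID
# of your existing AKS cluster (placeholder values shown).
az ml computetarget attach aks -n aks -i /subscriptions/<subscription-id>/resourcegroups/<aks-resource-group>/providers/Microsoft.ContainerService/managedClusters/<aks-cluster-name> -g mlops-RG -w mlops-AML-WS
```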
+ +![build](./images/multi-stage-aci-aks.png) + +Consider enabling [manual approvals](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/approvals) before the deployment stages. + +#### Web Service Authentication on Azure Kubernetes Service + +When deploying to Azure Kubernetes Service, key-based authentication is enabled by default. You can also enable token-based authentication. Token-based authentication requires clients to use an Azure Active Directory account to request an authentication token, which is used to make requests to the deployed service. For more details on how to authenticate with an ML web service deployed on AKS, see the [Smoke Test](../ml_service/util/smoke_test_scoring_service.py) code or the Azure documentation on [web service authentication](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-deploy-azure-kubernetes-service#web-service-authentication). + +### Deploy the model to Azure App Service (Azure Web App for containers) + +If you want to deploy your scoring service as an [Azure App Service](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-app-service) instead of Azure Container Instances or Azure Kubernetes Service, follow these additional steps. + +- First, you'll need to create an App Service Plan using Linux. The simplest way is to run this from your Azure CLI: `az appservice plan create --name nameOfAppServicePlan --resource-group nameOfYourResourceGroup --sku B1 --is-linux`. + +- Second, you'll need to create a webapp in this App Service Plan, and configure it to run a specific container. As there is currently no UI in the Azure Portal to do this, it has to be done from the command line. We'll come back to this. + +- In the Variables tab, edit your variable group (`devopsforai-aml-vg`) and add a variable: + + | Variable Name | Suggested Value | + | ---------------------- | ---------------------- | + | WEBAPP_DEPLOYMENT_NAME | _name of your web app_ | + + Set **WEBAPP_DEPLOYMENT_NAME** to the name of your Azure Web App. You have not yet created this webapp, so just use the name you're planning on giving it. + +- Delete the **ACI_DEPLOYMENT_NAME** variable and any AKS-related variables. + +- Next, you'll need to run your `Model-Deploy-CD` pipeline. + + - The pipeline uses the [Azure ML CLI](../.pipelines/diabetes_regression-package-model-template.yml) to create a scoring image. The image will be registered under an Azure Container Registry instance that belongs to the Azure Machine Learning Service. Any dependencies that the scoring file depends on can also be packaged with the container with an image config. Learn more about how to create a container using the Azure ML SDK with the [Image class](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.image.image.image?view=azure-ml-py#create-workspace--name--models--image-config-) API documentation. + + - This pipeline will **fail** on the `Azure Web App on Container Deploy` step, with an error saying the webapp doesn't exist yet. This is expected. Go to the next step. + +- If you want to confirm that the scoring image has been created, open the Azure Container Registry mentioned above, which will be in the Resource Group of the Azure ML workspace, and look for the repositories.
You'll have one called `package`, which was created by the CD pipeline: + + ![Azure Container Registry repository list](./images/container-registry-webapp-image.png) + +- Note down the name of the Login Server of your Azure Container Registry. It'll be something like `YourAcrName.azurecr.io`. + +- Going back to step two, you can now create a Web App in your App Service Plan using this scoring image with the `latest` tag. The easiest way to do this is to run this in the Azure CLI: `az webapp create --resource-group yourResourceGroup --plan nameOfAppServicePlan --name nameOfWebApp --deployment-container-image-name YourAcrName.azurecr.io/package:latest` + - Here, `nameOfWebApp` is the same name you put in your Azure DevOps `WEBAPP_DEPLOYMENT_NAME` variable. + +From now on, whenever you run the CD pipeline, it will update the image in the container registry and automatically update the one used in the WebApp. CD pipeline runs will now succeed. + +![build](./images/ADO-CD-pipeline-to-webapp.png) + +To confirm, you can open the App Service Plan, open your new WebApp, and open the **Deployment Center**, where you'll see something like: + +![WebApp Deployment Center page](./images/appservice-webapp-deploymentcenter.png) + +If you run into problems, you may have to make sure your webapp has the credentials to pull the image from the Azure Container Registry created by the Infrastructure as Code pipeline. Instructions can be found on the [Configure registry credentials in web app](https://docs.microsoft.com/en-us/azure/devops/pipelines/targets/webapp-on-container-linux?view=azure-devops&tabs=dotnet-core%2Cyaml#configure-registry-credentials-in-web-app) page. + +### Example pipelines using R + +The build pipeline also supports building and publishing Azure ML pipelines using R to train a model. You can enable it by changing the `build-train-script` pipeline variable to either of the following values: + +- `diabetes_regression_build_train_pipeline_with_r.py` to train a model with R on Azure ML Compute. You'll also need to uncomment (include) the `r-essentials` Conda packages in the environment definition YAML `diabetes_regression/conda_dependencies.yml`. +- `diabetes_regression_build_train_pipeline_with_r_on_dbricks.py` to train a model with R on Databricks. You'll need to manually create a Databricks cluster and attach it to the Azure ML Workspace as a compute resource. Set the DB_CLUSTER_ID and DATABRICKS_COMPUTE_NAME variables in your variable group. + +Example ML pipelines using R have a single step to train a model. They don't demonstrate how to evaluate and register a model. The evaluation and registration techniques are shown only in the Python implementation. + +### Observability and Monitoring + +You can explore aspects of model observability in the solution, such as: + +- **Logging**: Navigate to the Application Insights instance linked to the Azure ML workspace, then go to the Logs (Analytics) pane. The following sample query correlates HTTP requests with custom logs generated in `score.py`. This can be used, for example, to analyze query duration vs.
scoring batch size: + + ```sql + let Traceinfo=traces + | extend d=parse_json(tostring(customDimensions.Content)) + | project workspace=customDimensions.["Workspace Name"], + service=customDimensions.["Service Name"], + NumberOfPredictions=tostring(d.NumberOfPredictions), + id=tostring(d.RequestId), + TraceParent=tostring(d.TraceParent); + requests + | project timestamp, id, success, resultCode, duration + | join kind=fullouter Traceinfo on id + | project-away id1 + ``` + +- **Distributed tracing**: The smoke test client code sets an HTTP `traceparent` header (per the [W3C Trace Context proposed specification](https://www.w3.org/TR/trace-context-1)), and the `score.py` code logs the header. The query above shows how to surface this value. You can adapt it to your tracing framework. +- **Monitoring**: You can use [Azure Monitor for containers](https://docs.microsoft.com/en-us/azure/azure-monitor/insights/container-insights-overview) to monitor the Azure ML scoring containers' performance. + +### Clean up the example resources + +To remove the resources created for this project, use the [/environment_setup/iac-remove-environment-pipeline.yml](../environment_setup/iac-remove-environment-pipeline.yml) definition, or just delete the resource group in the [Azure Portal](https://portal.azure.com). + +## Next Steps: Integrating your project + +- The [custom model](custom_model.md) guide includes information on bringing your own code to this repository template. +- We recommend using a [custom container](custom_model.md#customize-the-build-agent-environment) to manage your pipeline environment and dependencies. The container provided with the getting started guide may not be suitable for or up to date with your project's needs. +- Consider using [Azure Pipelines self-hosted agents](https://docs.microsoft.com/en-us/azure/devops/pipelines/agents/agents?view=azure-devops&tabs=browser#install) to speed up your Azure ML pipeline execution. The Docker container image for the Azure ML pipeline is sizable, and having it cached on the agent between runs can trim several minutes from your runs. Additionally, for secure deployments of Azure Machine Learning, you'll probably need to have a self-hosted agent in a Virtual Network. + +### Additional Variables and Configuration + +#### More variable options + +There are more variables used in the project. They're defined in two places: one for local execution and one for using Azure DevOps Pipelines. + +For using Azure Pipelines, all other variables are stored in the file `.pipelines/diabetes_regression-variables-template.yml`. Using the default values as a starting point, adjust the variables to suit your requirements. + +In the `diabetes_regression` folder, you'll also find the `parameters.json` file that we recommend using to provide parameters for training, evaluation, and scoring scripts. The sample parameter that `diabetes_regression` uses is the ridge regression [_alpha_ hyperparameter](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html). We don't provide any serializers for this config file; a sketch of its shape is shown at the end of this guide. + +#### Local configuration + +For instructions on how to set up a local development environment, refer to the [Development environment setup instructions](development_setup.md).
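For reference, the `parameters.json` file mentioned under [More variable options](#more-variable-options) is a small, flat JSON document. A minimal sketch of its shape, assuming only the sample's ridge regression _alpha_ is configured, looks like this:

```json
{
    "training": {
        "alpha": 0.5
    }
}
```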
diff --git a/docs/images/ADO-CD-pipeline-to-webapp.png b/docs/images/ADO-CD-pipeline-to-webapp.png new file mode 100644 index 00000000..aac8c9ee Binary files /dev/null and b/docs/images/ADO-CD-pipeline-to-webapp.png differ diff --git a/docs/images/Architecture_DevOps_AI.png b/docs/images/Architecture_DevOps_AI.png deleted file mode 100644 index c23bb80f..00000000 Binary files a/docs/images/Architecture_DevOps_AI.png and /dev/null differ diff --git a/docs/images/EditPipeline1.png b/docs/images/EditPipeline1.png deleted file mode 100644 index b2e60c60..00000000 Binary files a/docs/images/EditPipeline1.png and /dev/null differ diff --git a/docs/images/EditPipeline2.png b/docs/images/EditPipeline2.png deleted file mode 100644 index df91ad2d..00000000 Binary files a/docs/images/EditPipeline2.png and /dev/null differ diff --git a/docs/images/EditPipeline3.png b/docs/images/EditPipeline3.png deleted file mode 100644 index 47a114ae..00000000 Binary files a/docs/images/EditPipeline3.png and /dev/null differ diff --git a/docs/images/EditPipeline4.png b/docs/images/EditPipeline4.png deleted file mode 100644 index e90ddd76..00000000 Binary files a/docs/images/EditPipeline4.png and /dev/null differ diff --git a/docs/images/EditPipeline5.png b/docs/images/EditPipeline5.png deleted file mode 100644 index e5f77898..00000000 Binary files a/docs/images/EditPipeline5.png and /dev/null differ diff --git a/docs/images/EditPipeline6.png b/docs/images/EditPipeline6.png deleted file mode 100644 index bdcf6ab2..00000000 Binary files a/docs/images/EditPipeline6.png and /dev/null differ diff --git a/docs/images/EditPipeline7.png b/docs/images/EditPipeline7.png deleted file mode 100644 index aff974bd..00000000 Binary files a/docs/images/EditPipeline7.png and /dev/null differ diff --git a/docs/images/EditPipeline8.png b/docs/images/EditPipeline8.png deleted file mode 100644 index 396dc084..00000000 Binary files a/docs/images/EditPipeline8.png and /dev/null differ diff --git a/docs/images/Install_Azure_Pipeline.png b/docs/images/Install_Azure_Pipeline.png deleted file mode 100644 index cd1de310..00000000 Binary files a/docs/images/Install_Azure_Pipeline.png and /dev/null differ diff --git a/docs/images/aci-in-azure-portal.png b/docs/images/aci-in-azure-portal.png new file mode 100644 index 00000000..e7bfa8cd Binary files /dev/null and b/docs/images/aci-in-azure-portal.png differ diff --git a/docs/images/appservice-webapp-deploymentcenter.png b/docs/images/appservice-webapp-deploymentcenter.png new file mode 100644 index 00000000..b79ff615 Binary files /dev/null and b/docs/images/appservice-webapp-deploymentcenter.png differ diff --git a/docs/images/batch-child-run-scoringstep.png b/docs/images/batch-child-run-scoringstep.png new file mode 100644 index 00000000..6b87f52d Binary files /dev/null and b/docs/images/batch-child-run-scoringstep.png differ diff --git a/docs/images/batchscoring-ci-result.png b/docs/images/batchscoring-ci-result.png new file mode 100644 index 00000000..d07d41a8 Binary files /dev/null and b/docs/images/batchscoring-ci-result.png differ diff --git a/docs/images/batchscoring-pipeline.png b/docs/images/batchscoring-pipeline.png new file mode 100644 index 00000000..2b79fe03 Binary files /dev/null and b/docs/images/batchscoring-pipeline.png differ diff --git a/docs/images/build-connect.png b/docs/images/build-connect.png index f5d9d61a..79553d80 100644 Binary files a/docs/images/build-connect.png and b/docs/images/build-connect.png differ diff --git a/docs/images/build-createpipeline.png 
b/docs/images/build-createpipeline.png deleted file mode 100644 index 6258895a..00000000 Binary files a/docs/images/build-createpipeline.png and /dev/null differ diff --git a/docs/images/build-createpipeline1.png b/docs/images/build-createpipeline1.png deleted file mode 100644 index 2fa77203..00000000 Binary files a/docs/images/build-createpipeline1.png and /dev/null differ diff --git a/docs/images/build-run.png b/docs/images/build-run.png deleted file mode 100644 index de79438b..00000000 Binary files a/docs/images/build-run.png and /dev/null differ diff --git a/docs/images/build-selectrepo.png b/docs/images/build-selectrepo.png deleted file mode 100644 index a78c96d9..00000000 Binary files a/docs/images/build-selectrepo.png and /dev/null differ diff --git a/docs/images/ci-build-pipeline-configure.png b/docs/images/ci-build-pipeline-configure.png new file mode 100644 index 00000000..62953b53 Binary files /dev/null and b/docs/images/ci-build-pipeline-configure.png differ diff --git a/docs/images/container-registry-webapp-image.png b/docs/images/container-registry-webapp-image.png new file mode 100644 index 00000000..4ec09f8f Binary files /dev/null and b/docs/images/container-registry-webapp-image.png differ diff --git a/docs/images/create-rm-service-connection.png b/docs/images/create-rm-service-connection.png new file mode 100644 index 00000000..e677636a Binary files /dev/null and b/docs/images/create-rm-service-connection.png differ diff --git a/docs/images/created-resources.png b/docs/images/created-resources.png new file mode 100644 index 00000000..d5136ee8 Binary files /dev/null and b/docs/images/created-resources.png differ diff --git a/docs/images/custom-container-variables.png b/docs/images/custom-container-variables.png new file mode 100644 index 00000000..24a6a92a Binary files /dev/null and b/docs/images/custom-container-variables.png differ diff --git a/docs/images/deploy-aci.png b/docs/images/deploy-aci.png new file mode 100644 index 00000000..0270143b Binary files /dev/null and b/docs/images/deploy-aci.png differ diff --git a/docs/images/deploy-aks.png b/docs/images/deploy-aks.png new file mode 100644 index 00000000..96d83b8b Binary files /dev/null and b/docs/images/deploy-aks.png differ diff --git a/docs/images/library_variable_groups.png b/docs/images/library_variable_groups.png new file mode 100644 index 00000000..1029769c Binary files /dev/null and b/docs/images/library_variable_groups.png differ diff --git a/docs/images/ml-ws-svc-connection.png b/docs/images/ml-ws-svc-connection.png new file mode 100644 index 00000000..baf52e1f Binary files /dev/null and b/docs/images/ml-ws-svc-connection.png differ diff --git a/docs/images/model-artifact-cd-trigger.png b/docs/images/model-artifact-cd-trigger.png new file mode 100644 index 00000000..aca2cfb5 Binary files /dev/null and b/docs/images/model-artifact-cd-trigger.png differ diff --git a/docs/images/model-artifact.png b/docs/images/model-artifact.png new file mode 100644 index 00000000..b89390b4 Binary files /dev/null and b/docs/images/model-artifact.png differ diff --git a/docs/images/model-deploy-configure.png b/docs/images/model-deploy-configure.png new file mode 100644 index 00000000..fcd87750 Binary files /dev/null and b/docs/images/model-deploy-configure.png differ diff --git a/docs/images/model-deploy-get-artifact-logs.png b/docs/images/model-deploy-get-artifact-logs.png new file mode 100644 index 00000000..2249a8d3 Binary files /dev/null and b/docs/images/model-deploy-get-artifact-logs.png differ diff --git 
a/docs/images/model-deploy-result.png b/docs/images/model-deploy-result.png new file mode 100644 index 00000000..cd3d166e Binary files /dev/null and b/docs/images/model-deploy-result.png differ diff --git a/docs/images/model-train-register-artifacts.png b/docs/images/model-train-register-artifacts.png new file mode 100644 index 00000000..0d3eed26 Binary files /dev/null and b/docs/images/model-train-register-artifacts.png differ diff --git a/docs/images/model-train-register.png b/docs/images/model-train-register.png new file mode 100644 index 00000000..5ce4ef41 Binary files /dev/null and b/docs/images/model-train-register.png differ diff --git a/docs/images/multi-stage-aci-aks.png b/docs/images/multi-stage-aci-aks.png new file mode 100644 index 00000000..0307fbf6 Binary files /dev/null and b/docs/images/multi-stage-aci-aks.png differ diff --git a/docs/images/multi-stage-aci.png b/docs/images/multi-stage-aci.png new file mode 100644 index 00000000..a96f3195 Binary files /dev/null and b/docs/images/multi-stage-aci.png differ diff --git a/docs/images/multi-stage-webapp.png b/docs/images/multi-stage-webapp.png new file mode 100644 index 00000000..e6d60ce1 Binary files /dev/null and b/docs/images/multi-stage-webapp.png differ diff --git a/docs/images/new-build-pipeline.png b/docs/images/new-build-pipeline.png deleted file mode 100644 index 01229f1f..00000000 Binary files a/docs/images/new-build-pipeline.png and /dev/null differ diff --git a/docs/images/new-build-pipeline1.png b/docs/images/new-build-pipeline1.png deleted file mode 100644 index c6fa88ea..00000000 Binary files a/docs/images/new-build-pipeline1.png and /dev/null differ diff --git a/docs/images/postmane.png b/docs/images/postmane.png deleted file mode 100644 index d2b8a49d..00000000 Binary files a/docs/images/postmane.png and /dev/null differ diff --git a/docs/images/release-create.png b/docs/images/release-create.png deleted file mode 100644 index 46f20042..00000000 Binary files a/docs/images/release-create.png and /dev/null differ diff --git a/docs/images/release-createarelease.png b/docs/images/release-createarelease.png deleted file mode 100644 index 740f4a81..00000000 Binary files a/docs/images/release-createarelease.png and /dev/null differ diff --git a/docs/images/release-deployment-service-conn.png b/docs/images/release-deployment-service-conn.png deleted file mode 100644 index 81b402c9..00000000 Binary files a/docs/images/release-deployment-service-conn.png and /dev/null differ diff --git a/docs/images/release-deployment.png b/docs/images/release-deployment.png deleted file mode 100644 index 0fdebc0e..00000000 Binary files a/docs/images/release-deployment.png and /dev/null differ diff --git a/docs/images/release-deploymentacr.png b/docs/images/release-deploymentacr.png deleted file mode 100644 index c179b08c..00000000 Binary files a/docs/images/release-deploymentacr.png and /dev/null differ diff --git a/docs/images/release-deploymentcitrigger.png b/docs/images/release-deploymentcitrigger.png deleted file mode 100644 index f8661db8..00000000 Binary files a/docs/images/release-deploymentcitrigger.png and /dev/null differ diff --git a/docs/images/release-deploymentprodagent.png b/docs/images/release-deploymentprodagent.png deleted file mode 100644 index b2ad0274..00000000 Binary files a/docs/images/release-deploymentprodagent.png and /dev/null differ diff --git a/docs/images/release-deploymentprodtrigger.png b/docs/images/release-deploymentprodtrigger.png deleted file mode 100644 index 31c60450..00000000 Binary files 
a/docs/images/release-deploymentprodtrigger.png and /dev/null differ diff --git a/docs/images/release-deploymentqaagent.png b/docs/images/release-deploymentqaagent.png deleted file mode 100644 index e7d8999f..00000000 Binary files a/docs/images/release-deploymentqaagent.png and /dev/null differ diff --git a/docs/images/release-empty-job.png b/docs/images/release-empty-job.png deleted file mode 100644 index 7980b89a..00000000 Binary files a/docs/images/release-empty-job.png and /dev/null differ diff --git a/docs/images/release-envtask-scriptpath.png b/docs/images/release-envtask-scriptpath.png deleted file mode 100644 index 9524af0c..00000000 Binary files a/docs/images/release-envtask-scriptpath.png and /dev/null differ diff --git a/docs/images/release-envtask.png b/docs/images/release-envtask.png deleted file mode 100644 index a90f9d79..00000000 Binary files a/docs/images/release-envtask.png and /dev/null differ diff --git a/docs/images/release-import.png b/docs/images/release-import.png deleted file mode 100644 index 01533427..00000000 Binary files a/docs/images/release-import.png and /dev/null differ diff --git a/docs/images/release-link-vg.png b/docs/images/release-link-vg.png deleted file mode 100644 index 95e981a4..00000000 Binary files a/docs/images/release-link-vg.png and /dev/null differ diff --git a/docs/images/release-new-pipeline.png b/docs/images/release-new-pipeline.png deleted file mode 100644 index 8b245095..00000000 Binary files a/docs/images/release-new-pipeline.png and /dev/null differ diff --git a/docs/images/release-retrainingagent.png b/docs/images/release-retrainingagent.png deleted file mode 100644 index 0f000d8a..00000000 Binary files a/docs/images/release-retrainingagent.png and /dev/null differ diff --git a/docs/images/release-retrainingartifact.png b/docs/images/release-retrainingartifact.png deleted file mode 100644 index 16b23515..00000000 Binary files a/docs/images/release-retrainingartifact.png and /dev/null differ diff --git a/docs/images/release-retrainingartifactsuccess.png b/docs/images/release-retrainingartifactsuccess.png deleted file mode 100644 index 36f3b5b1..00000000 Binary files a/docs/images/release-retrainingartifactsuccess.png and /dev/null differ diff --git a/docs/images/release-retrainingpipeline.png b/docs/images/release-retrainingpipeline.png deleted file mode 100644 index a48313d9..00000000 Binary files a/docs/images/release-retrainingpipeline.png and /dev/null differ diff --git a/docs/images/release-retrainingtrigger.png b/docs/images/release-retrainingtrigger.png deleted file mode 100644 index 4786a9d2..00000000 Binary files a/docs/images/release-retrainingtrigger.png and /dev/null differ diff --git a/docs/images/release-retrainingtrigger1.png b/docs/images/release-retrainingtrigger1.png deleted file mode 100644 index b911996e..00000000 Binary files a/docs/images/release-retrainingtrigger1.png and /dev/null differ diff --git a/docs/images/release-retraintask.png b/docs/images/release-retraintask.png deleted file mode 100644 index 062d660b..00000000 Binary files a/docs/images/release-retraintask.png and /dev/null differ diff --git a/docs/images/release-save-empty.png b/docs/images/release-save-empty.png deleted file mode 100644 index 556ed9b6..00000000 Binary files a/docs/images/release-save-empty.png and /dev/null differ diff --git a/docs/images/release-task-createimage.PNG b/docs/images/release-task-createimage.PNG new file mode 100644 index 00000000..8224db18 Binary files /dev/null and b/docs/images/release-task-createimage.PNG differ 
diff --git a/docs/images/release-task-webappdeploy.PNG b/docs/images/release-task-webappdeploy.PNG new file mode 100644 index 00000000..0f23c08d Binary files /dev/null and b/docs/images/release-task-webappdeploy.PNG differ diff --git a/docs/images/release-webapp-pipeline.PNG b/docs/images/release-webapp-pipeline.PNG new file mode 100644 index 00000000..10ffddff Binary files /dev/null and b/docs/images/release-webapp-pipeline.PNG differ diff --git a/docs/images/release-workingdir.png b/docs/images/release-workingdir.png deleted file mode 100644 index 7e817104..00000000 Binary files a/docs/images/release-workingdir.png and /dev/null differ diff --git a/docs/images/run-iac-pipeline.png b/docs/images/run-iac-pipeline.png new file mode 100644 index 00000000..f2549da8 Binary files /dev/null and b/docs/images/run-iac-pipeline.png differ diff --git a/docs/images/scoring_image.png b/docs/images/scoring_image.png new file mode 100644 index 00000000..ecb1c245 Binary files /dev/null and b/docs/images/scoring_image.png differ diff --git a/docs/images/select-iac-pipeline.png b/docs/images/select-iac-pipeline.png new file mode 100644 index 00000000..695b041f Binary files /dev/null and b/docs/images/select-iac-pipeline.png differ diff --git a/docs/images/trained-model.png b/docs/images/trained-model.png new file mode 100644 index 00000000..5bea4fe2 Binary files /dev/null and b/docs/images/trained-model.png differ diff --git a/docs/images/training-pipeline.png b/docs/images/training-pipeline.png new file mode 100644 index 00000000..48854513 Binary files /dev/null and b/docs/images/training-pipeline.png differ diff --git a/environment_setup/Dockerfile b/environment_setup/Dockerfile new file mode 100644 index 00000000..0dfa36b6 --- /dev/null +++ b/environment_setup/Dockerfile @@ -0,0 +1,17 @@ +FROM conda/miniconda3 + +LABEL org.label-schema.vendor = "Microsoft" \ + org.label-schema.url = "https://hub.docker.com/r/microsoft/mlopspython" \ + org.label-schema.vcs-url = "https://github.com/microsoft/MLOpsPython" + +COPY diabetes_regression/ci_dependencies.yml /setup/ + +# activate environment +ENV PATH /usr/local/envs/mlopspython_ci/bin:$PATH + +RUN conda update -n base -c defaults conda && \ + conda install python=3.7.5 && \ + conda env create -f /setup/ci_dependencies.yml && \ + /bin/bash -c "source activate mlopspython_ci" && \ + az --version && \ + chmod -R 777 /usr/local/envs/mlopspython_ci/lib/python3.7 diff --git a/environment_setup/arm-templates/cloud-environment.json b/environment_setup/arm-templates/cloud-environment.json new file mode 100644 index 00000000..5f102747 --- /dev/null +++ b/environment_setup/arm-templates/cloud-environment.json @@ -0,0 +1,149 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "baseName": { + "type": "string", + "maxLength": 10, + "minLength": 3, + "metadata": { + "description": "The base name to use as prefix to create all the resources." + } + }, + "location": { + "type": "string", + "defaultValue": "eastus", + "metadata": { + "description": "Specifies the location for all resources." 
+ } + }, + "workspace": { + "type": "string" + }, + "storageAccount": { + "type": "string", + "defaultValue": "[concat(toLower(parameters('baseName')), 'amlsa')]" + }, + "keyvault": { + "type": "string", + "defaultValue": "[concat(parameters('baseName'),'-AML-KV')]" + }, + "appInsights": { + "type": "string", + "defaultValue": "[concat(parameters('baseName'),'-AML-AI')]" + }, + "acr": { + "type": "string", + "defaultValue": "[concat(toLower(parameters('baseName')),'amlcr')]" + }, + "sku": { + "type": "string", + "defaultValue": "basic", + "allowedValues": [ + "basic", + "enterprise" + ], + "metadata": { + "description": "Specifies the sku, also referred to as the 'edition', of the Azure Machine Learning workspace." + } + } + }, + "variables": { + "amlWorkspaceName": "[parameters('workspace')]", + "storageAccountName": "[parameters('storageAccount')]", + "storageAccountType": "Standard_LRS", + "keyVaultName": "[parameters('keyvault')]", + "tenantId": "[subscription().tenantId]", + "applicationInsightsName": "[parameters('appInsights')]", + "containerRegistryName": "[parameters('acr')]" + }, + "resources": [ + { + "type": "Microsoft.Storage/storageAccounts", + "apiVersion": "2018-07-01", + "name": "[variables('storageAccountName')]", + "location": "[parameters('location')]", + "sku": { + "name": "[variables('storageAccountType')]" + }, + "kind": "StorageV2", + "properties": { + "encryption": { + "services": { + "blob": { + "enabled": true + }, + "file": { + "enabled": true + } + }, + "keySource": "Microsoft.Storage" + }, + "supportsHttpsTrafficOnly": true + } + }, + { + "type": "Microsoft.KeyVault/vaults", + "apiVersion": "2018-02-14", + "name": "[variables('keyVaultName')]", + "location": "[parameters('location')]", + "properties": { + "tenantId": "[variables('tenantId')]", + "sku": { + "name": "standard", + "family": "A" + }, + "accessPolicies": [ + ] + } + }, + { + "type": "Microsoft.Insights/components", + "apiVersion": "2015-05-01", + "name": "[variables('applicationInsightsName')]", + "location": "[if(or(equals(parameters('location'),'eastus2'),equals(parameters('location'),'westcentralus')),'southcentralus',parameters('location'))]", + "kind": "web", + "properties": { + "Application_Type": "web" + } + }, + { + "type": "Microsoft.ContainerRegistry/registries", + "apiVersion": "2017-10-01", + "name": "[variables('containerRegistryName')]", + "location": "[parameters('location')]", + "sku": { + "name": "Standard" + }, + "properties": { + "adminUserEnabled": true + } + }, + { + "type": "Microsoft.MachineLearningServices/workspaces", + "apiVersion": "2018-11-19", + "name": "[variables('amlWorkspaceName')]", + "location": "[parameters('location')]", + "dependsOn": [ + "[resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName'))]", + "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]", + "[resourceId('Microsoft.Insights/components', variables('applicationInsightsName'))]", + "[resourceId('Microsoft.ContainerRegistry/registries', variables('containerRegistryName'))]" + ], + "identity": { + "type": "systemAssigned" + }, + "sku": { + "tier": "[parameters('sku')]", + "name": "[parameters('sku')]" + }, + "properties": { + "friendlyName": "[variables('amlWorkspaceName')]", + "keyVault": "[resourceId('Microsoft.KeyVault/vaults',variables('keyVaultName'))]", + "applicationInsights": "[resourceId('Microsoft.Insights/components',variables('applicationInsightsName'))]", + "containerRegistry":
"[resourceId('Microsoft.ContainerRegistry/registries',variables('containerRegistryName'))]", + "storageAccount": "[resourceId('Microsoft.Storage/storageAccounts/',variables('storageAccountName'))]" + } + } + ] +} \ No newline at end of file diff --git a/environment_setup/docker-image-pipeline.yml b/environment_setup/docker-image-pipeline.yml new file mode 100644 index 00000000..9f7361ac --- /dev/null +++ b/environment_setup/docker-image-pipeline.yml @@ -0,0 +1,34 @@ +# Pipeline that builds and pushes the microsoft/mlopspython image. +resources: +- repo: self + +pool: + vmImage: 'ubuntu-latest' + +trigger: + branches: + include: + - master + + paths: + include: + - environment_setup/Dockerfile + +variables: + containerRegistry: $[coalesce(variables['acrServiceConnection'], 'acrconnection')] + imageName: $[coalesce(variables['agentImageName'], 'public/mlops/python')] + +steps: + - task: Docker@2 + displayName: Build and Push + inputs: + command: buildAndPush + containerRegistry: '$(containerRegistry)' + repository: '$(imageName)' + tags: | + ${{format('build-{0}', '$(Build.BuildNumber)')}} + ${{format('amlsdk-{0}', '$(amlsdkversion)')}} + ${{format('release-{0}', '$(githubrelease)')}} + latest + buildContext: '$(Build.SourcesDirectory)' + dockerFile: '$(Build.SourcesDirectory)/environment_setup/Dockerfile' diff --git a/environment_setup/iac-create-environment-pipeline-arm.yml b/environment_setup/iac-create-environment-pipeline-arm.yml new file mode 100644 index 00000000..0b9f474c --- /dev/null +++ b/environment_setup/iac-create-environment-pipeline-arm.yml @@ -0,0 +1,36 @@ +# CI/PR Pipeline that deploys an ARM template to create or update the resources needed by the other pipelines. +trigger: + branches: + include: + - master + paths: + include: + - environment_setup/arm-templates/* +pr: + branches: + include: + - master + paths: + include: + - environment_setup/arm-templates/* + +pool: + vmImage: "ubuntu-latest" + +variables: + - group: devopsforai-aml-vg + - name: WORKSPACE_SKU # https://docs.microsoft.com/en-us/azure/machine-learning/overview-what-is-azure-ml#sku + value: basic + +steps: + - task: AzureResourceGroupDeployment@2 + inputs: + azureSubscription: "$(AZURE_RM_SVC_CONNECTION)" + action: "Create Or Update Resource Group" + resourceGroupName: "$(RESOURCE_GROUP)" + location: $(LOCATION) + templateLocation: "Linked artifact" + csmFile: "$(Build.SourcesDirectory)/environment_setup/arm-templates/cloud-environment.json" + overrideParameters: "-baseName $(BASE_NAME) -location $(LOCATION) -workspace $(WORKSPACE_NAME) -sku $(WORKSPACE_SKU)" + deploymentMode: "Incremental" + displayName: "Deploy MLOps resources to Azure" diff --git a/environment_setup/iac-create-environment-pipeline-tf.yml b/environment_setup/iac-create-environment-pipeline-tf.yml new file mode 100644 index 00000000..ef184546 --- /dev/null +++ b/environment_setup/iac-create-environment-pipeline-tf.yml @@ -0,0 +1,72 @@ +# CI/PR Pipeline that deploys an TF template to create or update the resources needed by the other pipelines. 
+trigger: + branches: + include: + - master + paths: + include: + - environment_setup/tf-templates/* +pr: + branches: + include: + - master + paths: + include: + - environment_setup/tf-templates/* + +pool: + vmImage: 'ubuntu-latest' + +variables: +- group: devopsforai-aml-vg + +steps: +- task: charleszipp.azure-pipelines-tasks-terraform.azure-pipelines-tasks-terraform-installer.TerraformInstaller@0 + displayName: 'Use Terraform 0.12.24' + inputs: + terraformVersion: 0.12.24 + +- task: charleszipp.azure-pipelines-tasks-terraform.azure-pipelines-tasks-terraform-cli.TerraformCLI@0 + displayName: 'TF init - Deploy MLOps resources to Azure' + inputs: + command: init + commandOptions: '-backend=true -backend-config=$(Build.SourcesDirectory)/environment_setup/tf-templates/backend.tf' + workingDirectory: '$(Build.SourcesDirectory)/environment_setup/tf-templates' + backendType: azurerm + backendServiceArm: $(AZURE_RM_SVC_CONNECTION) + ensureBackend: true + backendAzureRmResourceGroupLocation: $(LOCATION) + backendAzureRmResourceGroupName: $(RESOURCE_GROUP) + backendAzureRmStorageAccountName: '$(BASE_NAME)statestor' + backendAzureRmStorageAccountSku: 'Standard_LRS' + backendAzureRmContainerName: 'tfstate-cont' + backendAzureRmKey: 'mlopsinfra.tfstate' + +- task: charleszipp.azure-pipelines-tasks-terraform.azure-pipelines-tasks-terraform-cli.TerraformCLI@0 + displayName: 'TF validate - Deploy MLOps resources to Azure' + inputs: + command: validate + workingDirectory: '$(Build.SourcesDirectory)/environment_setup/tf-templates' + +- task: charleszipp.azure-pipelines-tasks-terraform.azure-pipelines-tasks-terraform-cli.TerraformCLI@0 + displayName: 'TF plan - Deploy MLOps resources to Azure' + inputs: + command: plan + workingDirectory: '$(Build.SourcesDirectory)/environment_setup/tf-templates' + environmentServiceName: $(AZURE_RM_SVC_CONNECTION) + env: + TF_VAR_BASE_NAME: $(BASE_NAME) + TF_VAR_RESOURCE_GROUP: $(RESOURCE_GROUP) + TF_VAR_WORKSPACE_NAME: $(WORKSPACE_NAME) + +- task: charleszipp.azure-pipelines-tasks-terraform.azure-pipelines-tasks-terraform-cli.TerraformCLI@0 + displayName: 'TF apply - Deploy MLOps resources to Azure' + inputs: + command: apply + workingDirectory: '$(Build.SourcesDirectory)/environment_setup/tf-templates' + environmentServiceName: $(AZURE_RM_SVC_CONNECTION) + env: + TF_VAR_BASE_NAME: $(BASE_NAME) + TF_VAR_RESOURCE_GROUP: $(RESOURCE_GROUP) + TF_VAR_WORKSPACE_NAME: $(WORKSPACE_NAME) + diff --git a/environment_setup/iac-remove-environment-pipeline.yml b/environment_setup/iac-remove-environment-pipeline.yml new file mode 100644 index 00000000..39ff9e7a --- /dev/null +++ b/environment_setup/iac-remove-environment-pipeline.yml @@ -0,0 +1,21 @@ +# Pipeline that removes the resources created by the IaC Create Environment pipeline. +pr: none +trigger: none + +pool: + vmImage: 'ubuntu-latest' + +variables: +- group: devopsforai-aml-vg + + +steps: +- task: AzureResourceGroupDeployment@2 + inputs: + azureSubscription: '$(AZURE_RM_SVC_CONNECTION)' + action: 'DeleteRG' + resourceGroupName: '$(RESOURCE_GROUP)' + location: $(LOCATION) + displayName: 'Delete resources in Azure' + + \ No newline at end of file diff --git a/environment_setup/install_requirements.sh b/environment_setup/install_requirements.sh old mode 100644 new mode 100755 index 1bdd081d..989e8b1e --- a/environment_setup/install_requirements.sh +++ b/environment_setup/install_requirements.sh @@ -24,8 +24,8 @@ # ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
+set -eux -python --version -pip install azure-cli==2.0.46 -pip install --upgrade azureml-sdk[cli] -pip install -r requirements.txt \ No newline at end of file +conda env create -f diabetes_regression/ci_dependencies.yml + +conda activate mlopspython_ci diff --git a/environment_setup/requirements.txt b/environment_setup/requirements.txt deleted file mode 100644 index b3c2a14c..00000000 --- a/environment_setup/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -scipy==1.0.0 -scikit-learn==0.19.1 -numpy==1.14.5 -pandas==0.23.1 -pytest==4.3.0 \ No newline at end of file diff --git a/environment_setup/tf-templates/backend.tf b/environment_setup/tf-templates/backend.tf new file mode 100644 index 00000000..0aec0499 --- /dev/null +++ b/environment_setup/tf-templates/backend.tf @@ -0,0 +1,4 @@ +terraform { + backend "azurerm" { + } +} diff --git a/environment_setup/tf-templates/main.tf b/environment_setup/tf-templates/main.tf new file mode 100644 index 00000000..c57a5a84 --- /dev/null +++ b/environment_setup/tf-templates/main.tf @@ -0,0 +1,71 @@ +provider "azurerm" { + version = "=2.3.0" + features {} +} + +variable BASE_NAME {} +variable RESOURCE_GROUP {} +variable WORKSPACE_NAME {} + +#-------------------------------------------------------------------------------- + +# Set the already-existing resource group +data "azurerm_resource_group" "amlrg" { + name = var.RESOURCE_GROUP +} + +# Set client config (used for the tenant id, among other things) +data "azurerm_client_config" "currentconfig" { +} + +#-------------------------------------------------------------------------------- + +# Storage account for AML Service +resource "azurerm_storage_account" "amlstor" { + name = "${var.BASE_NAME}amlsa" + location = data.azurerm_resource_group.amlrg.location + resource_group_name = data.azurerm_resource_group.amlrg.name + account_tier = "Standard" + account_replication_type = "LRS" +} + +# Keyvault for AML Service +resource "azurerm_key_vault" "amlkv" { + name = "${var.BASE_NAME}-AML-KV" + location = data.azurerm_resource_group.amlrg.location + resource_group_name = data.azurerm_resource_group.amlrg.name + tenant_id = data.azurerm_client_config.currentconfig.tenant_id + sku_name = "standard" +} + +# App Insights for AML Service +resource "azurerm_application_insights" "amlai" { + name = "${var.BASE_NAME}-AML-AI" + location = data.azurerm_resource_group.amlrg.location + resource_group_name = data.azurerm_resource_group.amlrg.name + application_type = "web" +} + +# Container registry for AML Service +resource "azurerm_container_registry" "amlacr" { + name = "${var.BASE_NAME}amlcr" + resource_group_name = data.azurerm_resource_group.amlrg.name + location = data.azurerm_resource_group.amlrg.location + sku = "Standard" + admin_enabled = true +} + +# ML Workspace for AML Service, depending on the storage account, Keyvault, App Insights and ACR.
+resource "azurerm_machine_learning_workspace" "amlws" { + name = var.WORKSPACE_NAME + location = data.azurerm_resource_group.amlrg.location + resource_group_name = data.azurerm_resource_group.amlrg.name + application_insights_id = azurerm_application_insights.amlai.id + key_vault_id = azurerm_key_vault.amlkv.id + storage_account_id = azurerm_storage_account.amlstor.id + container_registry_id = azurerm_container_registry.amlacr.id + + identity { + type = "SystemAssigned" + } +} diff --git a/experimentation/Diabetes Ridge Regression Experimentation Pipeline.ipynb b/experimentation/Diabetes Ridge Regression Experimentation Pipeline.ipynb new file mode 100644 index 00000000..8b04a5c5 --- /dev/null +++ b/experimentation/Diabetes Ridge Regression Experimentation Pipeline.ipynb @@ -0,0 +1,353 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Experiment with parameters for a Ridge Regression Model on the Diabetes Dataset in an Azure ML Pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook is for experimenting with different parameters to train a ridge regression model on the Diabetes dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Change out of the experimentation directory\n", + "%cd .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import azureml.core\n", + "from azureml.core import Workspace" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the workspace from the saved config file\n", + "ws = Workspace.from_config()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os, shutil\n", + "\n", + "# Create a folder for the experiment files\n", + "training_folder = 'diabetes-training'\n", + "os.makedirs(training_folder, exist_ok=True)\n", + "\n", + "# Copy the data file into the experiment folder\n", + "shutil.copy('data/diabetes.csv', os.path.join(training_folder, \"diabetes.csv\"))\n", + "\n", + "# Copy the train functions into the experiment folder\n", + "shutil.copy('diabetes_regression/training/train.py', os.path.join(training_folder, \"train.py\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $training_folder/parameters.json\n", + "{\n", + " \"training\":\n", + " {\n", + " \"alpha\": 0.3\n", + " },\n", + " \"evaluation\":\n", + " {\n", + "\n", + " },\n", + " \"scoring\":\n", + " {\n", + " \n", + " }\n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $training_folder/diabetes_training.py\n", + "# Import libraries\n", + "from azureml.core import Run\n", + "import pandas as pd\n", + "import shutil\n", + "import joblib\n", + "\n", + "from train import split_data, train_model\n", + "\n", + "# Get parameters\n", + "parser = argparse.ArgumentParser()\n", + "parser.add_argument('--output_folder', type=str, dest='output_folder', default=\"diabetes_model\", help='output folder')\n", + "args = parser.parse_args()\n", + "output_folder = args.output_folder\n", + "\n", + "# Get the experiment run context\n", + "run = Run.get_context()\n", + "\n", + "# load the diabetes dataset\n", + "print(\"Loading Data...\")\n", + "train_df = pd.read_csv('diabetes.csv')\n", + "\n", + 
"data = split_data(train_df)\n", + "\n", + "# Specify the parameters to test\n", + "with open(\"parameters.json\") as f:\n", + " pars = json.load(f)\n", + " train_args = pars[\"training\"]\n", + "\n", + "# Log parameters\n", + "for k, v in train_args.items():\n", + " run.log(k, v)\n", + "\n", + "model, metrics = train_model(data, train_args)\n", + "\n", + "# Log metrics\n", + "for k, v in metrics.items():\n", + " run.log(k, v)\n", + "\n", + "# Save the parameters file to the outputs folder\n", + "os.makedirs(output_folder, exist_ok=True)\n", + "shutil.copy('parameters.json', os.path.join(output_folder, 'parameters.json'))\n", + "joblib.dump(value=model, filename= output_folder + \"/model.pkl\")\n", + " \n", + "run.complete()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $training_folder/register_diabetes.py\n", + "# Import libraries\n", + "import argparse\n", + "import joblib\n", + "from azureml.core import Workspace, Model, Run\n", + "\n", + "# Get parameters\n", + "parser = argparse.ArgumentParser()\n", + "parser.add_argument('--model_folder', type=str, dest='model_folder', default=\"diabetes_model\", help='model location')\n", + "args = parser.parse_args()\n", + "model_folder = args.model_folder\n", + "\n", + "# Get the experiment run context\n", + "run = Run.get_context()\n", + "\n", + "# load the model\n", + "print(\"Loading model from \" + model_folder)\n", + "model_file = model_folder + \"/model.pkl\"\n", + "model = joblib.load(model_file)\n", + "\n", + "Model.register(workspace=run.experiment.workspace,\n", + " model_path = model_file,\n", + " model_name = 'diabetes_model',\n", + " tags={'Training context':'Pipeline'})\n", + "\n", + "run.complete()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "cluster_name = \"aml-cluster\"\n", + "\n", + "# Verify that cluster exists\n", + "try:\n", + " pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)\n", + " print('Found existing cluster, use it.')\n", + "except ComputeTargetException:\n", + " # If not, create it\n", + " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n", + " max_nodes=4,\n", + " idle_seconds_before_scaledown=1800)\n", + " pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)\n", + "\n", + "pipeline_cluster.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Environment\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "from azureml.core.runconfig import RunConfiguration\n", + "\n", + "# Create a Python environment for the experiment\n", + "diabetes_env = Environment(\"diabetes-pipeline-env\")\n", + "diabetes_env.python.user_managed_dependencies = False # Let Azure ML manage dependencies\n", + "diabetes_env.docker.enabled = True # Use a docker container\n", + "\n", + "# Create a set of package dependencies\n", + "diabetes_packages = CondaDependencies.create(conda_packages=['scikit-learn','pandas'],\n", + " pip_packages=['azureml-sdk'])\n", + "\n", + "# Add the dependencies to the environment\n", + "diabetes_env.python.conda_dependencies = diabetes_packages\n", + "\n", + "# Register the environment (just in case you want to 
use it again)\n", + "diabetes_env.register(workspace=ws)\n", + "registered_env = Environment.get(ws, 'diabetes-pipeline-env')\n", + "\n", + "# Create a new runconfig object for the pipeline\n", + "pipeline_run_config = RunConfiguration()\n", + "\n", + "# Use the compute you created above. \n", + "pipeline_run_config.target = pipeline_cluster\n", + "\n", + "# Assign the environment to the run configuration\n", + "pipeline_run_config.environment = registered_env\n", + "\n", + "print (\"Run configuration created.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.pipeline.core import PipelineData\n", + "from azureml.pipeline.steps import PythonScriptStep, EstimatorStep\n", + "from azureml.train.estimator import Estimator\n", + "\n", + "# Get the training dataset\n", + "#diabetes_ds = ws.datasets.get(\"diabetes dataset\")\n", + "\n", + "# Create a PipelineData (temporary Data Reference) for the model folder\n", + "model_folder = PipelineData(\"model_folder\", datastore=ws.get_default_datastore())\n", + "\n", + "estimator = Estimator(source_directory=training_folder,\n", + " compute_target = pipeline_cluster,\n", + " environment_definition=pipeline_run_config.environment,\n", + " entry_script='diabetes_training.py')\n", + "\n", + "# Step 1, run the estimator to train the model\n", + "train_step = EstimatorStep(name = \"Train Model\",\n", + " estimator=estimator, \n", + " estimator_entry_script_arguments=['--output_folder', model_folder],\n", + " outputs=[model_folder],\n", + " compute_target = pipeline_cluster,\n", + " allow_reuse = True)\n", + "\n", + "# Step 2, run the model registration script\n", + "register_step = PythonScriptStep(name = \"Register Model\",\n", + " source_directory = training_folder,\n", + " script_name = \"register_diabetes.py\",\n", + " arguments = ['--model_folder', model_folder],\n", + " inputs=[model_folder],\n", + " compute_target = pipeline_cluster,\n", + " runconfig = pipeline_run_config,\n", + " allow_reuse = True)\n", + "\n", + "print(\"Pipeline steps defined\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "from azureml.pipeline.core import Pipeline\n", + "from azureml.widgets import RunDetails\n", + "\n", + "# Construct the pipeline\n", + "pipeline_steps = [train_step, register_step]\n", + "pipeline = Pipeline(workspace = ws, steps=pipeline_steps)\n", + "print(\"Pipeline is built.\")\n", + "\n", + "# Create an experiment and run the pipeline\n", + "experiment = Experiment(workspace = ws, name = 'diabetes-training-pipeline')\n", + "pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)\n", + "print(\"Pipeline submitted for execution.\")\n", + "\n", + "RunDetails(pipeline_run).show()\n", + "pipeline_run.wait_for_completion()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Model\n", + "\n", + "for model in Model.list(ws):\n", + " print(model.name, 'version:', model.version)\n", + " for tag_name in model.tags:\n", + " tag = model.tags[tag_name]\n", + " print ('\\t',tag_name, ':', tag)\n", + " for prop_name in model.properties:\n", + " prop = model.properties[prop_name]\n", + " print ('\\t',prop_name, ':', prop)\n", + " print('\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + 
"kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/experimentation/Diabetes Ridge Regression Parameter Experimentation.ipynb b/experimentation/Diabetes Ridge Regression Parameter Experimentation.ipynb new file mode 100644 index 00000000..aab5e052 --- /dev/null +++ b/experimentation/Diabetes Ridge Regression Parameter Experimentation.ipynb @@ -0,0 +1,211 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Experiment with parameters for a Ridge Regression Model on the Diabetes Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook is for experimenting with different parameters to train a ridge regression model on the Diabetes dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Change out of the experimentation directory\n", + "%cd .." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import azureml.core\n", + "from azureml.core import Workspace" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the workspace from the saved config file\n", + "ws = Workspace.from_config()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os, shutil\n", + "\n", + "# Create a folder for the experiment files\n", + "training_folder = 'diabetes-training'\n", + "os.makedirs(training_folder, exist_ok=True)\n", + "\n", + "# Copy the data file into the experiment folder\n", + "shutil.copy('data/diabetes.csv', os.path.join(training_folder, \"diabetes.csv\"))\n", + "\n", + "# Copy the train functions into the experiment folder\n", + "shutil.copy('diabetes_regression/training/train.py', os.path.join(training_folder, \"train.py\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $training_folder/parameters.json\n", + "{\n", + " \"training\":\n", + " {\n", + " \"alpha\": 0.3\n", + " },\n", + " \"evaluation\":\n", + " {\n", + "\n", + " },\n", + " \"scoring\":\n", + " {\n", + " \n", + " }\n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $training_folder/diabetes_training.py\n", + "# Import libraries\n", + "from azureml.core import Run\n", + "import json\n", + "import os\n", + "import pandas as pd\n", + "import shutil\n", + "\n", + "from train import split_data, train_model\n", + "\n", + "# Get the experiment run context\n", + "run = Run.get_context()\n", + "\n", + "# load the diabetes dataset\n", + "print(\"Loading Data...\")\n", + "train_df = pd.read_csv('diabetes.csv')\n", + "\n", + "data = split_data(train_df)\n", + "\n", + "# Specify the parameters to test\n", + "with open(\"parameters.json\") as f:\n", + " pars = json.load(f)\n", + " train_args = pars[\"training\"]\n", + "\n", + "# Log parameters\n", + "for k, v in train_args.items():\n", + " run.log(k, v)\n", + "\n", + "model, metrics = train_model(data, train_args)\n", + "\n", + "# Log 
metrics\n", + "for k, v in metrics.items():\n", + " run.log(k, v)\n", + "\n", + "# Save the parameters file to the outputs folder\n", + "os.makedirs('outputs', exist_ok=True)\n", + "shutil.copy('parameters.json', os.path.join('outputs', 'parameters.json'))\n", + " \n", + "run.complete()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.estimator import Estimator\n", + "from azureml.core import Experiment\n", + "\n", + "# Create an estimator\n", + "estimator = Estimator(source_directory=training_folder,\n", + " entry_script='diabetes_training.py',\n", + " compute_target='local',\n", + " conda_packages=['scikit-learn']\n", + " )\n", + "\n", + "# Create an experiment\n", + "experiment_name = 'diabetes-training'\n", + "experiment = Experiment(workspace = ws, name = experiment_name)\n", + "\n", + "# Run the experiment based on the estimator\n", + "run = experiment.submit(config=estimator)\n", + "run.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metrics = run.get_metrics()\n", + "for k, v in metrics.items():\n", + " print(k, v)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for file in run.get_file_names():\n", + " print(file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.6.10 64-bit ('OH3': conda)", + "language": "python", + "name": "python361064bitoh3conda5f7beeba8c1d407187c86667ecfb684f" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/experimentation/Diabetes Ridge Regression Scoring.ipynb b/experimentation/Diabetes Ridge Regression Scoring.ipynb new file mode 100644 index 00000000..9ac340ed --- /dev/null +++ b/experimentation/Diabetes Ridge Regression Scoring.ipynb @@ -0,0 +1,114 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Score Data with a Ridge Regression Model Trained on the Diabetes Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook loads the model trained in the Diabetes Ridge Regression Training notebook, prepares the data, and scores the data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import numpy\n", + "from azureml.core.model import Model\n", + "import joblib" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Model" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "model_path = Model.get_model_path(model_name=\"sklearn_regression_model.pkl\")\n", + "model = joblib.load(model_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare Data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "raw_data = '{\"data\":[[1,2,3,4,5,6,7,8,9,10],[10,9,8,7,6,5,4,3,2,1]]}'\n", + "\n", + "data = json.loads(raw_data)[\"data\"]\n", + "data = numpy.array(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Score Data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test result: {'result': [5113.099642122813, 3713.6329271385353]}\n" + ] + } + ], + "source": [ + "request_headers = {}\n", + "\n", + "result = model.predict(data)\n", + "print(\"Test result: \", {\"result\": result.tolist()})" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (storedna)", + "language": "python", + "name": "storedna" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb new file mode 100644 index 00000000..fa192115 --- /dev/null +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -0,0 +1,401 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Train a Ridge Regression Model on the Diabetes Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook loads the Diabetes dataset from sklearn, splits the data into training and validation sets, trains a Ridge regression model, validates the model on the validation set, and saves the model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import load_diabetes\n", + "from sklearn.linear_model import Ridge\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.model_selection import train_test_split\n", + "import joblib\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Data" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "sample_data = load_diabetes()\n", + "\n", + "df = pd.DataFrame(\n", + " data=sample_data.data,\n", + " columns=sample_data.feature_names)\n", + "df['Y'] = sample_data.target" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 10)\n" + ] + } + ], + "source": [ + "print(df.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesexbmibps1s2s3s4s5s6Y
count4.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+02442.000000
mean-3.634285e-161.308343e-16-8.045349e-161.281655e-16-8.835316e-171.327024e-16-4.574646e-163.777301e-16-3.830854e-16-3.412882e-16152.133484
std4.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-0277.093005
min-1.072256e-01-4.464164e-02-9.027530e-02-1.123996e-01-1.267807e-01-1.156131e-01-1.023071e-01-7.639450e-02-1.260974e-01-1.377672e-0125.000000
25%-3.729927e-02-4.464164e-02-3.422907e-02-3.665645e-02-3.424784e-02-3.035840e-02-3.511716e-02-3.949338e-02-3.324879e-02-3.317903e-0287.000000
50%5.383060e-03-4.464164e-02-7.283766e-03-5.670611e-03-4.320866e-03-3.819065e-03-6.584468e-03-2.592262e-03-1.947634e-03-1.077698e-03140.500000
75%3.807591e-025.068012e-023.124802e-023.564384e-022.835801e-022.984439e-022.931150e-023.430886e-023.243323e-022.791705e-02211.500000
max1.107267e-015.068012e-021.705552e-011.320442e-011.539137e-011.987880e-011.811791e-011.852344e-011.335990e-011.356118e-01346.000000
\n", + "
" + ], + "text/plain": [ + " age sex bmi bp s1 \\\n", + "count 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 \n", + "mean -3.634285e-16 1.308343e-16 -8.045349e-16 1.281655e-16 -8.835316e-17 \n", + "std 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 \n", + "min -1.072256e-01 -4.464164e-02 -9.027530e-02 -1.123996e-01 -1.267807e-01 \n", + "25% -3.729927e-02 -4.464164e-02 -3.422907e-02 -3.665645e-02 -3.424784e-02 \n", + "50% 5.383060e-03 -4.464164e-02 -7.283766e-03 -5.670611e-03 -4.320866e-03 \n", + "75% 3.807591e-02 5.068012e-02 3.124802e-02 3.564384e-02 2.835801e-02 \n", + "max 1.107267e-01 5.068012e-02 1.705552e-01 1.320442e-01 1.539137e-01 \n", + "\n", + " s2 s3 s4 s5 s6 \\\n", + "count 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 \n", + "mean 1.327024e-16 -4.574646e-16 3.777301e-16 -3.830854e-16 -3.412882e-16 \n", + "std 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 \n", + "min -1.156131e-01 -1.023071e-01 -7.639450e-02 -1.260974e-01 -1.377672e-01 \n", + "25% -3.035840e-02 -3.511716e-02 -3.949338e-02 -3.324879e-02 -3.317903e-02 \n", + "50% -3.819065e-03 -6.584468e-03 -2.592262e-03 -1.947634e-03 -1.077698e-03 \n", + "75% 2.984439e-02 2.931150e-02 3.430886e-02 3.243323e-02 2.791705e-02 \n", + "max 1.987880e-01 1.811791e-01 1.852344e-01 1.335990e-01 1.356118e-01 \n", + "\n", + " Y \n", + "count 442.000000 \n", + "mean 152.133484 \n", + "std 77.093005 \n", + "min 25.000000 \n", + "25% 87.000000 \n", + "50% 140.500000 \n", + "75% 211.500000 \n", + "max 346.000000 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# All data in a single dataframe\n", + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Split Data into Training and Validation Sets" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "X = df.drop('Y', axis=1).values\n", + "y = df['Y'].values\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.2, random_state=0)\n", + "data = {\"train\": {\"X\": X_train, \"y\": y_train},\n", + " \"test\": {\"X\": X_test, \"y\": y_test}}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train Model on Training Set" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None,\n", + " normalize=False, random_state=None, solver='auto', tol=0.001)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# experiment parameters\n", + "args = {\n", + " \"alpha\": 0.5\n", + "}\n", + "\n", + "reg_model = Ridge(**args)\n", + "reg_model.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Validate Model on Validation Set" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'mse': 3298.9096058070622}\n" + ] + } + ], + "source": [ + "preds = reg_model.predict(data[\"test\"][\"X\"])\n", + "mse = mean_squared_error(preds, y_test)\n", + "metrics = {\"mse\": mse}\n", + "print(metrics)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save Model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['sklearn_regression_model.pkl']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_name = \"sklearn_regression_model.pkl\"\n", + "\n", + "joblib.dump(value=reg, filename=model_name)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/ml_service/__init__.py b/ml_service/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ml_service/pipelines/__init__.py b/ml_service/pipelines/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py b/ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py new file mode 100644 index 00000000..5a0f0125 --- /dev/null +++ b/ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py @@ -0,0 +1,428 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+""" +import os +from azureml.pipeline.steps import ParallelRunConfig, ParallelRunStep +from ml_service.util.manage_environment import get_environment +from ml_service.pipelines.load_sample_data import create_sample_data_csv +from ml_service.util.env_variables import Env +from ml_service.util.attach_compute import get_compute +from azureml.core import ( + Workspace, + Dataset, + Datastore, + RunConfiguration, +) +from azureml.pipeline.core import Pipeline, PipelineData, PipelineParameter +from azureml.core.compute import ComputeTarget +from azureml.data.datapath import DataPath +from azureml.pipeline.steps import PythonScriptStep +from typing import Tuple + + +def get_or_create_datastore( + datastorename: str, ws: Workspace, env: Env, input: bool = True +) -> Datastore: + """ + Obtains a datastore with matching name. Creates it if none exists. + + :param datastorename: Name of the datastore + :param ws: Current AML Workspace + :param env: Environment variables + :param input: Datastore points to the input container if + this is True(default) or the output storage container otherwise + + :returns: Datastore + + :raises: ValueError + """ + if datastorename is None: + raise ValueError("Datastore name is required.") + + containername = ( + env.scoring_datastore_input_container + if input + else env.scoring_datastore_output_container + ) + + if datastorename in ws.datastores: + + datastore = ws.datastores[datastorename] + + # the datastore is not registered but we have all details to register it + elif ( + env.scoring_datastore_access_key is not None + and containername is not None # NOQA: E501 + ): # NOQA:E501 + + datastore = Datastore.register_azure_blob_container( + workspace=ws, + datastore_name=datastorename, + account_name=env.scoring_datastore_storage_name, + account_key=env.scoring_datastore_access_key, + container_name=containername, + ) + else: + raise ValueError( + "No existing datastore named {} nor was enough information supplied to create one.".format( # NOQA: E501 + datastorename + ) + ) + + return datastore + + +def get_input_dataset(ws: Workspace, ds: Datastore, env: Env) -> Dataset: + """ + Gets an input dataset wrapped around an input data file. The input + data file is assumed to exist in the supplied datastore. + + + :param ws: AML Workspace + :param ds: Datastore containing the data file + :param env: Environment variables + + :returns: Input Dataset + """ + + scoringinputds = Dataset.Tabular.from_delimited_files( + path=DataPath(ds, env.scoring_datastore_input_filename) + ) + + scoringinputds = scoringinputds.register( + ws, + name=env.scoring_dataset_name, + tags={"purpose": "scoring input", "format": "csv"}, + create_new_version=True, + ).as_named_input(env.scoring_dataset_name) + + return scoringinputds + + +def get_fallback_input_dataset(ws: Workspace, env: Env) -> Dataset: + """ + Called when an input datastore does not exist or no input data file exists + at that location. Create a sample dataset using the diabetes dataset from + scikit-learn. Useful when debugging this code in the absence of the input + data location Azure blob. + + + :param ws: AML Workspace + :param env: Environment Variables + + :returns: Fallback input dataset + + :raises: FileNotFoundError + """ + # This call creates an example CSV from sklearn sample data. If you + # have already bootstrapped your project, you can comment this line + # out and use your own CSV. 
+    create_sample_data_csv(
+        file_name=env.scoring_datastore_input_filename, for_scoring=True
+    )
+
+    if not os.path.exists(env.scoring_datastore_input_filename):
+        error_message = (
+            "Could not find CSV dataset for scoring at {}. "
+            "No alternate data store location was provided either."
+        ).format(env.scoring_datastore_input_filename)  # NOQA: E501
+
+        raise FileNotFoundError(error_message)
+
+    # upload the input data to the workspace default datastore
+    default_datastore = ws.get_default_datastore()
+    scoreinputdataref = default_datastore.upload_files(
+        [env.scoring_datastore_input_filename],
+        target_path="scoringinput",
+        overwrite=False,
+    )
+
+    scoringinputds = (
+        Dataset.Tabular.from_delimited_files(scoreinputdataref)
+        .register(ws, env.scoring_dataset_name, create_new_version=True)
+        .as_named_input(env.scoring_dataset_name)
+    )
+
+    return scoringinputds
+
+
+def get_output_location(
+    ws: Workspace, env: Env, outputdatastore: Datastore = None
+) -> PipelineData:
+    """
+    Returns a Datastore wrapped as a PipelineData instance suitable
+    for passing into a pipeline step. Represents the location where
+    the scoring output should be written. Uses the default workspace
+    blob store if no output datastore is supplied.
+
+    :param ws: AML Workspace
+    :param env: Environment Variables
+    :param outputdatastore: AML Datastore, optional, default is None
+
+    :returns: PipelineData wrapping the output datastore
+    """
+    if outputdatastore is None:
+        output_loc = PipelineData(
+            name="defaultoutput", datastore=ws.get_default_datastore()
+        )
+    else:
+        output_loc = PipelineData(
+            name=outputdatastore.name, datastore=outputdatastore
+        )  # NOQA: E501
+
+    return output_loc
+
+
+def get_inputds_outputloc(
+    ws: Workspace, env: Env
+) -> Tuple[Dataset, PipelineData]:  # NOQA: E501
+    """
+    Prepares the input and output for the scoring step. The input is a
+    tabular dataset wrapped around the scoring data. The output is
+    PipelineData representing a location to write the scores to.
+
+    :param ws: AML Workspace
+    :param env: Environment Variables
+
+    :returns: Input dataset and output location
+    """
+    if env.scoring_datastore_storage_name is None:
+        # fall back to default
+        scoringinputds = get_fallback_input_dataset(ws, env)
+        output_loc = get_output_location(ws, env)
+    else:
+        inputdatastore = get_or_create_datastore(
+            "{}_in".format(env.scoring_datastore_storage_name), ws, env
+        )
+        outputdatastore = get_or_create_datastore(
+            "{}_out".format(env.scoring_datastore_storage_name),
+            ws,
+            env,
+            input=False,  # NOQA: E501
+        )
+        scoringinputds = get_input_dataset(ws, inputdatastore, env)
+        output_loc = get_output_location(ws, env, outputdatastore)
+
+    return (scoringinputds, output_loc)
+
+
+def get_run_configs(
+    ws: Workspace, computetarget: ComputeTarget, env: Env
+) -> Tuple[ParallelRunConfig, RunConfiguration]:
+    """
+    Creates the run configurations required by the pipeline to
+    enable parallelized scoring.
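+
+    The scoring configuration uses output_action="append_row", so the
+    rows returned by each parallel mini-batch are appended into a single
+    parallel_run_step.txt file in the output location, which the copy
+    step later moves to the scoring datastore.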
+
+    :param ws: AML Workspace
+    :param computetarget: AML Compute target
+    :param env: Environment Variables
+
+    :returns: Tuple[Scoring Run configuration, Score copy run configuration]
+    """
+    # get a conda environment for scoring
+    environment = get_environment(
+        ws,
+        env.aml_env_name_scoring,
+        conda_dependencies_file=env.aml_env_score_conda_dep_file,
+        enable_docker=True,
+        use_gpu=env.use_gpu_for_scoring,
+        create_new=env.rebuild_env_scoring,
+    )
+
+    score_run_config = ParallelRunConfig(
+        entry_script=env.batchscore_script_path,
+        source_directory=env.sources_directory_train,
+        error_threshold=10,
+        output_action="append_row",
+        compute_target=computetarget,
+        node_count=env.max_nodes_scoring,
+        environment=environment,
+        run_invocation_timeout=300,
+    )
+
+    copy_run_config = RunConfiguration()
+    copy_run_config.environment = get_environment(
+        ws,
+        env.aml_env_name_score_copy,
+        conda_dependencies_file=env.aml_env_scorecopy_conda_dep_file,
+        enable_docker=True,
+        use_gpu=env.use_gpu_for_scoring,
+        create_new=env.rebuild_env_scoring,
+    )
+    return (score_run_config, copy_run_config)
+
+
+def get_scoring_pipeline(
+    scoring_dataset: Dataset,
+    output_loc: PipelineData,
+    score_run_config: ParallelRunConfig,
+    copy_run_config: RunConfiguration,
+    computetarget: ComputeTarget,
+    ws: Workspace,
+    env: Env,
+) -> Pipeline:
+    """
+    Creates the scoring pipeline.
+
+    :param scoring_dataset: Data to score
+    :param output_loc: Location to save the scoring results
+    :param score_run_config: Parallel Run configuration to support
+    parallelized scoring
+    :param copy_run_config: Script Run configuration to support
+    score copying
+    :param computetarget: AML Compute target
+    :param ws: AML Workspace
+    :param env: Environment Variables
+
+    :returns: Scoring pipeline instance
+    """
+    # To help filter the model, the model name, model version, and a
+    # tag/value pair are made bindable parameters so that they can be
+    # passed to the pipeline when it is invoked, either over REST or
+    # via the AML SDK.
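+    # For example, run_parallel_batchscore_pipeline.py submits this
+    # pipeline with:
+    #     pipeline_parameters={"model_name": env.model_name,
+    #                          "model_version": env.model_version,
+    #                          "model_tag_name": " ",
+    #                          "model_tag_value": " "}
+    # where a single-space value means "not specified".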
+ model_name_param = PipelineParameter( + "model_name", default_value=" " + ) # NOQA: E501 + model_version_param = PipelineParameter( + "model_version", default_value=" " + ) # NOQA: E501 + model_tag_name_param = PipelineParameter( + "model_tag_name", default_value=" " + ) # NOQA: E501 + model_tag_value_param = PipelineParameter( + "model_tag_value", default_value=" " + ) # NOQA: E501 + + scoring_step = ParallelRunStep( + name="scoringstep", + inputs=[scoring_dataset], + output=output_loc, + arguments=[ + "--model_name", + model_name_param, + "--model_version", + model_version_param, + "--model_tag_name", + model_tag_name_param, + "--model_tag_value", + model_tag_value_param, + ], + parallel_run_config=score_run_config, + allow_reuse=False, + ) + + copying_step = PythonScriptStep( + name="scorecopystep", + script_name=env.batchscore_copy_script_path, + source_directory=env.sources_directory_train, + arguments=[ + "--output_path", + output_loc, + "--scoring_output_filename", + env.scoring_datastore_output_filename + if env.scoring_datastore_output_filename is not None + else "", + "--scoring_datastore", + env.scoring_datastore_storage_name + if env.scoring_datastore_storage_name is not None + else "", + "--score_container", + env.scoring_datastore_output_container + if env.scoring_datastore_output_container is not None + else "", + "--scoring_datastore_key", + env.scoring_datastore_access_key + if env.scoring_datastore_access_key is not None + else "", + ], + inputs=[output_loc], + allow_reuse=False, + compute_target=computetarget, + runconfig=copy_run_config, + ) + return Pipeline(workspace=ws, steps=[scoring_step, copying_step]) + + +def build_batchscore_pipeline(): + """ + Main method that builds and publishes a scoring pipeline. + """ + + try: + env = Env() + + # Get Azure machine learning workspace + aml_workspace = Workspace.get( + name=env.workspace_name, + subscription_id=env.subscription_id, + resource_group=env.resource_group, + ) + + # Get Azure machine learning cluster + aml_compute_score = get_compute( + aml_workspace, + env.compute_name_scoring, + env.vm_size_scoring, + for_batch_scoring=True, + ) + + input_dataset, output_location = get_inputds_outputloc( + aml_workspace, env + ) # NOQA: E501 + + scoring_runconfig, score_copy_runconfig = get_run_configs( + aml_workspace, aml_compute_score, env + ) + + scoring_pipeline = get_scoring_pipeline( + input_dataset, + output_location, + scoring_runconfig, + score_copy_runconfig, + aml_compute_score, + aml_workspace, + env, + ) + + published_pipeline = scoring_pipeline.publish( + name=env.scoring_pipeline_name, + description="Diabetes Batch Scoring Pipeline", + ) + pipeline_id_string = "##vso[task.setvariable variable=pipeline_id;isOutput=true]{}".format( # NOQA: E501 + published_pipeline.id + ) + print(pipeline_id_string) + except Exception as e: + print(e) + exit(1) + + +if __name__ == "__main__": + build_batchscore_pipeline() diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py new file mode 100644 index 00000000..03937186 --- /dev/null +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py @@ -0,0 +1,180 @@ +from azureml.pipeline.core.graph import PipelineParameter +from azureml.pipeline.steps import PythonScriptStep +from azureml.pipeline.core import Pipeline, PipelineData +from azureml.core import Workspace, Dataset, Datastore +from azureml.core.runconfig import RunConfiguration +from ml_service.pipelines.load_sample_data 
import create_sample_data_csv +from ml_service.util.attach_compute import get_compute +from ml_service.util.env_variables import Env +from ml_service.util.manage_environment import get_environment +import os + + +def main(): + e = Env() + # Get Azure machine learning workspace + aml_workspace = Workspace.get( + name=e.workspace_name, + subscription_id=e.subscription_id, + resource_group=e.resource_group, + ) + print("get_workspace:") + print(aml_workspace) + + # Get Azure machine learning cluster + aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size) + if aml_compute is not None: + print("aml_compute:") + print(aml_compute) + + # Create a reusable Azure ML environment + environment = get_environment( + aml_workspace, + e.aml_env_name, + conda_dependencies_file=e.aml_env_train_conda_dep_file, + create_new=e.rebuild_env, + ) # + run_config = RunConfiguration() + run_config.environment = environment + + if e.datastore_name: + datastore_name = e.datastore_name + else: + datastore_name = aml_workspace.get_default_datastore().name + run_config.environment.environment_variables[ + "DATASTORE_NAME" + ] = datastore_name # NOQA: E501 + + model_name_param = PipelineParameter(name="model_name", default_value=e.model_name) # NOQA: E501 + dataset_version_param = PipelineParameter( + name="dataset_version", default_value=e.dataset_version + ) + data_file_path_param = PipelineParameter( + name="data_file_path", default_value="none" + ) + caller_run_id_param = PipelineParameter(name="caller_run_id", default_value="none") # NOQA: E501 + + # Get dataset name + dataset_name = e.dataset_name + + # Check to see if dataset exists + if dataset_name not in aml_workspace.datasets: + # This call creates an example CSV from sklearn sample data. If you + # have already bootstrapped your project, you can comment this line + # out and use your own CSV. + create_sample_data_csv() + + # Use a CSV to read in the data set. + file_name = "diabetes.csv" + + if not os.path.exists(file_name): + raise Exception( + 'Could not find CSV dataset at "%s". If you have bootstrapped your project, you will need to provide a CSV.' 
# NOQA: E501
+                % file_name
+            )  # NOQA: E501
+
+        # Upload file to default datastore in workspace
+        datastore = Datastore.get(aml_workspace, datastore_name)
+        target_path = "training-data/"
+        datastore.upload_files(
+            files=[file_name],
+            target_path=target_path,
+            overwrite=True,
+            show_progress=False,
+        )
+
+        # Register dataset
+        path_on_datastore = os.path.join(target_path, file_name)
+        dataset = Dataset.Tabular.from_delimited_files(
+            path=(datastore, path_on_datastore)
+        )
+        dataset = dataset.register(
+            workspace=aml_workspace,
+            name=dataset_name,
+            description="diabetes training data",
+            tags={"format": "CSV"},
+            create_new_version=True,
+        )
+
+    # Create a PipelineData to pass data between steps
+    pipeline_data = PipelineData(
+        "pipeline_data", datastore=aml_workspace.get_default_datastore()
+    )
+
+    train_step = PythonScriptStep(
+        name="Train Model",
+        script_name=e.train_script_path,
+        compute_target=aml_compute,
+        source_directory=e.sources_directory_train,
+        outputs=[pipeline_data],
+        arguments=[
+            "--model_name",
+            model_name_param,
+            "--step_output",
+            pipeline_data,
+            "--dataset_version",
+            dataset_version_param,
+            "--data_file_path",
+            data_file_path_param,
+            "--caller_run_id",
+            caller_run_id_param,
+            "--dataset_name",
+            dataset_name,
+        ],
+        runconfig=run_config,
+        allow_reuse=True,
+    )
+    print("Step Train created")
+
+    evaluate_step = PythonScriptStep(
+        name="Evaluate Model",
+        script_name=e.evaluate_script_path,
+        compute_target=aml_compute,
+        source_directory=e.sources_directory_train,
+        arguments=[
+            "--model_name",
+            model_name_param,
+            "--allow_run_cancel",
+            e.allow_run_cancel,
+        ],
+        runconfig=run_config,
+        allow_reuse=False,
+    )
+    print("Step Evaluate created")
+
+    register_step = PythonScriptStep(
+        name="Register Model",
+        script_name=e.register_script_path,
+        compute_target=aml_compute,
+        source_directory=e.sources_directory_train,
+        inputs=[pipeline_data],
+        arguments=["--model_name", model_name_param, "--step_input", pipeline_data],  # NOQA: E501
+        runconfig=run_config,
+        allow_reuse=False,
+    )
+    print("Step Register created")
+    # Check the run_evaluation flag to include or exclude the evaluation step.
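+    # RUN_EVALUATION == "true":  train -> evaluate -> register
+    # otherwise:                 train -> register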
+    if (e.run_evaluation).lower() == "true":
+        print("Include evaluation step before register step.")
+        evaluate_step.run_after(train_step)
+        register_step.run_after(evaluate_step)
+        steps = [train_step, evaluate_step, register_step]
+    else:
+        print("Exclude evaluation step and directly run register step.")
+        register_step.run_after(train_step)
+        steps = [train_step, register_step]
+
+    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
+    train_pipeline.validate()
+    published_pipeline = train_pipeline.publish(
+        name=e.pipeline_name,
+        description="Model training/retraining pipeline",
+        version=e.build_id,
+    )
+    print(f"Published pipeline: {published_pipeline.name}")
+    print(f"for build {published_pipeline.version}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py
new file mode 100644
index 00000000..254f22eb
--- /dev/null
+++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py
@@ -0,0 +1,63 @@
+from azureml.pipeline.steps import PythonScriptStep
+from azureml.pipeline.core import Pipeline
+from azureml.core import Workspace
+from azureml.core.runconfig import RunConfiguration
+from ml_service.util.attach_compute import get_compute
+from ml_service.util.env_variables import Env
+from ml_service.util.manage_environment import get_environment
+
+
+def main():
+    e = Env()
+    # Get Azure machine learning workspace
+    aml_workspace = Workspace.get(
+        name=e.workspace_name,
+        subscription_id=e.subscription_id,
+        resource_group=e.resource_group,
+    )
+    print("get_workspace:")
+    print(aml_workspace)
+
+    # Get Azure machine learning cluster
+    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
+    if aml_compute is not None:
+        print("aml_compute:")
+        print(aml_compute)
+
+    # Create a reusable Azure ML environment.
+    # Make sure to include `r-essentials`
+    # in diabetes_regression/conda_dependencies.yml
+    environment = get_environment(
+        aml_workspace,
+        e.aml_env_name,
+        conda_dependencies_file=e.aml_env_train_conda_dep_file,
+        create_new=e.rebuild_env,
+    )  # NOQA: E501
+    run_config = RunConfiguration()
+    run_config.environment = environment
+
+    train_step = PythonScriptStep(
+        name="Train Model",
+        script_name="train_with_r.py",
+        compute_target=aml_compute,
+        source_directory="diabetes_regression/training/R",
+        runconfig=run_config,
+        allow_reuse=False,
+    )
+    print("Step Train created")
+
+    steps = [train_step]
+
+    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
+    train_pipeline.validate()
+    published_pipeline = train_pipeline.publish(
+        name=e.pipeline_name,
+        description="Model training/retraining pipeline",
+        version=e.build_id,
+    )
+    print(f"Published pipeline: {published_pipeline.name}")
+    print(f"for build {published_pipeline.version}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py
new file mode 100644
index 00000000..ae607b3b
--- /dev/null
+++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py
@@ -0,0 +1,55 @@
+from azureml.pipeline.core import Pipeline
+from azureml.core import Workspace
+from ml_service.util.attach_compute import get_compute
+from azureml.pipeline.steps import DatabricksStep
+from ml_service.util.env_variables import Env
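+# Note: this pipeline runs its training step on an existing Databricks
+# cluster; the DB_CLUSTER_ID environment variable must identify that
+# cluster (it is passed as existing_cluster_id below).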
+ + +def main(): + e = Env() + # Get Azure machine learning workspace + aml_workspace = Workspace.get( + name=e.workspace_name, + subscription_id=e.subscription_id, + resource_group=e.resource_group + ) + print("get_workspace:") + print(aml_workspace) + + # Get Azure machine learning cluster + aml_compute = get_compute( + aml_workspace, + e.compute_name, + e.vm_size) + if aml_compute is not None: + print("aml_compute:") + print(aml_compute) + + train_step = DatabricksStep( + name="DBPythonInLocalMachine", + num_workers=1, + python_script_name="train_with_r_on_databricks.py", + source_directory="diabetes_regression/training/R", + run_name='DB_Python_R_demo', + existing_cluster_id=e.db_cluster_id, + compute_target=aml_compute, + allow_reuse=False + ) + + print("Step Train created") + + steps = [train_step] + + train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) + train_pipeline.validate() + published_pipeline = train_pipeline.publish( + name=e.pipeline_name + "_with_R_on_DB", + description="Model training/retraining pipeline", + version=e.build_id + ) + print(f'Published pipeline: {published_pipeline.name}') + print(f'for build {published_pipeline.version}') + + +if __name__ == '__main__': + main() diff --git a/ml_service/pipelines/load_sample_data.py b/ml_service/pipelines/load_sample_data.py new file mode 100644 index 00000000..304a8e7b --- /dev/null +++ b/ml_service/pipelines/load_sample_data.py @@ -0,0 +1,18 @@ + +import pandas as pd +from sklearn.datasets import load_diabetes + + +# Loads the diabetes sample data from sklearn and produces a csv file that can +# be used by the build/train pipeline script. +def create_sample_data_csv(file_name: str = "diabetes.csv", + for_scoring: bool = False): + sample_data = load_diabetes() + df = pd.DataFrame( + data=sample_data.data, + columns=sample_data.feature_names) + if not for_scoring: + df['Y'] = sample_data.target + # Hard code to diabetes so we fail fast if the project has been + # bootstrapped. + df.to_csv(file_name, index=False) diff --git a/ml_service/pipelines/run_parallel_batchscore_pipeline.py b/ml_service/pipelines/run_parallel_batchscore_pipeline.py new file mode 100644 index 00000000..c046eb9c --- /dev/null +++ b/ml_service/pipelines/run_parallel_batchscore_pipeline.py @@ -0,0 +1,134 @@ +""" +Copyright (C) Microsoft Corporation. All rights reserved.​ + ​ +Microsoft Corporation (“Microsoft”) grants you a nonexclusive, perpetual, +royalty-free right to use, copy, and modify the software code provided by us +("Software Code"). You may not sublicense the Software Code or any use of it +(except to your affiliates and to vendors to perform work on your behalf) +through distribution, network access, service agreement, lease, rental, or +otherwise. This license does not purport to express any claim of ownership over +data you may have shared with Microsoft in the creation of the Software Code. +Unless applicable law gives you more rights, Microsoft reserves all other +rights not expressly granted herein, whether by implication, estoppel or +otherwise. ​ + ​ +THE SOFTWARE CODE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+MICROSOFT OR ITS LICENSORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+"""
+
+from azure.storage.blob import ContainerClient
+from ml_service.util.env_variables import Env
+from azureml.core import Experiment, Workspace
+from azureml.pipeline.core import PublishedPipeline
+import argparse
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--pipeline_id", type=str, default=None)
+    return parser.parse_args()
+
+
+def get_pipeline(pipeline_id, ws: Workspace, env: Env):
+    if pipeline_id is not None:
+        scoringpipeline = PublishedPipeline.get(ws, pipeline_id)
+    else:
+        pipelines = PublishedPipeline.list(ws)
+        scoringpipelinelist = [
+            pl for pl in pipelines if pl.name == env.scoring_pipeline_name
+        ]  # noqa E501
+
+        if len(scoringpipelinelist) == 0:
+            raise Exception(
+                "No pipeline found matching name: {}".format(env.scoring_pipeline_name)  # NOQA: E501
+            )
+        else:
+            # latest published
+            scoringpipeline = scoringpipelinelist[0]
+
+    return scoringpipeline
+
+
+def copy_output(step_id: str, env: Env):
+    accounturl = "https://{}.blob.core.windows.net".format(
+        env.scoring_datastore_storage_name
+    )
+
+    srcblobname = "azureml/{}/{}_out/parallel_run_step.txt".format(
+        step_id, env.scoring_datastore_storage_name
+    )
+
+    srcbloburl = "{}/{}/{}".format(
+        accounturl, env.scoring_datastore_output_container, srcblobname
+    )
+
+    containerclient = ContainerClient(
+        accounturl,
+        env.scoring_datastore_output_container,
+        env.scoring_datastore_access_key,
+    )
+    srcblobproperties = containerclient.get_blob_client(
+        srcblobname
+    ).get_blob_properties()  # noqa E501
+
+    destfolder = srcblobproperties.last_modified.date().isoformat()
+    filetime = (
+        srcblobproperties.last_modified.time()
+        .isoformat("milliseconds")
+        .replace(":", "_")
+        .replace(".", "_")
+    )  # noqa E501
+    destfilenameparts = env.scoring_datastore_output_filename.split(".")
+    destblobname = "{}/{}_{}.{}".format(
+        destfolder, destfilenameparts[0], filetime, destfilenameparts[1]
+    )
+
+    destblobclient = containerclient.get_blob_client(destblobname)
+    destblobclient.start_copy_from_url(srcbloburl)
+
+
+def run_batchscore_pipeline():
+    try:
+        env = Env()
+
+        args = parse_args()
+
+        aml_workspace = Workspace.get(
+            name=env.workspace_name,
+            subscription_id=env.subscription_id,
+            resource_group=env.resource_group,
+        )
+
+        scoringpipeline = get_pipeline(args.pipeline_id, aml_workspace, env)
+
+        experiment = Experiment(workspace=aml_workspace, name=env.experiment_name)  # NOQA: E501
+
+        run = experiment.submit(
+            scoringpipeline,
+            pipeline_parameters={
+                "model_name": env.model_name,
+                "model_version": env.model_version,
+                "model_tag_name": " ",
+                "model_tag_value": " ",
+            },
+        )
+
+        run.wait_for_completion(show_output=True)
+
+        if run.get_status() == "Finished":
+            copy_output(list(run.get_steps())[0].id, env)
+
+    except Exception as ex:
+        print("Error: {}".format(ex))
+
+
+if __name__ == "__main__":
+    run_batchscore_pipeline()
diff --git a/ml_service/pipelines/run_train_pipeline.py b/ml_service/pipelines/run_train_pipeline.py
new file mode 100644
index 00000000..b68b9a15
---
/dev/null +++ b/ml_service/pipelines/run_train_pipeline.py @@ -0,0 +1,73 @@ +from azureml.pipeline.core import PublishedPipeline +from azureml.core import Experiment, Workspace +import argparse +from ml_service.util.env_variables import Env + + +def main(): + + parser = argparse.ArgumentParser("register") + parser.add_argument( + "--output_pipeline_id_file", + type=str, + default="pipeline_id.txt", + help="Name of a file to write pipeline ID to" + ) + parser.add_argument( + "--skip_train_execution", + action="store_true", + help=("Do not trigger the execution. " + "Use this in Azure DevOps when using a server job to trigger") + ) + args = parser.parse_args() + + e = Env() + + aml_workspace = Workspace.get( + name=e.workspace_name, + subscription_id=e.subscription_id, + resource_group=e.resource_group + ) + + # Find the pipeline that was published by the specified build ID + pipelines = PublishedPipeline.list(aml_workspace) + matched_pipes = [] + + for p in pipelines: + if p.name == e.pipeline_name: + if p.version == e.build_id: + matched_pipes.append(p) + + if(len(matched_pipes) > 1): + published_pipeline = None + raise Exception(f"Multiple active pipelines are published for build {e.build_id}.") # NOQA: E501 + elif(len(matched_pipes) == 0): + published_pipeline = None + raise KeyError(f"Unable to find a published pipeline for this build {e.build_id}") # NOQA: E501 + else: + published_pipeline = matched_pipes[0] + print("published pipeline id is", published_pipeline.id) + + # Save the Pipeline ID for other AzDO jobs after script is complete + if args.output_pipeline_id_file is not None: + with open(args.output_pipeline_id_file, "w") as out_file: + out_file.write(published_pipeline.id) + + if(args.skip_train_execution is False): + pipeline_parameters = {"model_name": e.model_name} + tags = {"BuildId": e.build_id} + if (e.build_uri is not None): + tags["BuildUri"] = e.build_uri + experiment = Experiment( + workspace=aml_workspace, + name=e.experiment_name) + run = experiment.submit( + published_pipeline, + tags=tags, + pipeline_parameters=pipeline_parameters) + + print("Pipeline run initiated ", run.id) + + +if __name__ == "__main__": + main() diff --git a/ml_service/util/__init__.py b/ml_service/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ml_service/util/attach_compute.py b/ml_service/util/attach_compute.py new file mode 100644 index 00000000..cf8c07a6 --- /dev/null +++ b/ml_service/util/attach_compute.py @@ -0,0 +1,39 @@ + +import traceback +from azureml.core import Workspace +from azureml.core.compute import AmlCompute +from azureml.core.compute import ComputeTarget +from azureml.exceptions import ComputeTargetException +from ml_service.util.env_variables import Env + + +def get_compute(workspace: Workspace, compute_name: str, vm_size: str, for_batch_scoring: bool = False): # NOQA E501 + try: + if compute_name in workspace.compute_targets: + compute_target = workspace.compute_targets[compute_name] + if compute_target and type(compute_target) is AmlCompute: + print("Found existing compute target " + compute_name + " so using it.") # NOQA + else: + e = Env() + compute_config = AmlCompute.provisioning_configuration( + vm_size=vm_size, + vm_priority=e.vm_priority if not for_batch_scoring else e.vm_priority_scoring, # NOQA E501 + min_nodes=e.min_nodes if not for_batch_scoring else e.min_nodes_scoring, # NOQA E501 + max_nodes=e.max_nodes if not for_batch_scoring else e.max_nodes_scoring, # NOQA E501 + idle_seconds_before_scaledown="300" + # #Uncomment the below 
lines for VNet support + # vnet_resourcegroup_name=vnet_resourcegroup_name, + # vnet_name=vnet_name, + # subnet_name=subnet_name + ) + compute_target = ComputeTarget.create( + workspace, compute_name, compute_config + ) + compute_target.wait_for_completion( + show_output=True, min_node_count=None, timeout_in_minutes=10 + ) + return compute_target + except ComputeTargetException: + traceback.print_exc() + print("An error occurred trying to provision compute.") + exit(1) diff --git a/ml_service/util/create_scoring_image.py b/ml_service/util/create_scoring_image.py new file mode 100644 index 00000000..378cb3b4 --- /dev/null +++ b/ml_service/util/create_scoring_image.py @@ -0,0 +1,59 @@ +import os +import argparse +from azureml.core import Workspace +from azureml.core.environment import Environment +from azureml.core.model import Model, InferenceConfig +import shutil +from ml_service.util.env_variables import Env + +e = Env() + +# Get Azure machine learning workspace +ws = Workspace.get( + name=e.workspace_name, + subscription_id=e.subscription_id, + resource_group=e.resource_group +) + +parser = argparse.ArgumentParser("create scoring image") +parser.add_argument( + "--output_image_location_file", + type=str, + help=("Name of a file to write image location to, " + "in format REGISTRY.azurecr.io/IMAGE_NAME:IMAGE_VERSION") +) +args = parser.parse_args() + +model = Model(ws, name=e.model_name, version=e.model_version) +sources_dir = e.sources_directory_train +if (sources_dir is None): + sources_dir = 'diabetes_regression' +score_script = os.path.join(".", sources_dir, e.score_script) +score_file = os.path.basename(score_script) +path_to_scoring = os.path.dirname(score_script) +cwd = os.getcwd() +# Copy conda_dependencies.yml into scoring as this method does not accept relative paths. 
# NOQA: E501
+shutil.copy(os.path.join(".", sources_dir,
+            "conda_dependencies.yml"), path_to_scoring)
+os.chdir(path_to_scoring)
+
+scoring_env = Environment.from_conda_specification(name="scoringenv", file_path="conda_dependencies.yml")  # NOQA: E501
+inference_config = InferenceConfig(
+    entry_script=score_file, environment=scoring_env)
+package = Model.package(ws, [model], inference_config)
+package.wait_for_creation(show_output=True)
+# Display the package location/ACR path
+print(package.location)
+
+os.chdir(cwd)
+
+if package.state != "Succeeded":
+    raise Exception(f"Image creation status: {package.state}")
+
+print("Package stored at {} with build log {}".format(package.location, package.package_build_log_uri))  # NOQA: E501
+
+# Save the image location for other AzDO jobs after the script completes
+if args.output_image_location_file is not None:
+    print("Writing image location to %s" % args.output_image_location_file)
+    with open(args.output_image_location_file, "w") as out_file:
+        out_file.write(str(package.location))
diff --git a/ml_service/util/create_scoring_image.sh b/ml_service/util/create_scoring_image.sh
new file mode 100644
index 00000000..1651b73e
--- /dev/null
+++ b/ml_service/util/create_scoring_image.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+set -euo pipefail  # strict mode, fail on error
+set -x  # verbose
+
+docker run \
+    --rm \
+    -t \
+    -v $PWD:/mlops \
+    -v ${AZURE_CONFIG_DIR:-$HOME/.azure}:/root/.azure \
+    -e SUBSCRIPTION_ID=$(az account show --query id -o tsv) \
+    -e RESOURCE_GROUP=$RESOURCE_GROUP \
+    -e WORKSPACE_NAME=$WORKSPACE_NAME \
+    -e MODEL_NAME=$MODEL_NAME \
+    -e IMAGE_NAME=$IMAGE_NAME \
+    mcr.microsoft.com/mlops/python:latest \
+    bash -c "cd /mlops/ && python ml_service/util/create_scoring_image.py"
diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py
new file mode 100644
index 00000000..753c152d
--- /dev/null
+++ b/ml_service/util/env_variables.py
@@ -0,0 +1,126 @@
+"""Env dataclass to load and hold all environment variables."""
+from dataclasses import dataclass
+import os
+from typing import Optional
+
+from dotenv import load_dotenv
+
+
+@dataclass(frozen=True)
+class Env:
+    """Loads all environment variables into a predefined set of properties."""
+
+    # load the .env file into environment variables for local execution
+    load_dotenv()
+    workspace_name: Optional[str] = os.environ.get("WORKSPACE_NAME")
+    resource_group: Optional[str] = os.environ.get("RESOURCE_GROUP")
+    subscription_id: Optional[str] = os.environ.get("SUBSCRIPTION_ID")
+    tenant_id: Optional[str] = os.environ.get("TENANT_ID")
+    app_id: Optional[str] = os.environ.get("SP_APP_ID")
+    app_secret: Optional[str] = os.environ.get("SP_APP_SECRET")
+    vm_size: Optional[str] = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU")
+    compute_name: Optional[str] = os.environ.get("AML_COMPUTE_CLUSTER_NAME")
+    vm_priority: Optional[str] = os.environ.get(
+        "AML_CLUSTER_PRIORITY", "lowpriority"
+    )  # NOQA: E501
+    min_nodes: int = int(os.environ.get("AML_CLUSTER_MIN_NODES", 0))
+    max_nodes: int = int(os.environ.get("AML_CLUSTER_MAX_NODES", 4))
+    build_id: Optional[str] = os.environ.get("BUILD_BUILDID")
+    pipeline_name: Optional[str] = os.environ.get("TRAINING_PIPELINE_NAME")
+    sources_directory_train: Optional[str] = os.environ.get(
+        "SOURCES_DIR_TRAIN"
+    )  # NOQA: E501
+    train_script_path: Optional[str] = os.environ.get("TRAIN_SCRIPT_PATH")
+    evaluate_script_path: Optional[str] = os.environ.get(
+        "EVALUATE_SCRIPT_PATH"
+    )  # NOQA: E501
+    register_script_path: Optional[str] =
os.environ.get( + "REGISTER_SCRIPT_PATH" + ) # NOQA: E501 + model_name: Optional[str] = os.environ.get("MODEL_NAME") + experiment_name: Optional[str] = os.environ.get("EXPERIMENT_NAME") + model_version: Optional[str] = os.environ.get("MODEL_VERSION") + image_name: Optional[str] = os.environ.get("IMAGE_NAME") + db_cluster_id: Optional[str] = os.environ.get("DB_CLUSTER_ID") + score_script: Optional[str] = os.environ.get("SCORE_SCRIPT") + build_uri: Optional[str] = os.environ.get("BUILD_URI") + dataset_name: Optional[str] = os.environ.get("DATASET_NAME") + datastore_name: Optional[str] = os.environ.get("DATASTORE_NAME") + dataset_version: Optional[str] = os.environ.get("DATASET_VERSION") + run_evaluation: Optional[str] = os.environ.get("RUN_EVALUATION", "true") + allow_run_cancel: Optional[str] = os.environ.get( + "ALLOW_RUN_CANCEL", "true" + ) # NOQA: E501 + aml_env_name: Optional[str] = os.environ.get("AML_ENV_NAME") + aml_env_train_conda_dep_file: Optional[str] = os.environ.get( + "AML_ENV_TRAIN_CONDA_DEP_FILE", "conda_dependencies.yml" + ) + rebuild_env: Optional[bool] = os.environ.get( + "AML_REBUILD_ENVIRONMENT", "false" + ).lower().strip() == "true" + + use_gpu_for_scoring: Optional[bool] = os.environ.get( + "USE_GPU_FOR_SCORING", "false" + ).lower().strip() == "true" + aml_env_score_conda_dep_file: Optional[str] = os.environ.get( + "AML_ENV_SCORE_CONDA_DEP_FILE", "conda_dependencies_scoring.yml" + ) + aml_env_scorecopy_conda_dep_file: Optional[str] = os.environ.get( + "AML_ENV_SCORECOPY_CONDA_DEP_FILE", "conda_dependencies_scorecopy.yml" + ) + vm_size_scoring: Optional[str] = os.environ.get( + "AML_COMPUTE_CLUSTER_CPU_SKU_SCORING" + ) + compute_name_scoring: Optional[str] = os.environ.get( + "AML_COMPUTE_CLUSTER_NAME_SCORING" + ) + vm_priority_scoring: Optional[str] = os.environ.get( + "AML_CLUSTER_PRIORITY_SCORING", "lowpriority" + ) + min_nodes_scoring: int = int( + os.environ.get("AML_CLUSTER_MIN_NODES_SCORING", 0) + ) # NOQA: E501 + max_nodes_scoring: int = int( + os.environ.get("AML_CLUSTER_MAX_NODES_SCORING", 4) + ) # NOQA: E501 + rebuild_env_scoring: Optional[bool] = os.environ.get( + "AML_REBUILD_ENVIRONMENT_SCORING", "false" + ).lower().strip() == "true" + scoring_datastore_storage_name: Optional[str] = os.environ.get( + "SCORING_DATASTORE_STORAGE_NAME" + ) + scoring_datastore_access_key: Optional[str] = os.environ.get( + "SCORING_DATASTORE_ACCESS_KEY" + ) + scoring_datastore_input_container: Optional[str] = os.environ.get( + "SCORING_DATASTORE_INPUT_CONTAINER" + ) + scoring_datastore_input_filename: Optional[str] = os.environ.get( + "SCORING_DATASTORE_INPUT_FILENAME" + ) + scoring_datastore_output_container: Optional[str] = os.environ.get( + "SCORING_DATASTORE_OUTPUT_CONTAINER" + ) + scoring_datastore_output_filename: Optional[str] = os.environ.get( + "SCORING_DATASTORE_OUTPUT_FILENAME" + ) + scoring_dataset_name: Optional[str] = os.environ.get( + "SCORING_DATASET_NAME" + ) # NOQA: E501 + scoring_pipeline_name: Optional[str] = os.environ.get( + "SCORING_PIPELINE_NAME" + ) # NOQA: E501 + aml_env_name_scoring: Optional[str] = os.environ.get( + "AML_ENV_NAME_SCORING" + ) # NOQA: E501 + aml_env_name_score_copy: Optional[str] = os.environ.get( + "AML_ENV_NAME_SCORE_COPY" + ) # NOQA: E501 + batchscore_script_path: Optional[str] = os.environ.get( + "BATCHSCORE_SCRIPT_PATH" + ) # NOQA: E501 + batchscore_copy_script_path: Optional[str] = os.environ.get( + "BATCHSCORE_COPY_SCRIPT_PATH" + ) # NOQA: E501 diff --git a/ml_service/util/manage_environment.py 
b/ml_service/util/manage_environment.py new file mode 100644 index 00000000..b61c97fe --- /dev/null +++ b/ml_service/util/manage_environment.py @@ -0,0 +1,41 @@ + +import os +import traceback +from azureml.core import Workspace, Environment +from ml_service.util.env_variables import Env +from azureml.core.runconfig import DEFAULT_CPU_IMAGE, DEFAULT_GPU_IMAGE + + +def get_environment( + workspace: Workspace, + environment_name: str, + conda_dependencies_file: str, + create_new: bool = False, + enable_docker: bool = None, + use_gpu: bool = False +): + try: + e = Env() + environments = Environment.list(workspace=workspace) + restored_environment = None + for env in environments: + if env == environment_name: + restored_environment = environments[environment_name] + + if restored_environment is None or create_new: + new_env = Environment.from_conda_specification( + environment_name, + os.path.join(e.sources_directory_train, conda_dependencies_file), # NOQA: E501 + ) # NOQA: E501 + restored_environment = new_env + if enable_docker is not None: + restored_environment.docker.enabled = enable_docker + restored_environment.docker.base_image = DEFAULT_GPU_IMAGE if use_gpu else DEFAULT_CPU_IMAGE # NOQA: E501 + restored_environment.register(workspace) + + if restored_environment is not None: + print(restored_environment) + return restored_environment + except Exception: + traceback.print_exc() + exit(1) diff --git a/ml_service/util/smoke_test_scoring_service.py b/ml_service/util/smoke_test_scoring_service.py new file mode 100644 index 00000000..0fa34b1e --- /dev/null +++ b/ml_service/util/smoke_test_scoring_service.py @@ -0,0 +1,91 @@ +import argparse +import requests +import time +from azureml.core import Workspace +from azureml.core.webservice import AksWebservice, AciWebservice +from ml_service.util.env_variables import Env +import secrets + + +input = {"data": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]]} +output_len = 2 + + +def call_web_service(e, service_type, service_name): + aml_workspace = Workspace.get( + name=e.workspace_name, + subscription_id=e.subscription_id, + resource_group=e.resource_group + ) + print("Fetching service") + headers = {} + if service_type == "ACI": + service = AciWebservice(aml_workspace, service_name) + else: + service = AksWebservice(aml_workspace, service_name) + if service.auth_enabled: + service_keys = service.get_keys() + headers['Authorization'] = 'Bearer ' + service_keys[0] + print("Testing service") + print(". url: %s" % service.scoring_uri) + output = call_web_app(service.scoring_uri, headers) + + return output + + +def call_web_app(url, headers): + + # Generate an HTTP 'traceparent' distributed tracing header + # (per the W3C Trace Context proposed specification). 
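+    # Format: version "00", a randomly generated 16-byte trace-id
+    # (32 hex characters), a randomly generated 8-byte parent-id
+    # (16 hex characters), and the trace flags "00" (not sampled).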
+ headers['traceparent'] = "00-{0}-{1}-00".format( + secrets.token_hex(16), secrets.token_hex(8)) + + retries = 600 + for i in range(retries): + try: + response = requests.post( + url, json=input, headers=headers) + response.raise_for_status() + return response.json() + except requests.exceptions.HTTPError as e: + if i == retries - 1: + raise e + print(e) + print("Retrying...") + time.sleep(1) + + +def main(): + + parser = argparse.ArgumentParser("smoke_test_scoring_service.py") + + parser.add_argument( + "--type", + type=str, + choices=["AKS", "ACI", "Webapp"], + required=True, + help="type of service" + ) + parser.add_argument( + "--service", + type=str, + required=True, + help="Name of the image to test" + ) + args = parser.parse_args() + + e = Env() + if args.type == "Webapp": + output = call_web_app(args.service, {}) + else: + output = call_web_service(e, args.type, args.service) + print("Verifying service output") + + assert "result" in output + assert len(output["result"]) == output_len + print("Smoke test successful.") + + +if __name__ == '__main__': + main() diff --git a/release-pipelines/releasedeployment.json b/release-pipelines/releasedeployment.json deleted file mode 100644 index 6147d931..00000000 --- a/release-pipelines/releasedeployment.json +++ /dev/null @@ -1,581 +0,0 @@ -{ - "source": 2, - "revision": 1, - "description": null, - "createdBy": { - "displayName": "Username", - "url": "https://app.vssps.visualstudio.com/Ababa295f-6e98-40b6-9dc1-aa6118e169e2/_apis/Identities/af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "_links": { - "avatar": { - "href": "https://youaccount.visualstudio.com/_apis/GraphProfile/MemberAvatars/aad.ZmZhYjg5YzEtYmIxNC03NGRiLTk3NTAtZDBlMzQ2NGQwNjU0" - } - }, - "id": "af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "uniqueName": "user@email.com", - "imageUrl": "https://youaccount.visualstudio.com/_api/_common/identityImage?id=af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "descriptor": "aad.ZmZhYjg5YzEtYmIxNC03NGRiLTk3NTAtZDBlMzQ2NGQwNjU0" - }, - "createdOn": "2019-03-28T18:56:41.680Z", - "modifiedBy": { - "displayName": "User Name", - "url": "https://app.vssps.visualstudio.com/Ababa295f-6e98-40b6-9dc1-aa6118e169e2/_apis/Identities/af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "_links": { - "avatar": { - "href": "https://youaccount.visualstudio.com/_apis/GraphProfile/MemberAvatars/aad.ZmZhYjg5YzEtYmIxNC03NGRiLTk3NTAtZDBlMzQ2NGQwNjU0" - } - }, - "id": "af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "uniqueName": "user@email.com", - "imageUrl": "https://youaccount.visualstudio.com/_api/_common/identityImage?id=af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "descriptor": "aad.ZmZhYjg5YzEtYmIxNC03NGRiLTk3NTAtZDBlMzQ2NGQwNjU0" - }, - "modifiedOn": "2019-03-28T18:56:41.680Z", - "isDeleted": false, - "variables": {}, - "variableGroups": [], - "environments": [ - { - "id": 8, - "name": "QA Environment", - "rank": 1, - "owner": { - "displayName": "User Name", - "url": "https://app.vssps.visualstudio.com/Ababa295f-6e98-40b6-9dc1-aa6118e169e2/_apis/Identities/af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "_links": { - "avatar": { - "href": "https://youaccount.visualstudio.com/_apis/GraphProfile/MemberAvatars/aad.ZmZhYjg5YzEtYmIxNC03NGRiLTk3NTAtZDBlMzQ2NGQwNjU0" - } - }, - "id": "af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "uniqueName": "user@email.com", - "imageUrl": "https://youaccount.visualstudio.com/_api/_common/identityImage?id=af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "descriptor": "aad.ZmZhYjg5YzEtYmIxNC03NGRiLTk3NTAtZDBlMzQ2NGQwNjU0" - }, - "variables": {}, - "variableGroups": [], - 
"preDeployApprovals": { - "approvals": [ - { - "rank": 1, - "isAutomated": true, - "isNotificationOn": false, - "id": 22 - } - ], - "approvalOptions": { - "requiredApproverCount": null, - "releaseCreatorCanBeApprover": false, - "autoTriggeredAndPreviousEnvironmentApprovedCanBeSkipped": false, - "enforceIdentityRevalidation": false, - "timeoutInMinutes": 0, - "executionOrder": 1 - } - }, - "deployStep": { - "id": 25 - }, - "postDeployApprovals": { - "approvals": [ - { - "rank": 1, - "isAutomated": true, - "isNotificationOn": false, - "id": 26 - } - ], - "approvalOptions": { - "requiredApproverCount": null, - "releaseCreatorCanBeApprover": false, - "autoTriggeredAndPreviousEnvironmentApprovedCanBeSkipped": false, - "enforceIdentityRevalidation": false, - "timeoutInMinutes": 0, - "executionOrder": 2 - } - }, - "deployPhases": [ - { - "deploymentInput": { - "parallelExecution": { - "parallelExecutionType": 0 - }, - "skipArtifactsDownload": false, - "artifactsDownloadInput": { - "downloadInputs": [] - }, - "queueId": 18, - "demands": [], - "enableAccessToken": false, - "timeoutInMinutes": 0, - "jobCancelTimeoutInMinutes": 1, - "condition": "succeeded()", - "overrideInputs": {} - }, - "rank": 1, - "phaseType": 1, - "name": "Agent job", - "refName": null, - "workflowTasks": [ - { - "environment": {}, - "taskId": "33c63b11-352b-45a2-ba1b-54cb568a29ca", - "version": "0.*", - "name": "Use Python 3.6", - "refName": "", - "enabled": true, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "versionSpec": "3.6", - "addToPath": "true", - "architecture": "x64" - } - }, - { - "environment": {}, - "taskId": "6c731c3c-3c68-459a-a5c9-bde6e6595b5b", - "version": "3.*", - "name": "Bash Script", - "refName": "", - "enabled": true, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "targetType": "filePath", - "filePath": "$(System.DefaultWorkingDirectory)/DevOpsForAI/devops-for-ai/environment_setup/install_requirements.sh", - "arguments": "", - "script": "# Write your commands here\n\n# Use the environment variables input below to pass secret variables to this script", - "workingDirectory": "$(System.DefaultWorkingDirectory)/DevOpsForAI/devops-for-ai/environment_setup", - "failOnStderr": "false", - "noProfile": "true", - "noRc": "true" - } - }, - { - "environment": {}, - "taskId": "d9bafed4-0b18-4f58-968d-86655b4d2ce9", - "version": "2.*", - "name": "Login to Azure Subscription", - "refName": "", - "enabled": true, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "script": "az login --service-principal -u $(spidentity) -p $(spsecret) --tenant $(sptenant)\n", - "workingDirectory": "", - "failOnStderr": "false" - } - }, - { - "environment": {}, - "taskId": "d9bafed4-0b18-4f58-968d-86655b4d2ce9", - "version": "2.*", - "name": "New model available, Create Scoring Image", - "refName": "", - "enabled": false, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "script": "python aml_service/30-CreateScoringImage.py", - "workingDirectory": "$(System.DefaultWorkingDirectory)/DevOpsForAI/devops-for-ai", - "failOnStderr": "false" - } - }, - { - 
"environment": {}, - "taskId": "d9bafed4-0b18-4f58-968d-86655b4d2ce9", - "version": "2.*", - "name": "Get Latest Scoring Image Name & Version", - "refName": "", - "enabled": true, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "script": "python aml_service/34-GetScoringImageName.py", - "workingDirectory": "$(System.DefaultWorkingDirectory)/DevOpsForAI/devops-for-ai", - "failOnStderr": "false" - } - }, - { - "environment": {}, - "taskId": "d9bafed4-0b18-4f58-968d-86655b4d2ce9", - "version": "2.*", - "name": "Deploy new image to ACI", - "refName": "", - "enabled": true, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "script": "python aml_service/50-deployOnAci.py", - "workingDirectory": "$(System.DefaultWorkingDirectory)/DevOpsForAI/devops-for-ai", - "failOnStderr": "false" - } - }, - { - "environment": {}, - "taskId": "d9bafed4-0b18-4f58-968d-86655b4d2ce9", - "version": "2.*", - "name": "Test the image on ACI", - "refName": "", - "enabled": true, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "script": "python aml_service/60-AciWebserviceTest.py", - "workingDirectory": "$(System.DefaultWorkingDirectory)/DevOpsForAI/devops-for-ai", - "failOnStderr": "false" - } - } - ] - } - ], - "environmentOptions": { - "emailNotificationType": "OnlyOnFailure", - "emailRecipients": "release.environment.owner;release.creator", - "skipArtifactsDownload": false, - "timeoutInMinutes": 0, - "enableAccessToken": false, - "publishDeploymentStatus": true, - "badgeEnabled": false, - "autoLinkWorkItems": false, - "pullRequestDeploymentEnabled": false - }, - "demands": [], - "conditions": [ - { - "name": "ReleaseStarted", - "conditionType": 1, - "value": "" - } - ], - "executionPolicy": { - "concurrencyCount": 1, - "queueDepthCount": 0 - }, - "schedules": [], - "currentRelease": { - "id": 0, - "url": "https://youaccount.vsrm.visualstudio.com/c9414c5b-b8f8-4d50-a8bf-eae8dbbb6a2a/_apis/Release/releases/0", - "_links": {} - }, - "retentionPolicy": { - "daysToKeep": 30, - "releasesToKeep": 3, - "retainBuild": true - }, - "processParameters": {}, - "properties": {}, - "preDeploymentGates": { - "id": 0, - "gatesOptions": null, - "gates": [] - }, - "postDeploymentGates": { - "id": 0, - "gatesOptions": null, - "gates": [] - }, - "environmentTriggers": [], - "badgeUrl": "https://youaccount.vsrm.visualstudio.com/_apis/public/Release/badge/c9414c5b-b8f8-4d50-a8bf-eae8dbbb6a2a/5/8" - }, - { - "id": 9, - "name": "Prod Environment", - "rank": 2, - "owner": { - "displayName": "User Name", - "url": "https://app.vssps.visualstudio.com/Ababa295f-6e98-40b6-9dc1-aa6118e169e2/_apis/Identities/af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "_links": { - "avatar": { - "href": "https://youaccount.visualstudio.com/_apis/GraphProfile/MemberAvatars/aad.ZmZhYjg5YzEtYmIxNC03NGRiLTk3NTAtZDBlMzQ2NGQwNjU0" - } - }, - "id": "af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "uniqueName": "user@email.com", - "imageUrl": "https://youaccount.visualstudio.com/_api/_common/identityImage?id=af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "descriptor": "aad.ZmZhYjg5YzEtYmIxNC03NGRiLTk3NTAtZDBlMzQ2NGQwNjU0" - }, - "variables": {}, - "variableGroups": [], - "preDeployApprovals": { - "approvals": [ - { - 
"rank": 1, - "isAutomated": true, - "isNotificationOn": false, - "id": 23 - } - ], - "approvalOptions": { - "requiredApproverCount": null, - "releaseCreatorCanBeApprover": false, - "autoTriggeredAndPreviousEnvironmentApprovedCanBeSkipped": false, - "enforceIdentityRevalidation": false, - "timeoutInMinutes": 0, - "executionOrder": 1 - } - }, - "deployStep": { - "id": 24 - }, - "postDeployApprovals": { - "approvals": [ - { - "rank": 1, - "isAutomated": true, - "isNotificationOn": false, - "id": 27 - } - ], - "approvalOptions": { - "requiredApproverCount": null, - "releaseCreatorCanBeApprover": false, - "autoTriggeredAndPreviousEnvironmentApprovedCanBeSkipped": false, - "enforceIdentityRevalidation": false, - "timeoutInMinutes": 0, - "executionOrder": 2 - } - }, - "deployPhases": [ - { - "deploymentInput": { - "parallelExecution": { - "parallelExecutionType": 0 - }, - "skipArtifactsDownload": false, - "artifactsDownloadInput": { - "downloadInputs": [] - }, - "queueId": 18, - "demands": [], - "enableAccessToken": false, - "timeoutInMinutes": 0, - "jobCancelTimeoutInMinutes": 1, - "condition": "succeeded()", - "overrideInputs": {} - }, - "rank": 1, - "phaseType": 1, - "name": "Agent job", - "refName": null, - "workflowTasks": [ - { - "environment": {}, - "taskId": "33c63b11-352b-45a2-ba1b-54cb568a29ca", - "version": "0.*", - "name": "Use Python 3.6", - "refName": "", - "enabled": true, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "versionSpec": "3.6", - "addToPath": "true", - "architecture": "x64" - } - }, - { - "environment": {}, - "taskId": "6c731c3c-3c68-459a-a5c9-bde6e6595b5b", - "version": "3.*", - "name": "Bash Script", - "refName": "", - "enabled": true, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "targetType": "filePath", - "filePath": "$(System.DefaultWorkingDirectory)/DevOpsForAI/devops-for-ai/environment_setup/install_requirements.sh", - "arguments": "", - "script": "# Write your commands here\n\n# Use the environment variables input below to pass secret variables to this script", - "workingDirectory": "$(System.DefaultWorkingDirectory)/DevOpsForAI/devops-for-ai/environment_setup", - "failOnStderr": "false", - "noProfile": "true", - "noRc": "true" - } - }, - { - "environment": {}, - "taskId": "d9bafed4-0b18-4f58-968d-86655b4d2ce9", - "version": "2.*", - "name": "Login to Azure Subscription", - "refName": "", - "enabled": true, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "script": "az login --service-principal -u $(spidentity) -p $(spsecret) --tenant $(sptenant)", - "workingDirectory": "", - "failOnStderr": "false" - } - }, - { - "environment": {}, - "taskId": "d9bafed4-0b18-4f58-968d-86655b4d2ce9", - "version": "2.*", - "name": "Get Latest Scoring Image Name & Version", - "refName": "", - "enabled": true, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "script": "python aml_service/34-GetScoringImageName.py", - "workingDirectory": "$(System.DefaultWorkingDirectory)/DevOpsForAI/devops-for-ai", - "failOnStderr": "false" - } - }, - { - "environment": {}, - "taskId": 
"d9bafed4-0b18-4f58-968d-86655b4d2ce9", - "version": "2.*", - "name": "Deploy to AKS", - "refName": "", - "enabled": true, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "script": "python aml_service/51-deployOnAks.py", - "workingDirectory": "$(System.DefaultWorkingDirectory)/DevOpsForAI/devops-for-ai", - "failOnStderr": "false" - } - }, - { - "environment": {}, - "taskId": "d9bafed4-0b18-4f58-968d-86655b4d2ce9", - "version": "2.*", - "name": "Test AKS endpoint", - "refName": "", - "enabled": true, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "script": "python aml_service/61-AksWebserviceTest.py", - "workingDirectory": "$(System.DefaultWorkingDirectory)/DevOpsForAI/devops-for-ai", - "failOnStderr": "false" - } - } - ] - } - ], - "environmentOptions": { - "emailNotificationType": "OnlyOnFailure", - "emailRecipients": "release.environment.owner;release.creator", - "skipArtifactsDownload": false, - "timeoutInMinutes": 0, - "enableAccessToken": false, - "publishDeploymentStatus": true, - "badgeEnabled": false, - "autoLinkWorkItems": false, - "pullRequestDeploymentEnabled": false - }, - "demands": [], - "conditions": [], - "executionPolicy": { - "concurrencyCount": 1, - "queueDepthCount": 0 - }, - "schedules": [], - "currentRelease": { - "id": 0, - "url": "https://youaccount.vsrm.visualstudio.com/c9414c5b-b8f8-4d50-a8bf-eae8dbbb6a2a/_apis/Release/releases/0", - "_links": {} - }, - "retentionPolicy": { - "daysToKeep": 30, - "releasesToKeep": 3, - "retainBuild": true - }, - "processParameters": {}, - "properties": {}, - "preDeploymentGates": { - "id": 0, - "gatesOptions": null, - "gates": [] - }, - "postDeploymentGates": { - "id": 0, - "gatesOptions": null, - "gates": [] - }, - "environmentTriggers": [], - "badgeUrl": "https://youaccount.vsrm.visualstudio.com/_apis/public/Release/badge/c9414c5b-b8f8-4d50-a8bf-eae8dbbb6a2a/5/9" - } - ], - "artifacts": [], - "triggers": [], - "releaseNameFormat": "Release-$(rev:r)", - "tags": [], - "pipelineProcess": { - "type": 1 - }, - "properties": { - "DefinitionCreationSource": { - "$type": "System.String", - "$value": "ReleaseImport" - } - }, - "id": 5, - "name": "releasedeploymentpipeline", - "path": "\\", - "projectReference": null, - "url": "https://youaccount.vsrm.visualstudio.com/c9414c5b-b8f8-4d50-a8bf-eae8dbbb6a2a/_apis/Release/definitions/5", - "_links": { - "self": { - "href": "https://youaccount.vsrm.visualstudio.com/c9414c5b-b8f8-4d50-a8bf-eae8dbbb6a2a/_apis/Release/definitions/5" - }, - "web": { - "href": "https://youaccount.visualstudio.com/c9414c5b-b8f8-4d50-a8bf-eae8dbbb6a2a/_release?definitionId=5" - } - } -} \ No newline at end of file diff --git a/release-pipelines/retrainingtrigger.json b/release-pipelines/retrainingtrigger.json deleted file mode 100644 index 1bcba3ce..00000000 --- a/release-pipelines/retrainingtrigger.json +++ /dev/null @@ -1,291 +0,0 @@ -{ - "source": 2, - "revision": 1, - "description": null, - "createdBy": { - "displayName": "User Name", - "url": "https://spsprodcus3.vssps.visualstudio.com/A127dc0c3-e10b-4004-a104-fa5be489bed1/_apis/Identities/af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "_links": { - "avatar": { - "href": "https://dev.azure.com/userorg/_apis/GraphProfile/MemberAvatars/aad.ZmZhYjg5YzEtYmIxNC03NGRiLTk3NTAtZDBlMzQ2NGQwNjU0" - } - }, - "id": 
"af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "uniqueName": "user@email.com", - "imageUrl": "https://dev.azure.com/userorg/_api/_common/identityImage?id=af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "descriptor": "aad.ZmZhYjg5YzEtYmIxNC03NGRiLTk3NTAtZDBlMzQ2NGQwNjU0" - }, - "createdOn": "2019-03-29T01:48:19.893Z", - "modifiedBy": { - "displayName": "User Name", - "url": "https://spsprodcus3.vssps.visualstudio.com/A127dc0c3-e10b-4004-a104-fa5be489bed1/_apis/Identities/af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "_links": { - "avatar": { - "href": "https://dev.azure.com/userorg/_apis/GraphProfile/MemberAvatars/aad.ZmZhYjg5YzEtYmIxNC03NGRiLTk3NTAtZDBlMzQ2NGQwNjU0" - } - }, - "id": "af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "uniqueName": "user@email.com", - "imageUrl": "https://dev.azure.com/userorg/_api/_common/identityImage?id=af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "descriptor": "aad.ZmZhYjg5YzEtYmIxNC03NGRiLTk3NTAtZDBlMzQ2NGQwNjU0" - }, - "modifiedOn": "2019-03-29T01:48:19.893Z", - "isDeleted": false, - "variables": {}, - "variableGroups": [ - 7 - ], - "environments": [ - { - "id": 9, - "name": "Retrain", - "rank": 1, - "owner": { - "displayName": "User Name", - "url": "https://spsprodcus3.vssps.visualstudio.com/A127dc0c3-e10b-4004-a104-fa5be489bed1/_apis/Identities/af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "_links": { - "avatar": { - "href": "https://dev.azure.com/userorg/_apis/GraphProfile/MemberAvatars/aad.ZmZhYjg5YzEtYmIxNC03NGRiLTk3NTAtZDBlMzQ2NGQwNjU0" - } - }, - "id": "af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "uniqueName": "user@email.com", - "imageUrl": "https://dev.azure.com/userorg/_api/_common/identityImage?id=af1dae6a-5d55-49bb-a1a1-8e5db902dc1c", - "descriptor": "aad.ZmZhYjg5YzEtYmIxNC03NGRiLTk3NTAtZDBlMzQ2NGQwNjU0" - }, - "variables": {}, - "variableGroups": [], - "preDeployApprovals": { - "approvals": [ - { - "rank": 1, - "isAutomated": true, - "isNotificationOn": false, - "id": 29 - } - ], - "approvalOptions": { - "requiredApproverCount": null, - "releaseCreatorCanBeApprover": false, - "autoTriggeredAndPreviousEnvironmentApprovedCanBeSkipped": false, - "enforceIdentityRevalidation": false, - "timeoutInMinutes": 0, - "executionOrder": 1 - } - }, - "deployStep": { - "id": 30 - }, - "postDeployApprovals": { - "approvals": [ - { - "rank": 1, - "isAutomated": true, - "isNotificationOn": false, - "id": 31 - } - ], - "approvalOptions": { - "requiredApproverCount": null, - "releaseCreatorCanBeApprover": false, - "autoTriggeredAndPreviousEnvironmentApprovedCanBeSkipped": false, - "enforceIdentityRevalidation": false, - "timeoutInMinutes": 0, - "executionOrder": 2 - } - }, - "deployPhases": [ - { - "deploymentInput": { - "parallelExecution": { - "parallelExecutionType": 0 - }, - "skipArtifactsDownload": false, - "artifactsDownloadInput": { - "downloadInputs": [] - }, - "queueId": 6, - "demands": [], - "enableAccessToken": false, - "timeoutInMinutes": 0, - "jobCancelTimeoutInMinutes": 1, - "condition": "succeeded()", - "overrideInputs": {} - }, - "rank": 1, - "phaseType": 1, - "name": "Agent job", - "refName": null, - "workflowTasks": [ - { - "environment": {}, - "taskId": "33c63b11-352b-45a2-ba1b-54cb568a29ca", - "version": "0.*", - "name": "Use Python 3.6", - "refName": "", - "enabled": true, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "versionSpec": "3.6", - "addToPath": "true", - "architecture": "x64" - } - }, - { - "environment": {}, - "taskId": 
"6c731c3c-3c68-459a-a5c9-bde6e6595b5b", - "version": "3.*", - "name": "Install Requirements", - "refName": "", - "enabled": true, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "targetType": "filePath", - "filePath": "$(System.DefaultWorkingDirectory)/DevOpsForAI/devops-for-ai/environment_setup/install_requirements.sh", - "arguments": "", - "script": "# Write your commands here\n\n# Use the environment variables input below to pass secret variables to this script", - "workingDirectory": "$(System.DefaultWorkingDirectory)/DevOpsForAI/devops-for-ai/environment_setup", - "failOnStderr": "false", - "noProfile": "true", - "noRc": "true" - } - }, - { - "environment": {}, - "taskId": "d9bafed4-0b18-4f58-968d-86655b4d2ce9", - "version": "2.*", - "name": "Login to Azure Subscription", - "refName": "", - "enabled": true, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "script": "az login --service-principal -u $(spidentity) -p $(spsecret) --tenant $(sptenant)", - "workingDirectory": "", - "failOnStderr": "false" - } - }, - { - "environment": {}, - "taskId": "d9bafed4-0b18-4f58-968d-86655b4d2ce9", - "version": "2.*", - "name": "Run AML Pipeline", - "refName": "", - "enabled": true, - "alwaysRun": false, - "continueOnError": false, - "timeoutInMinutes": 0, - "definitionType": "task", - "overrideInputs": {}, - "condition": "succeeded()", - "inputs": { - "script": "python aml_service/05-TriggerAmlPipeline.py", - "workingDirectory": "$(System.DefaultWorkingDirectory)/DevOpsForAI/devops-for-ai", - "failOnStderr": "false" - } - } - ] - } - ], - "environmentOptions": { - "emailNotificationType": "OnlyOnFailure", - "emailRecipients": "release.environment.owner;release.creator", - "skipArtifactsDownload": false, - "timeoutInMinutes": 0, - "enableAccessToken": false, - "publishDeploymentStatus": true, - "badgeEnabled": false, - "autoLinkWorkItems": false, - "pullRequestDeploymentEnabled": false - }, - "demands": [], - "conditions": [ - { - "name": "ReleaseStarted", - "conditionType": 1, - "value": "" - } - ], - "executionPolicy": { - "concurrencyCount": 1, - "queueDepthCount": 0 - }, - "schedules": [], - "currentRelease": { - "id": 0, - "url": "https://vsrm.dev.azure.com/userorg/420d3eaf-7dbb-46cb-a7c9-93662c745570/_apis/Release/releases/0", - "_links": {} - }, - "retentionPolicy": { - "daysToKeep": 30, - "releasesToKeep": 3, - "retainBuild": true - }, - "processParameters": {}, - "properties": {}, - "preDeploymentGates": { - "id": 0, - "gatesOptions": null, - "gates": [] - }, - "postDeploymentGates": { - "id": 0, - "gatesOptions": null, - "gates": [] - }, - "environmentTriggers": [], - "badgeUrl": "https://vsrm.dev.azure.com/userorg/_apis/public/Release/badge/420d3eaf-7dbb-46cb-a7c9-93662c745570/6/9" - } - ], - "artifacts": [], - "triggers": [ - { - "schedule": { - "jobId": "5efd6865-0305-493a-9ff9-08995bbb72e5", - "timeZoneId": "UTC", - "startHours": 3, - "startMinutes": 0, - "daysToRelease": 21 - }, - "triggerType": 2 - } - ], - "releaseNameFormat": "Release-$(rev:r)", - "tags": [], - "pipelineProcess": { - "type": 1 - }, - "properties": { - "DefinitionCreationSource": { - "$type": "System.String", - "$value": "ReleaseClone" - } - }, - "id": 6, - "name": "retrainingtriggerpipeline", - "path": "\\", - "projectReference": null, - "url": 
"https://vsrm.dev.azure.com/userorg/420d3eaf-7dbb-46cb-a7c9-93662c745570/_apis/Release/definitions/6", - "_links": { - "self": { - "href": "https://vsrm.dev.azure.com/userorg/420d3eaf-7dbb-46cb-a7c9-93662c745570/_apis/Release/definitions/6" - }, - "web": { - "href": "https://dev.azure.com/userorg/420d3eaf-7dbb-46cb-a7c9-93662c745570/_release?definitionId=6" - } - } -} \ No newline at end of file