diff --git a/.env.example b/.env.example index 892ecb2b..cb991043 100644 --- a/.env.example +++ b/.env.example @@ -15,6 +15,7 @@ WORKSPACE_NAME = 'aml-workspace' EXPERIMENT_NAME = '' # AML Compute Cluster Config +AML_ENV_NAME='diabetes_regression_training_env' AML_COMPUTE_CLUSTER_NAME = 'train-cluster' AML_COMPUTE_CLUSTER_CPU_SKU = 'STANDARD_DS2_V2' AML_CLUSTER_MAX_NODES = '4' diff --git a/.pipelines/diabetes_regression-variables-template.yml b/.pipelines/diabetes_regression-variables-template.yml index af32282e..a12fe67e 100644 --- a/.pipelines/diabetes_regression-variables-template.yml +++ b/.pipelines/diabetes_regression-variables-template.yml @@ -29,6 +29,8 @@ variables: value: diabetes_regression_model.pkl # AML Compute Cluster Config + - name: AML_ENV_NAME + value: diabetes_regression_training_env - name: AML_COMPUTE_CLUSTER_CPU_SKU value: STANDARD_DS2_V2 - name: AML_COMPUTE_CLUSTER_NAME diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index 92af061b..71c1549b 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -103,7 +103,6 @@ def replaceprojectname(project_dir, project_name, rename_name): r"ml_service/pipelines/diabetes_regression_build_train_pipeline.py", # NOQA: E501 r"ml_service/pipelines/diabetes_regression_verify_train_pipeline.py", # NOQA: E501 r"ml_service/util/create_scoring_image.py", - r"diabetes_regression/azureml_environment.json", r"diabetes_regression/conda_dependencies.yml", r"diabetes_regression/evaluate/evaluate_model.py", r"diabetes_regression/register/register_model.py", diff --git a/diabetes_regression/azureml_environment.json b/diabetes_regression/azureml_environment.json deleted file mode 100644 index 8a81614e..00000000 --- a/diabetes_regression/azureml_environment.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "name": "diabetes_regression_sklearn", - "version": null, - "environmentVariables": { - "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE" - }, - "python": { - "userManagedDependencies": false, - "interpreterPath": 
"python", - "condaDependenciesFile": null, - "baseCondaEnvironment": null - }, - "docker": { - "enabled": true, - "baseImage": "mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04", - "baseDockerfile": null, - "sharedVolumes": true, - "shmSize": "2g", - "arguments": [], - "baseImageRegistry": { - "address": null, - "username": null, - "password": null - } - }, - "spark": { - "repositories": [], - "packages": [], - "precachePackages": true - }, - "databricks": { - "mavenLibraries": [], - "pypiLibraries": [], - "rcranLibraries": [], - "jarLibraries": [], - "eggLibraries": [] - }, - "inferencingStackVersion": null -} diff --git a/diabetes_regression/conda_dependencies.yml b/diabetes_regression/conda_dependencies.yml index add251f7..76350a01 100644 --- a/diabetes_regression/conda_dependencies.yml +++ b/diabetes_regression/conda_dependencies.yml @@ -1,32 +1,40 @@ +# Conda environment specification. The dependencies defined in this file will +# be automatically provisioned for managed runs. These include runs against +# the localdocker, remotedocker, and cluster compute targets. + +# Note that this file is NOT used to automatically manage dependencies for the +# local compute target. To provision these dependencies locally, run: +# conda env update --file conda_dependencies.yml + # Details about the Conda environment file format: # https://conda.io/docs/using/envs.html#create-environment-file-by-hand -name: diabetes_regression_sklearn +# For managing Spark packages and configuration, see spark_dependencies.yml. +# Version of this configuration file's structure and semantics in AzureML. +# This directive is stored in a comment to preserve the Conda file structure. +# [AzureMlVersion] = 2 +name: diabetes_regression_training_env dependencies: # The python interpreter version. + # Currently Azure ML Workbench only supports 3.5.2 and later. 
- python=3.7.5 - # Required by azureml-defaults, installed separately through Conda to # get a prebuilt version and not require build tools for the install. - psutil=5.6 #latest - - numpy=1.18.1 - - pandas=1.0.0 - - scikit-learn=0.22.1 - #- r-essentials - #- tensorflow - #- keras - - - pip=20.0.2 - pip: - # Dependencies for training environment. - - - azureml-core==1.0.85 - - # Dependencies for scoring environment. - - # You must list azureml-defaults as a pip dependency - - azureml-defaults==1.0.85 - - inference-schema[numpy-support]==1.0.1 - - azureml-dataprep==1.1.38 + # Required packages for AzureML execution, history, and data preparation. + - azureml-model-management-sdk==1.0.1b6.post1 + - azureml-sdk==1.0.74 + - scipy==1.3.1 + - scikit-learn==0.22 + - pandas==0.25.3 + - numpy==1.17.3 + - joblib==0.14.0 + - gunicorn==19.9.0 + - flask==1.1.1 + - inference-schema[numpy-support] + - azure + - azure-storage-blob + - azureml-dataprep diff --git a/diabetes_regression/scoring/inference_config.yml b/diabetes_regression/scoring/inference_config.yml index 52017bae..3fc86686 100644 --- a/diabetes_regression/scoring/inference_config.yml +++ b/diabetes_regression/scoring/inference_config.yml @@ -6,4 +6,4 @@ schemaFile: sourceDirectory: enableGpu: False baseImage: -baseImageRegistry: +baseImageRegistry: \ No newline at end of file diff --git a/docs/code_description.md b/docs/code_description.md index b09cf0a1..22bfe793 100644 --- a/docs/code_description.md +++ b/docs/code_description.md @@ -62,7 +62,6 @@ The repository provides a template with folders structure suitable for maintaini ### Environment Definitions -- `diabetes_regression/azureml_environment.json` : Azure ML environment definition for the training environment, including base Docker image and a reference to `conda_dependencies.yml` Conda environment file. 
- `diabetes_regression/conda_dependencies.yml` : Conda environment definition for the environment used for both training and scoring (Docker image in which train.py and score.py are run). - `diabetes_regression/ci_dependencies.yml` : Conda environment definition for the CI environment. diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py index f382a476..c78465dd 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py @@ -1,11 +1,12 @@ from azureml.pipeline.core.graph import PipelineParameter from azureml.pipeline.steps import PythonScriptStep from azureml.pipeline.core import Pipeline, PipelineData -from azureml.core import Workspace, Environment +from azureml.core import Workspace from azureml.core.runconfig import RunConfiguration from azureml.core import Dataset from ml_service.util.attach_compute import get_compute from ml_service.util.env_variables import Env +from ml_service.util.manage_environment import get_environment from sklearn.datasets import load_diabetes import pandas as pd import os @@ -31,17 +32,16 @@ def main(): print("aml_compute:") print(aml_compute) - # Create a reusable run configuration environment - # Read definition from diabetes_regression/azureml_environment.json - environment = Environment.load_from_directory(e.sources_directory_train) - if (e.collection_uri is not None and e.teamproject_name is not None): - builduri_base = e.collection_uri + e.teamproject_name - builduri_base = builduri_base + "/_build/results?buildId=" - environment.environment_variables["BUILDURI_BASE"] = builduri_base - environment.register(aml_workspace) + # Create a reusable Azure ML environment + environment = get_environment( + aml_workspace, e.aml_env_name, create_new=False) # NOQA: E501 run_config = RunConfiguration() run_config.environment = environment + if (e.collection_uri is 
not None and e.teamproject_name is not None): + builduri_base = e.collection_uri + e.teamproject_name + builduri_base = builduri_base + "/_build/results?buildId=" + run_config.environment.environment_variables["BUILDURI_BASE"] = builduri_base # NOQA: E501 model_name_param = PipelineParameter( name="model_name", default_value=e.model_name) diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py index 96ddf2cf..a41b0c2e 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py @@ -1,9 +1,10 @@ from azureml.pipeline.steps import PythonScriptStep from azureml.pipeline.core import Pipeline -from azureml.core import Workspace, Environment +from azureml.core import Workspace from azureml.core.runconfig import RunConfiguration from ml_service.util.attach_compute import get_compute from ml_service.util.env_variables import Env +from ml_service.util.manage_environment import get_environment def main(): @@ -26,19 +27,17 @@ def main(): print("aml_compute:") print(aml_compute) - # Create a reusable run configuration environment - # Read definition from diabetes_regression/azureml_environment.json + # Create a reusable Azure ML environment # Make sure to include `r-essentials' # in diabetes_regression/conda_dependencies.yml - environment = Environment.load_from_directory(e.sources_directory_train) + environment = get_environment( + aml_workspace, e.aml_env_name, create_new=False) # NOQA: E501 + run_config = RunConfiguration() + run_config.environment = environment if (e.collection_uri is not None and e.teamproject_name is not None): builduri_base = e.collection_uri + e.teamproject_name builduri_base = builduri_base + "/_build/results?buildId=" - environment.environment_variables["BUILDURI_BASE"] = builduri_base - environment.register(aml_workspace) - - run_config = 
RunConfiguration()
-    run_config.environment = environment
+        run_config.environment.environment_variables["BUILDURI_BASE"] = builduri_base  # NOQA: E501
 
     train_step = PythonScriptStep(
         name="Train Model",
diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py
index 1bd69529..7729b82c 100644
--- a/ml_service/util/env_variables.py
+++ b/ml_service/util/env_variables.py
@@ -44,6 +44,7 @@ def __init__(self):
         self._run_evaluation = os.environ.get("RUN_EVALUATION", "true")
         self._allow_run_cancel = os.environ.get(
             "ALLOW_RUN_CANCEL", "true")
+        self._aml_env_name = os.environ.get("AML_ENV_NAME")
 
     @property
     def workspace_name(self):
@@ -156,3 +157,7 @@ def run_evaluation(self):
     @property
     def allow_run_cancel(self):
         return self._allow_run_cancel
+
+    @property
+    def aml_env_name(self):
+        return self._aml_env_name
diff --git a/ml_service/util/manage_environment.py b/ml_service/util/manage_environment.py
new file mode 100644
index 00000000..43749f3f
--- /dev/null
+++ b/ml_service/util/manage_environment.py
@@ -0,0 +1,45 @@
+import os
+import sys
+
+from azureml.core import Workspace, Environment
+from ml_service.util.env_variables import Env
+
+
+def get_environment(
+    workspace: Workspace,
+    environment_name: str,
+    create_new: bool = False
+):
+    """Return the named Azure ML environment, creating it if required.
+
+    The environment registered in ``workspace`` under ``environment_name``
+    is reused when present. If it is missing, or ``create_new`` is true,
+    a new environment is built from ``conda_dependencies.yml`` in the
+    training sources directory and registered in the workspace.
+
+    :param workspace: Azure ML workspace to query and register in.
+    :param environment_name: Name of the environment to fetch or create.
+    :param create_new: Rebuild from the Conda file and register again even
+        when an environment with that name already exists.
+    :return: The existing or newly created environment.
+
+    Any exception is printed to stderr and the process exits with code 1.
+    """
+    try:
+        # Environment.list maps environment names to Environment objects.
+        environment = Environment.list(workspace=workspace).get(
+            environment_name)
+
+        if environment is None or create_new:
+            conda_file = os.path.join(
+                Env().sources_directory_train, "conda_dependencies.yml")
+            environment = Environment.from_conda_specification(
+                environment_name, conda_file)
+            environment.register(workspace)
+
+        print(environment)
+        return environment
+    except Exception as err:
+        # Report the failure and stop with a non-zero exit status.
+        print(err, file=sys.stderr)
+        sys.exit(1)