From 5650a417c99d8c4662b88801c123cba624edab43 Mon Sep 17 00:00:00 2001 From: Jovana Taylor Date: Wed, 26 Feb 2020 15:11:49 -0800 Subject: [PATCH 1/4] initial pass --- .../diabetes_regression-ci-build-train.yml | 3 +- .../evaluate/evaluate_model.py | 15 +----- .../register/register_model.py | 54 +++++++++---------- diabetes_regression/training/train.py | 16 +----- ...iabetes_regression_build_train_pipeline.py | 9 ---- ..._regression_build_train_pipeline_with_r.py | 4 -- ml_service/util/env_variables.py | 5 -- 7 files changed, 30 insertions(+), 76 deletions(-) diff --git a/.pipelines/diabetes_regression-ci-build-train.yml b/.pipelines/diabetes_regression-ci-build-train.yml index 96cf1d26..987b3694 100644 --- a/.pipelines/diabetes_regression-ci-build-train.yml +++ b/.pipelines/diabetes_regression-ci-build-train.yml @@ -78,6 +78,7 @@ stages: pool: server variables: AMLPIPELINE_ID: $[ dependencies.Get_Pipeline_ID.outputs['getpipelineid.AMLPIPELINEID'] ] + BUILD_URI: '$(SYSTEM.COLLECTIONURI)$(SYSTEM.TEAMPROJECT)/_build/results?buildId=$(BUILD.BUILDID)' steps: - task: ms-air-aiagility.vss-services-azureml.azureml-restApi-task.MLPublishedPipelineRestAPITask@0 displayName: 'Invoke ML pipeline' @@ -85,7 +86,7 @@ stages: azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' PipelineId: '$(AMLPIPELINE_ID)' ExperimentName: '$(EXPERIMENT_NAME)' - PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}' + PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}, "tags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILDURI)"}, "StepTags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILDURI)"}' - job: "Training_Run_Report" dependsOn: "Run_ML_Pipeline" condition: always() diff --git a/diabetes_regression/evaluate/evaluate_model.py b/diabetes_regression/evaluate/evaluate_model.py index 57685b3d..f4d4c6db 100644 --- a/diabetes_regression/evaluate/evaluate_model.py +++ b/diabetes_regression/evaluate/evaluate_model.py @@ -23,7 +23,6 @@ ARISING 
IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ -import os from azureml.core import Run import argparse import traceback @@ -74,11 +73,7 @@ run_id = 'amlcompute' parser = argparse.ArgumentParser("evaluate") -parser.add_argument( - "--build_id", - type=str, - help="The Build ID of the build triggering this pipeline run", -) + parser.add_argument( "--run_id", type=str, @@ -99,19 +94,13 @@ ) args = parser.parse_args() -if (args.build_id is not None): - build_id = args.build_id if (args.run_id is not None): run_id = args.run_id if (run_id == 'amlcompute'): run_id = run.parent.id model_name = args.model_name metric_eval = "mse" -run.tag("BuildId", value=build_id) -builduri_base = os.environ.get("BUILDURI_BASE") -if (builduri_base is not None): - build_uri = builduri_base + build_id - run.tag("BuildUri", value=build_uri) + allow_run_cancel = args.allow_run_cancel # Parameterize the matrices on which the models should be compared # Add golden data set on which all the model performance can be evaluated diff --git a/diabetes_regression/register/register_model.py b/diabetes_regression/register/register_model.py index 3fc89495..51e6d7a7 100644 --- a/diabetes_regression/register/register_model.py +++ b/diabetes_regression/register/register_model.py @@ -43,7 +43,6 @@ def main(): experiment_name = os.environ.get("EXPERIMENT_NAME") resource_group = os.environ.get("RESOURCE_GROUP") subscription_id = os.environ.get("SUBSCRIPTION_ID") - build_id = os.environ.get('BUILD_BUILDID') # run_id useful to query previous runs run_id = "bd184a18-2ac8-4951-8e78-e290bef3b012" aml_workspace = Workspace.get( @@ -59,11 +58,6 @@ def main(): run_id = 'amlcompute' parser = argparse.ArgumentParser("register") - parser.add_argument( - "--build_id", - type=str, - help="The Build ID of the build triggering this pipeline run", - ) parser.add_argument( "--run_id", @@ -84,8 +78,6 @@ def main(): ) args = parser.parse_args() - if (args.build_id is not 
None): - build_id = args.build_id if (args.run_id is not None): run_id = args.run_id if (run_id == 'amlcompute'): @@ -98,32 +90,36 @@ def main(): model_file = os.path.join(model_path, model_name) model = joblib.load(model_file) model_mse = run.parent.get_metrics()["mse"] + parent_tags = run.parent.get_tags() + try: + build_id = parent_tags["BuildId"] + except KeyError: + build_id = None + try: + build_uri = parent_tags["BuildUri"] + except KeyError: + build_uri = None if (model is not None): if (build_id is None): register_aml_model(model_file, model_name, exp, run_id) + elif (build_uri is None): + register_aml_model( + model_file, + model_name, + model_mse, + exp, + run_id, + build_id) else: - run.tag("BuildId", value=build_id) - builduri_base = os.environ.get("BUILDURI_BASE") - if (builduri_base is not None): - build_uri = builduri_base + build_id - run.tag("BuildUri", value=build_uri) - register_aml_model( - model_file, - model_name, - model_mse, - exp, - run_id, - build_id, - build_uri) - else: - register_aml_model( - model_file, - model_name, - model_mse, - exp, - run_id, - build_id) + register_aml_model( + model_file, + model_name, + model_mse, + exp, + run_id, + build_id, + build_uri) else: print("Model not found. 
Skipping model registration.") sys.exit(0) diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py index 6e40c2b7..c3f1203c 100644 --- a/diabetes_regression/training/train.py +++ b/diabetes_regression/training/train.py @@ -50,11 +50,7 @@ def main(): print("Running train.py") parser = argparse.ArgumentParser("train") - parser.add_argument( - "--build_id", - type=str, - help="The build ID of the build triggering this pipeline run", - ) + parser.add_argument( "--model_name", type=str, @@ -70,12 +66,10 @@ def main(): args = parser.parse_args() - print("Argument [build_id]: %s" % args.build_id) print("Argument [model_name]: %s" % args.model_name) print("Argument [step_output]: %s" % args.step_output) model_name = args.model_name - build_id = args.build_id step_output_path = args.step_output print("Getting training parameters") @@ -119,15 +113,7 @@ def main(): output_path = os.path.join('outputs', model_name) joblib.dump(value=reg, filename=output_path) - # Add properties to identify this specific training run - run.parent.tag("BuildId", value=build_id) - run.tag("BuildId", value=build_id) run.tag("run_type", value="train") - builduri_base = os.environ.get("BUILDURI_BASE") - if (builduri_base is not None): - build_uri = builduri_base + build_id - run.tag("BuildUri", value=build_uri) - run.parent.tag("BuildUri", value=build_uri) print(f"tags now present for run: {run.tags}") run.complete() diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py index f382a476..56f06c27 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py @@ -34,10 +34,6 @@ def main(): # Create a reusable run configuration environment # Read definition from diabetes_regression/azureml_environment.json environment = Environment.load_from_directory(e.sources_directory_train) - if 
(e.collection_uri is not None and e.teamproject_name is not None): - builduri_base = e.collection_uri + e.teamproject_name - builduri_base = builduri_base + "/_build/results?buildId=" - environment.environment_variables["BUILDURI_BASE"] = builduri_base environment.register(aml_workspace) run_config = RunConfiguration() @@ -45,8 +41,6 @@ def main(): model_name_param = PipelineParameter( name="model_name", default_value=e.model_name) - build_id_param = PipelineParameter( - name="build_id", default_value=e.build_id) # Get dataset name dataset_name = e.dataset_name @@ -98,7 +92,6 @@ def main(): inputs=[dataset.as_named_input('training_data')], outputs=[pipeline_data], arguments=[ - "--build_id", build_id_param, "--model_name", model_name_param, "--step_output", pipeline_data ], @@ -113,7 +106,6 @@ def main(): compute_target=aml_compute, source_directory=e.sources_directory_train, arguments=[ - "--build_id", build_id_param, "--model_name", model_name_param, "--allow_run_cancel", e.allow_run_cancel, ], @@ -129,7 +121,6 @@ def main(): source_directory=e.sources_directory_train, inputs=[pipeline_data], arguments=[ - "--build_id", build_id_param, "--model_name", model_name_param, "--step_input", pipeline_data, ], diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py index 96ddf2cf..3f15951d 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py @@ -31,10 +31,6 @@ def main(): # Make sure to include `r-essentials' # in diabetes_regression/conda_dependencies.yml environment = Environment.load_from_directory(e.sources_directory_train) - if (e.collection_uri is not None and e.teamproject_name is not None): - builduri_base = e.collection_uri + e.teamproject_name - builduri_base = builduri_base + "/_build/results?buildId=" - 
environment.environment_variables["BUILDURI_BASE"] = builduri_base environment.register(aml_workspace) run_config = RunConfiguration() diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py index 1bd69529..2dc0e118 100644 --- a/ml_service/util/env_variables.py +++ b/ml_service/util/env_variables.py @@ -38,7 +38,6 @@ def __init__(self): self._image_name = os.environ.get('IMAGE_NAME') self._db_cluster_id = os.environ.get("DB_CLUSTER_ID") self._score_script = os.environ.get("SCORE_SCRIPT") - self._collection_uri = os.environ.get("SYSTEM_COLLECTIONURI") self._teamproject_name = os.environ.get("SYSTEM_TEAMPROJECT") self._dataset_name = os.environ.get("DATASET_NAME") self._run_evaluation = os.environ.get("RUN_EVALUATION", "true") @@ -137,10 +136,6 @@ def image_name(self): def score_script(self): return self._score_script - @property - def collection_uri(self): - return self._collection_uri - @property def teamproject_name(self): return self._teamproject_name From 8507e3b508462b8daaefac05b5ea6a1095ca2183 Mon Sep 17 00:00:00 2001 From: Jovana Taylor Date: Wed, 26 Feb 2020 16:01:07 -0800 Subject: [PATCH 2/4] fix parameter name --- .pipelines/diabetes_regression-ci-build-train.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/diabetes_regression-ci-build-train.yml b/.pipelines/diabetes_regression-ci-build-train.yml index 987b3694..41336026 100644 --- a/.pipelines/diabetes_regression-ci-build-train.yml +++ b/.pipelines/diabetes_regression-ci-build-train.yml @@ -86,7 +86,7 @@ stages: azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' PipelineId: '$(AMLPIPELINE_ID)' ExperimentName: '$(EXPERIMENT_NAME)' - PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}, "tags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILDURI)"}, "StepTags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILDURI)"}' + PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}, "tags": {"BuildId": 
"$(Build.BuildId)", "BuildUri": "$(BUILD_URI)"}, "StepTags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILD_URI)"}' - job: "Training_Run_Report" dependsOn: "Run_ML_Pipeline" condition: always() From 479443eba7a0f2349971dbb39811cd636cb4934f Mon Sep 17 00:00:00 2001 From: Jovana Taylor Date: Thu, 27 Feb 2020 10:21:06 -0800 Subject: [PATCH 3/4] remove from local trigger --- diabetes_regression/register/register_model.py | 2 +- ml_service/pipelines/run_train_pipeline.py | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/diabetes_regression/register/register_model.py b/diabetes_regression/register/register_model.py index 51e6d7a7..547eae87 100644 --- a/diabetes_regression/register/register_model.py +++ b/diabetes_regression/register/register_model.py @@ -102,7 +102,7 @@ def main(): if (model is not None): if (build_id is None): - register_aml_model(model_file, model_name, exp, run_id) + register_aml_model(model_file, model_name, model_mse, exp, run_id) elif (build_uri is None): register_aml_model( model_file, diff --git a/ml_service/pipelines/run_train_pipeline.py b/ml_service/pipelines/run_train_pipeline.py index f5dba4fd..59add8ba 100644 --- a/ml_service/pipelines/run_train_pipeline.py +++ b/ml_service/pipelines/run_train_pipeline.py @@ -1,5 +1,5 @@ from azureml.pipeline.core import PublishedPipeline -from azureml.core import Workspace +from azureml.core import Experiment, Workspace import argparse from ml_service.util.env_variables import Env @@ -55,10 +55,13 @@ def main(): if(args.skip_train_execution is False): pipeline_parameters = {"model_name": e.model_name} - run = published_pipeline.submit( - aml_workspace, - e.experiment_name, - pipeline_parameters) + experiment = Experiment( + workspace=aml_workspace, + name=e.experiment_name) + run = experiment.submit( + published_pipeline, + tags={"BuildId": e.build_id}, + pipeline_parameters=pipeline_parameters) print("Pipeline run initiated ", run.id) From 
c586fc962d9dfb90bd2c87fe313b99afa470dc64 Mon Sep 17 00:00:00 2001 From: Jovana Taylor Date: Mon, 2 Mar 2020 09:15:25 -0800 Subject: [PATCH 4/4] add build uri when running without agentless task, add message if buildid not found in tags --- .pipelines/diabetes_regression-ci.yml | 3 ++- diabetes_regression/register/register_model.py | 4 ++++ ml_service/pipelines/run_train_pipeline.py | 5 ++++- ml_service/util/env_variables.py | 6 +++--- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.pipelines/diabetes_regression-ci.yml b/.pipelines/diabetes_regression-ci.yml index 04f6d939..56258d50 100644 --- a/.pipelines/diabetes_regression-ci.yml +++ b/.pipelines/diabetes_regression-ci.yml @@ -50,6 +50,8 @@ stages: - stage: 'Trigger_AML_Pipeline' displayName: 'Train model' condition: and(succeeded(), not(variables['MODEL_BUILD_ID'])) + variables: + BUILD_URI: '$(SYSTEM.COLLECTIONURI)$(SYSTEM.TEAMPROJECT)/_build/results?buildId=$(BUILD.BUILDID)' jobs: - job: "Get_Pipeline_ID" condition: and(succeeded(), eq(coalesce(variables['auto-trigger-training'], 'true'), 'true')) @@ -78,7 +80,6 @@ stages: pool: server variables: AMLPIPELINE_ID: $[ dependencies.Get_Pipeline_ID.outputs['getpipelineid.AMLPIPELINEID'] ] - BUILD_URI: '$(SYSTEM.COLLECTIONURI)$(SYSTEM.TEAMPROJECT)/_build/results?buildId=$(BUILD.BUILDID)' steps: - task: ms-air-aiagility.vss-services-azureml.azureml-restApi-task.MLPublishedPipelineRestAPITask@0 displayName: 'Invoke ML pipeline' diff --git a/diabetes_regression/register/register_model.py b/diabetes_regression/register/register_model.py index 547eae87..8c63506c 100644 --- a/diabetes_regression/register/register_model.py +++ b/diabetes_regression/register/register_model.py @@ -95,10 +95,14 @@ def main(): build_id = parent_tags["BuildId"] except KeyError: build_id = None + print("BuildId tag not found on parent run.") + print(f"Tags present: {parent_tags}") try: build_uri = parent_tags["BuildUri"] except KeyError: build_uri = None + print("BuildUri tag not 
found on parent run.") + print(f"Tags present: {parent_tags}") if (model is not None): if (build_id is None): diff --git a/ml_service/pipelines/run_train_pipeline.py b/ml_service/pipelines/run_train_pipeline.py index 59add8ba..b68b9a15 100644 --- a/ml_service/pipelines/run_train_pipeline.py +++ b/ml_service/pipelines/run_train_pipeline.py @@ -55,12 +55,15 @@ def main(): if(args.skip_train_execution is False): pipeline_parameters = {"model_name": e.model_name} + tags = {"BuildId": e.build_id} + if (e.build_uri is not None): + tags["BuildUri"] = e.build_uri experiment = Experiment( workspace=aml_workspace, name=e.experiment_name) run = experiment.submit( published_pipeline, - tags={"BuildId": e.build_id}, + tags=tags, pipeline_parameters=pipeline_parameters) print("Pipeline run initiated ", run.id) diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py index 4f9b647d..90bc906e 100644 --- a/ml_service/util/env_variables.py +++ b/ml_service/util/env_variables.py @@ -38,7 +38,7 @@ def __init__(self): self._image_name = os.environ.get('IMAGE_NAME') self._db_cluster_id = os.environ.get("DB_CLUSTER_ID") self._score_script = os.environ.get("SCORE_SCRIPT") - self._teamproject_name = os.environ.get("SYSTEM_TEAMPROJECT") + self._build_uri = os.environ.get("BUILD_URI") self._dataset_name = os.environ.get("DATASET_NAME") self._run_evaluation = os.environ.get("RUN_EVALUATION", "true") self._allow_run_cancel = os.environ.get( @@ -138,8 +138,8 @@ def score_script(self): return self._score_script @property - def teamproject_name(self): - return self._teamproject_name + def build_uri(self): + return self._build_uri @property def dataset_name(self):