diff --git a/.pipelines/diabetes_regression-ci.yml b/.pipelines/diabetes_regression-ci.yml index b3504dfe..56258d50 100644 --- a/.pipelines/diabetes_regression-ci.yml +++ b/.pipelines/diabetes_regression-ci.yml @@ -50,6 +50,8 @@ stages: - stage: 'Trigger_AML_Pipeline' displayName: 'Train model' condition: and(succeeded(), not(variables['MODEL_BUILD_ID'])) + variables: + BUILD_URI: '$(SYSTEM.COLLECTIONURI)$(SYSTEM.TEAMPROJECT)/_build/results?buildId=$(BUILD.BUILDID)' jobs: - job: "Get_Pipeline_ID" condition: and(succeeded(), eq(coalesce(variables['auto-trigger-training'], 'true'), 'true')) @@ -85,7 +87,7 @@ stages: azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' PipelineId: '$(AMLPIPELINE_ID)' ExperimentName: '$(EXPERIMENT_NAME)' - PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}' + PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}, "tags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILD_URI)"}, "StepTags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILD_URI)"}' - job: "Training_Run_Report" dependsOn: "Run_ML_Pipeline" condition: always() diff --git a/diabetes_regression/evaluate/evaluate_model.py b/diabetes_regression/evaluate/evaluate_model.py index 57685b3d..f4d4c6db 100644 --- a/diabetes_regression/evaluate/evaluate_model.py +++ b/diabetes_regression/evaluate/evaluate_model.py @@ -23,7 +23,6 @@ ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
""" -import os from azureml.core import Run import argparse import traceback @@ -74,11 +73,7 @@ run_id = 'amlcompute' parser = argparse.ArgumentParser("evaluate") -parser.add_argument( - "--build_id", - type=str, - help="The Build ID of the build triggering this pipeline run", -) + parser.add_argument( "--run_id", type=str, @@ -99,19 +94,13 @@ ) args = parser.parse_args() -if (args.build_id is not None): - build_id = args.build_id if (args.run_id is not None): run_id = args.run_id if (run_id == 'amlcompute'): run_id = run.parent.id model_name = args.model_name metric_eval = "mse" -run.tag("BuildId", value=build_id) -builduri_base = os.environ.get("BUILDURI_BASE") -if (builduri_base is not None): - build_uri = builduri_base + build_id - run.tag("BuildUri", value=build_uri) + allow_run_cancel = args.allow_run_cancel # Parameterize the matrices on which the models should be compared # Add golden data set on which all the model performance can be evaluated diff --git a/diabetes_regression/register/register_model.py b/diabetes_regression/register/register_model.py index 3fc89495..8c63506c 100644 --- a/diabetes_regression/register/register_model.py +++ b/diabetes_regression/register/register_model.py @@ -43,7 +43,6 @@ def main(): experiment_name = os.environ.get("EXPERIMENT_NAME") resource_group = os.environ.get("RESOURCE_GROUP") subscription_id = os.environ.get("SUBSCRIPTION_ID") - build_id = os.environ.get('BUILD_BUILDID') # run_id useful to query previous runs run_id = "bd184a18-2ac8-4951-8e78-e290bef3b012" aml_workspace = Workspace.get( @@ -59,11 +58,6 @@ def main(): run_id = 'amlcompute' parser = argparse.ArgumentParser("register") - parser.add_argument( - "--build_id", - type=str, - help="The Build ID of the build triggering this pipeline run", - ) parser.add_argument( "--run_id", @@ -84,8 +78,6 @@ def main(): ) args = parser.parse_args() - if (args.build_id is not None): - build_id = args.build_id if (args.run_id is not None): run_id = args.run_id if (run_id == 
'amlcompute'): @@ -98,32 +90,40 @@ def main(): model_file = os.path.join(model_path, model_name) model = joblib.load(model_file) model_mse = run.parent.get_metrics()["mse"] + parent_tags = run.parent.get_tags() + try: + build_id = parent_tags["BuildId"] + except KeyError: + build_id = None + print("BuildId tag not found on parent run.") + print(f"Tags present: {parent_tags}") + try: + build_uri = parent_tags["BuildUri"] + except KeyError: + build_uri = None + print("BuildUri tag not found on parent run.") + print(f"Tags present: {parent_tags}") if (model is not None): if (build_id is None): - register_aml_model(model_file, model_name, exp, run_id) + register_aml_model(model_file, model_name, model_mse, exp, run_id) + elif (build_uri is None): + register_aml_model( + model_file, + model_name, + model_mse, + exp, + run_id, + build_id) else: - run.tag("BuildId", value=build_id) - builduri_base = os.environ.get("BUILDURI_BASE") - if (builduri_base is not None): - build_uri = builduri_base + build_id - run.tag("BuildUri", value=build_uri) - register_aml_model( - model_file, - model_name, - model_mse, - exp, - run_id, - build_id, - build_uri) - else: - register_aml_model( - model_file, - model_name, - model_mse, - exp, - run_id, - build_id) + register_aml_model( + model_file, + model_name, + model_mse, + exp, + run_id, + build_id, + build_uri) else: print("Model not found. 
Skipping model registration.") sys.exit(0) diff --git a/diabetes_regression/training/train.py b/diabetes_regression/training/train.py index 6e40c2b7..c3f1203c 100644 --- a/diabetes_regression/training/train.py +++ b/diabetes_regression/training/train.py @@ -50,11 +50,7 @@ def main(): print("Running train.py") parser = argparse.ArgumentParser("train") - parser.add_argument( - "--build_id", - type=str, - help="The build ID of the build triggering this pipeline run", - ) + parser.add_argument( "--model_name", type=str, @@ -70,12 +66,10 @@ def main(): args = parser.parse_args() - print("Argument [build_id]: %s" % args.build_id) print("Argument [model_name]: %s" % args.model_name) print("Argument [step_output]: %s" % args.step_output) model_name = args.model_name - build_id = args.build_id step_output_path = args.step_output print("Getting training parameters") @@ -119,15 +113,7 @@ def main(): output_path = os.path.join('outputs', model_name) joblib.dump(value=reg, filename=output_path) - # Add properties to identify this specific training run - run.parent.tag("BuildId", value=build_id) - run.tag("BuildId", value=build_id) run.tag("run_type", value="train") - builduri_base = os.environ.get("BUILDURI_BASE") - if (builduri_base is not None): - build_uri = builduri_base + build_id - run.tag("BuildUri", value=build_uri) - run.parent.tag("BuildUri", value=build_uri) print(f"tags now present for run: {run.tags}") run.complete() diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py index c78465dd..7192d308 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py @@ -38,15 +38,9 @@ def main(): run_config = RunConfiguration() run_config.environment = environment - if (e.collection_uri is not None and e.teamproject_name is not None): - builduri_base = e.collection_uri + e.teamproject_name - 
builduri_base = builduri_base + "/_build/results?buildId=" - run_config.environment.environment_variables["BUILDURI_BASE"] = builduri_base # NOQA: E501 model_name_param = PipelineParameter( name="model_name", default_value=e.model_name) - build_id_param = PipelineParameter( - name="build_id", default_value=e.build_id) # Get dataset name dataset_name = e.dataset_name @@ -98,7 +92,6 @@ def main(): inputs=[dataset.as_named_input('training_data')], outputs=[pipeline_data], arguments=[ - "--build_id", build_id_param, "--model_name", model_name_param, "--step_output", pipeline_data ], @@ -113,7 +106,6 @@ def main(): compute_target=aml_compute, source_directory=e.sources_directory_train, arguments=[ - "--build_id", build_id_param, "--model_name", model_name_param, "--allow_run_cancel", e.allow_run_cancel, ], @@ -129,7 +121,6 @@ def main(): source_directory=e.sources_directory_train, inputs=[pipeline_data], arguments=[ - "--build_id", build_id_param, "--model_name", model_name_param, "--step_input", pipeline_data, ], diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py index a41b0c2e..ef42d63e 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py @@ -34,10 +34,6 @@ def main(): aml_workspace, e.aml_env_name, create_new=False) # NOQA: E501 run_config = RunConfiguration() run_config.environment = environment - if (e.collection_uri is not None and e.teamproject_name is not None): - builduri_base = e.collection_uri + e.teamproject_name - builduri_base = builduri_base + "/_build/results?buildId=" - run_config.environment.environment_variables["BUILDURI_BASE"] = builduri_base # NOQA: E501 train_step = PythonScriptStep( name="Train Model", diff --git a/ml_service/pipelines/run_train_pipeline.py b/ml_service/pipelines/run_train_pipeline.py index f5dba4fd..b68b9a15 100644 
--- a/ml_service/pipelines/run_train_pipeline.py +++ b/ml_service/pipelines/run_train_pipeline.py @@ -1,5 +1,5 @@ from azureml.pipeline.core import PublishedPipeline -from azureml.core import Workspace +from azureml.core import Experiment, Workspace import argparse from ml_service.util.env_variables import Env @@ -55,10 +55,16 @@ def main(): if(args.skip_train_execution is False): pipeline_parameters = {"model_name": e.model_name} - run = published_pipeline.submit( - aml_workspace, - e.experiment_name, - pipeline_parameters) + tags = {"BuildId": e.build_id} + if (e.build_uri is not None): + tags["BuildUri"] = e.build_uri + experiment = Experiment( + workspace=aml_workspace, + name=e.experiment_name) + run = experiment.submit( + published_pipeline, + tags=tags, + pipeline_parameters=pipeline_parameters) print("Pipeline run initiated ", run.id) diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py index 7729b82c..90bc906e 100644 --- a/ml_service/util/env_variables.py +++ b/ml_service/util/env_variables.py @@ -38,8 +38,7 @@ def __init__(self): self._image_name = os.environ.get('IMAGE_NAME') self._db_cluster_id = os.environ.get("DB_CLUSTER_ID") self._score_script = os.environ.get("SCORE_SCRIPT") - self._collection_uri = os.environ.get("SYSTEM_COLLECTIONURI") - self._teamproject_name = os.environ.get("SYSTEM_TEAMPROJECT") + self._build_uri = os.environ.get("BUILD_URI") self._dataset_name = os.environ.get("DATASET_NAME") self._run_evaluation = os.environ.get("RUN_EVALUATION", "true") self._allow_run_cancel = os.environ.get( @@ -139,12 +138,8 @@ def score_script(self): return self._score_script @property - def collection_uri(self): - return self._collection_uri - - @property - def teamproject_name(self): - return self._teamproject_name + def build_uri(self): + return self._build_uri @property def dataset_name(self):