Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 641faec

Browse files
jsondai authored and copybara-github committed
fix: fix numerical NaN experiment run logging error in EvalTask.
PiperOrigin-RevId: 641981976
1 parent 4e2d87f commit 641faec
Copy full SHA for 641faec

2 files changed

+65 -7 | Lines changed: 65 additions & 7 deletions

File tree

Expand file tree / Collapse file tree
Open diff view settings
Filter options
Expand file tree / Collapse file tree
Open diff view settings
Collapse file

‎tests/unit/vertexai/test_evaluation.py‎

Copy file name to clipboard | Expand all lines: tests/unit/vertexai/test_evaluation.py
+57 -4 | Lines changed: 57 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -28,11 +28,13 @@
2828
)
2929
from vertexai import generative_models
3030
from vertexai.preview import evaluation
31+
from vertexai.preview.evaluation import _base as eval_base
32+
from vertexai.preview.evaluation import _evaluation
3133
from vertexai.preview.evaluation import utils
34+
import numpy as np
3235
import pandas as pd
3336
import pytest
3437

35-
3638
_TEST_PROJECT = "test-project"
3739
_TEST_LOCATION = "us-central1"
3840
_TEST_METRICS = (
@@ -78,6 +80,7 @@
7880
text,text,text\n
7981
"""
8082

83+
_TEST_EXPERIMENT = "test-experiment"
8184

8285
_MOCK_EXACT_MATCH_RESULT = (
8386
gapic_evaluation_service_types.EvaluateInstancesResponse(
@@ -135,6 +138,19 @@
135138
]
136139
}
137140
)
141+
MOCK_EVAL_RESULT = eval_base.EvalResult(
142+
summary_metrics={
143+
"row_count": 1,
144+
"mock_metric/mean": 1.0,
145+
"mock_metric/std": np.nan,
146+
},
147+
metrics_table=pd.DataFrame(
148+
{
149+
"response": ["test"],
150+
"mock_metric": [1.0],
151+
}
152+
),
153+
)
138154

139155

140156
@pytest.fixture
@@ -163,23 +179,22 @@ def teardown_method(self):
163179
initializer.global_pool.shutdown(wait=True)
164180

165181
def test_create_eval_task(self):
166-
test_experiment = "test_experiment_name"
167182
test_content_column_name = "test_content_column_name"
168183
test_reference_column_name = "test_reference_column_name"
169184
test_response_column_name = "test_response_column_name"
170185

171186
test_eval_task = evaluation.EvalTask(
172187
dataset=_TEST_EVAL_DATASET,
173188
metrics=_TEST_METRICS,
174-
experiment=test_experiment,
189+
experiment=_TEST_EXPERIMENT,
175190
content_column_name=test_content_column_name,
176191
reference_column_name=test_reference_column_name,
177192
response_column_name=test_response_column_name,
178193
)
179194

180195
assert test_eval_task.dataset.equals(_TEST_EVAL_DATASET)
181196
assert test_eval_task.metrics == _TEST_METRICS
182-
assert test_eval_task.experiment == test_experiment
197+
assert test_eval_task.experiment == _TEST_EXPERIMENT
183198
assert test_eval_task.content_column_name == test_content_column_name
184199
assert test_eval_task.reference_column_name == test_reference_column_name
185200
assert test_eval_task.response_column_name == test_response_column_name
@@ -470,6 +485,44 @@ def test_compute_pairwise_metrics_without_inference(self, api_transport):
470485
== 0.5
471486
)
472487

488+
def test_eval_result_experiment_run_logging(self):
489+
test_eval_task = evaluation.EvalTask(
490+
dataset=_TEST_EVAL_DATASET,
491+
metrics=_TEST_METRICS,
492+
experiment=_TEST_EXPERIMENT,
493+
)
494+
495+
with mock.patch.multiple(
496+
metadata._experiment_tracker,
497+
_experiment=mock.MagicMock(name=_TEST_EXPERIMENT),
498+
_experiment_run=None,
499+
set_experiment=mock.DEFAULT,
500+
reset=mock.DEFAULT,
501+
):
502+
with mock.patch.multiple(
503+
vertexai.preview,
504+
start_run=mock.MagicMock(),
505+
log_params=mock.DEFAULT,
506+
log_metrics=mock.DEFAULT,
507+
) as mock_metadata:
508+
with mock.patch.object(
509+
target=_evaluation,
510+
attribute="evaluate",
511+
side_effect=[MOCK_EVAL_RESULT],
512+
):
513+
test_result = test_eval_task.evaluate()
514+
515+
assert test_result.summary_metrics["row_count"] == 1
516+
assert test_result.summary_metrics["mock_metric/mean"] == 1.0
517+
assert test_result.summary_metrics["mock_metric/std"] == "NaN"
518+
mock_metadata["log_metrics"].assert_called_once_with(
519+
{
520+
"row_count": 1,
521+
"mock_metric/mean": 1.0,
522+
"mock_metric/std": "NaN",
523+
}
524+
)
525+
473526

474527
@pytest.mark.usefixtures("google_auth_mock")
475528
class TestEvaluationErrors:
Collapse file

‎vertexai/preview/evaluation/_eval_tasks.py‎

Copy file name to clipboard | Expand all lines: vertexai/preview/evaluation/_eval_tasks.py
+8 -3 | Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
from vertexai.preview.evaluation.metrics import (
2929
_base as metrics_base,
3030
)
31+
import numpy as np
3132

3233
if TYPE_CHECKING:
3334
import pandas as pd
@@ -284,9 +285,14 @@ def _evaluate_with_experiment(
284285
reference_column_name=self.reference_column_name,
285286
response_column_name=response_column_name,
286287
)
288+
289+
eval_result.summary_metrics = {
290+
k: ("NaN" if isinstance(v, float) and np.isnan(v) else v)
291+
for k, v in eval_result.summary_metrics.items()
292+
}
287293
try:
288294
vertexai.preview.log_metrics(eval_result.summary_metrics)
289-
except (ValueError, TypeError, exceptions.InvalidArgument) as e:
295+
except (TypeError, exceptions.InvalidArgument) as e:
290296
_LOGGER.warning(f"Experiment metrics logging failed: {str(e)}")
291297
return eval_result
292298

@@ -366,8 +372,7 @@ def _validate_experiment_run(self) -> None:
366372
if metadata._experiment_tracker.experiment_run:
367373
raise ValueError(
368374
"Experiment run already exists. Please specify the name of the"
369-
" experiment run to assign current session with in this evaluate"
370-
" method."
375+
" experiment run to assign current session within this evaluation."
371376
)
372377

373378
def _log_eval_experiment_param(

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.