test: stop checking text generation contents #935

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged (1 commit, Aug 28, 2024)
tests/system/small/ml/test_core.py (20 changes: 5 additions, 15 deletions)

@@ -14,7 +14,6 @@

 from datetime import datetime
 import typing
-from unittest import TestCase

 import pandas as pd
 import pyarrow as pa
@@ -24,7 +23,7 @@
 import bigframes
 import bigframes.features
 from bigframes.ml import core
-import tests.system.utils
+from tests.system import utils


 def test_model_eval(
@@ -212,7 +211,7 @@ def test_pca_model_principal_components(penguins_bqml_pca_model: core.BqmlModel)
         .reset_index(drop=True)
     )

-    tests.system.utils.assert_pandas_df_equal_pca_components(
+    utils.assert_pandas_df_equal_pca_components(
         result,
         expected,
         check_exact=False,
@@ -234,7 +233,7 @@ def test_pca_model_principal_component_info(penguins_bqml_pca_model: core.BqmlModel)
             "cumulative_explained_variance_ratio": [0.469357, 0.651283, 0.812383],
         },
     )
-    tests.system.utils.assert_pandas_df_equal(
+    utils.assert_pandas_df_equal(
         result,
         expected,
         check_exact=False,
@@ -349,18 +348,9 @@ def test_model_generate_text(
         llm_text_df, options=options
     ).to_pandas()

-    TestCase().assertSequenceEqual(df.shape, (3, 4))
-    TestCase().assertSequenceEqual(
-        [
-            "ml_generate_text_llm_result",
-            "ml_generate_text_rai_result",
-            "ml_generate_text_status",
-            "prompt",
-        ],
-        df.columns.to_list(),
+    utils.check_pandas_df_schema_and_index(
+        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
     )
-    series = df["ml_generate_text_llm_result"]
-    assert all(series.str.len() > 20)


 def test_model_forecast(time_series_bqml_arima_plus_model: core.BqmlModel):
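Every hunk in this PR makes the same swap: assertions on the generated text are replaced by a schema-and-shape check. A minimal before/after sketch of the pattern, using names from the diff (the rationale in the comments is an interpretation, not text from the PR):

# Before: asserts on nondeterministic LLM output. The length check is the
# brittle part; nothing guarantees every generated string exceeds 20 characters.
df = model.predict(llm_text_df).to_pandas()
assert df.shape == (3, 4)
assert "ml_generate_text_llm_result" in df.columns
assert all(df["ml_generate_text_llm_result"].str.len() > 20)

# After: asserts only deterministic properties, i.e. output schema and row count.
utils.check_pandas_df_schema_and_index(
    df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
)

The col_exact=False argument is what lets one column list serve every model: PaLM 2 and Gemini output carries an extra ml_generate_text_rai_result column (the old tests asserted shape (3, 4)), while Claude 3 output does not (shape (3, 3)), so ML_GENERATE_TEXT_OUTPUT is checked as a subset rather than an exact match.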
tests/system/small/ml/test_llm.py (77 changes: 33 additions, 44 deletions)
@@ -74,10 +74,9 @@ def test_create_text_generator_model_default_session(
     llm_text_df = bpd.read_pandas(llm_text_pandas_df)

     df = model.predict(llm_text_df).to_pandas()
-    assert df.shape == (3, 4)
-    assert "ml_generate_text_llm_result" in df.columns
-    series = df["ml_generate_text_llm_result"]
-    assert all(series.str.len() > 20)
+    utils.check_pandas_df_schema_and_index(
+        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
+    )


 @pytest.mark.flaky(retries=2)
@@ -104,10 +103,9 @@ def test_create_text_generator_32k_model_default_session(
     llm_text_df = bpd.read_pandas(llm_text_pandas_df)

     df = model.predict(llm_text_df).to_pandas()
-    assert df.shape == (3, 4)
-    assert "ml_generate_text_llm_result" in df.columns
-    series = df["ml_generate_text_llm_result"]
-    assert all(series.str.len() > 20)
+    utils.check_pandas_df_schema_and_index(
+        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
+    )


 @pytest.mark.flaky(retries=2)
@@ -131,10 +129,9 @@ def test_create_text_generator_model_default_connection(
     )

     df = model.predict(llm_text_df).to_pandas()
-    assert df.shape == (3, 4)
-    assert "ml_generate_text_llm_result" in df.columns
-    series = df["ml_generate_text_llm_result"]
-    assert all(series.str.len() > 20)
+    utils.check_pandas_df_schema_and_index(
+        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
+    )


 # Marked as flaky only because BQML LLM is in preview, the service only has limited capacity, not stable enough.
@@ -143,21 +140,19 @@ def test_text_generator_predict_default_params_success(
     palm2_text_generator_model, llm_text_df
 ):
     df = palm2_text_generator_model.predict(llm_text_df).to_pandas()
-    assert df.shape == (3, 4)
-    assert "ml_generate_text_llm_result" in df.columns
-    series = df["ml_generate_text_llm_result"]
-    assert all(series.str.len() > 20)
+    utils.check_pandas_df_schema_and_index(
+        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
+    )


 @pytest.mark.flaky(retries=2)
 def test_text_generator_predict_series_default_params_success(
     palm2_text_generator_model, llm_text_df
 ):
     df = palm2_text_generator_model.predict(llm_text_df["prompt"]).to_pandas()
-    assert df.shape == (3, 4)
-    assert "ml_generate_text_llm_result" in df.columns
-    series = df["ml_generate_text_llm_result"]
-    assert all(series.str.len() > 20)
+    utils.check_pandas_df_schema_and_index(
+        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
+    )


 @pytest.mark.flaky(retries=2)
@@ -166,10 +161,9 @@ def test_text_generator_predict_arbitrary_col_label_success(
 ):
     llm_text_df = llm_text_df.rename(columns={"prompt": "arbitrary"})
     df = palm2_text_generator_model.predict(llm_text_df).to_pandas()
-    assert df.shape == (3, 4)
-    assert "ml_generate_text_llm_result" in df.columns
-    series = df["ml_generate_text_llm_result"]
-    assert all(series.str.len() > 20)
+    utils.check_pandas_df_schema_and_index(
+        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
+    )


 @pytest.mark.flaky(retries=2)
@@ -179,10 +173,9 @@ def test_text_generator_predict_with_params_success(
     df = palm2_text_generator_model.predict(
         llm_text_df, temperature=0.5, max_output_tokens=100, top_k=20, top_p=0.5
     ).to_pandas()
-    assert df.shape == (3, 4)
-    assert "ml_generate_text_llm_result" in df.columns
-    series = df["ml_generate_text_llm_result"]
-    assert all(series.str.len() > 20)
+    utils.check_pandas_df_schema_and_index(
+        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
+    )


 def test_create_embedding_generator_model(
@@ -379,10 +372,9 @@ def test_gemini_text_generator_predict_default_params_success(
         model_name=model_name, connection_name=bq_connection, session=session
     )
     df = gemini_text_generator_model.predict(llm_text_df).to_pandas()
-    assert df.shape == (3, 4)
-    assert "ml_generate_text_llm_result" in df.columns
-    series = df["ml_generate_text_llm_result"]
-    assert all(series.str.len() > 20)
+    utils.check_pandas_df_schema_and_index(
+        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
+    )


 @pytest.mark.parametrize(
@@ -399,10 +391,9 @@ def test_gemini_text_generator_predict_with_params_success(
     df = gemini_text_generator_model.predict(
         llm_text_df, temperature=0.5, max_output_tokens=100, top_k=20, top_p=0.5
     ).to_pandas()
-    assert df.shape == (3, 4)
-    assert "ml_generate_text_llm_result" in df.columns
-    series = df["ml_generate_text_llm_result"]
-    assert all(series.str.len() > 20)
+    utils.check_pandas_df_schema_and_index(
+        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
+    )


 @pytest.mark.parametrize(
@@ -444,10 +435,9 @@ def test_claude3_text_generator_predict_default_params_success(
         model_name=model_name, connection_name=bq_connection, session=session
     )
     df = claude3_text_generator_model.predict(llm_text_df).to_pandas()
-    assert df.shape == (3, 3)
-    assert "ml_generate_text_llm_result" in df.columns
-    series = df["ml_generate_text_llm_result"]
-    assert all(series.str.len() > 20)
Review thread on this hunk:

Contributor: Is this precisely the part that was failing due to instability?

Author: It is the flakiness of .predict. In fact, this time it isn't instability but a quota issue: b/362768658. It is not an issue with model creation.

+    utils.check_pandas_df_schema_and_index(
+        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
+    )


 @pytest.mark.parametrize(
@@ -466,10 +456,9 @@ def test_claude3_text_generator_predict_with_params_success(
     df = claude3_text_generator_model.predict(
         llm_text_df, max_output_tokens=100, top_k=20, top_p=0.5
     ).to_pandas()
-    assert df.shape == (3, 3)
-    assert "ml_generate_text_llm_result" in df.columns
-    series = df["ml_generate_text_llm_result"]
-    assert all(series.str.len() > 20)
+    utils.check_pandas_df_schema_and_index(
+        df, columns=utils.ML_GENERATE_TEXT_OUTPUT, index=3, col_exact=False
+    )


 @pytest.mark.flaky(retries=2)
tests/system/utils.py (5 changes: 5 additions, 0 deletions)

@@ -45,6 +45,11 @@
"log_loss",
"roc_auc",
]
ML_GENERATE_TEXT_OUTPUT = [
"ml_generate_text_llm_result",
"ml_generate_text_status",
"prompt",
]


 def skip_legacy_pandas(test):
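The helper check_pandas_df_schema_and_index predates this PR, so its implementation is not part of the diff. Purely as illustration, here is a hypothetical sketch inferred from the call sites above (df, columns, index=3, col_exact=False); the actual bigframes helper may differ:

from typing import Iterable, Union

import pandas as pd


def check_pandas_df_schema_and_index(
    pd_df: pd.DataFrame,
    columns: Iterable[str],
    index: Union[int, Iterable],
    col_exact: bool = True,
) -> None:
    """Hypothetical sketch: validate schema and index, never cell contents."""
    if col_exact:
        # Require exactly these columns.
        assert list(pd_df.columns) == list(columns)
    else:
        # Require the expected columns to be present; tolerate extras.
        assert set(columns) <= set(pd_df.columns)
    if isinstance(index, int):
        # Read an int as the expected number of rows.
        assert len(pd_df.index) == index
    else:
        # Otherwise compare the index values directly.
        assert list(pd_df.index) == list(index)

Under that reading, the tests now pass or fail on properties the BQML service guarantees (which columns come back, one row per prompt) rather than on what the model happened to generate, which is the point of the PR title.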