From 35c8247230b1315bd66faaa808f78e0045605c29 Mon Sep 17 00:00:00 2001 From: Daniela Date: Thu, 17 Oct 2024 14:43:50 +0000 Subject: [PATCH 1/9] docs: add python translation for predicting outcomes --- .../linear_regression_tutorial_test.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/samples/snippets/linear_regression_tutorial_test.py b/samples/snippets/linear_regression_tutorial_test.py index 9a4908dbf5..bc6055e095 100644 --- a/samples/snippets/linear_regression_tutorial_test.py +++ b/samples/snippets/linear_regression_tutorial_test.py @@ -54,7 +54,25 @@ def test_linear_regression(random_model_id: str) -> None: # 0 227.012237 81838.159892 0.00507 173.080816 0.872377 0.872377 # 1 rows x columns # [END bigquery_dataframes_bqml_linear_evaluate] + # [START bigquery_dataframes_bqml_linear_predict] + # Select the model you'll use for predictions. `read_gbq_model` loads + # model data from BigQuery, but you could also use the `model` object + # from previous steps. + model = bpd.read_gbq_model( + your_model_id, + # For example: "bqml_tutorial.penguins_model", + ) + + # Use 'contains' function to filter by island containing the string + # "Biscoe". 
+ model = model.loc[model["island"].str.contains("Biscoe")] + + result = model.predict(model) + + # Expected output results: + # [END bigquery_dataframes_bqml_linear_predict] assert feature_columns is not None assert label_columns is not None assert model is not None assert score is not None + assert result is not None From e15cb36254f84b6845cb54abd8345df39350c75f Mon Sep 17 00:00:00 2001 From: Daniela Date: Thu, 17 Oct 2024 17:59:24 +0000 Subject: [PATCH 2/9] add '6' to previous snippet comment --- samples/snippets/linear_regression_tutorial_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/linear_regression_tutorial_test.py b/samples/snippets/linear_regression_tutorial_test.py index bc6055e095..8c84497e53 100644 --- a/samples/snippets/linear_regression_tutorial_test.py +++ b/samples/snippets/linear_regression_tutorial_test.py @@ -52,7 +52,7 @@ def test_linear_regression(random_model_id: str) -> None: # Expected output results: # index mean_absolute_error mean_squared_error mean_squared_log_error median_absolute_error r2_score explained_variance # 0 227.012237 81838.159892 0.00507 173.080816 0.872377 0.872377 - # 1 rows x columns + # 1 rows x 6 columns # [END bigquery_dataframes_bqml_linear_evaluate] # [START bigquery_dataframes_bqml_linear_predict] # Select the model you'll use for predictions. 
`read_gbq_model` loads From afb03efa66cd9f4e5839dbee7ac040a30f59a0e0 Mon Sep 17 00:00:00 2001 From: Daniela Date: Tue, 22 Oct 2024 18:18:17 +0000 Subject: [PATCH 3/9] fix data to predict --- samples/snippets/linear_regression_tutorial_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/snippets/linear_regression_tutorial_test.py b/samples/snippets/linear_regression_tutorial_test.py index 8c84497e53..1e9ef6ea60 100644 --- a/samples/snippets/linear_regression_tutorial_test.py +++ b/samples/snippets/linear_regression_tutorial_test.py @@ -65,9 +65,9 @@ def test_linear_regression(random_model_id: str) -> None: # Use 'contains' function to filter by island containing the string # "Biscoe". - model = model.loc[model["island"].str.contains("Biscoe")] + biscoe_data = model.loc[model["island"].str.contains("Biscoe")] - result = model.predict(model) + result = model.predict(biscoe_data) # Expected output results: # [END bigquery_dataframes_bqml_linear_predict] From 2d3b7ba288969b8dbc949681a3de9c9caf71c222 Mon Sep 17 00:00:00 2001 From: Daniela Date: Tue, 29 Oct 2024 20:43:14 +0000 Subject: [PATCH 4/9] add expected results --- samples/snippets/linear_regression_tutorial_test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/samples/snippets/linear_regression_tutorial_test.py b/samples/snippets/linear_regression_tutorial_test.py index 1e9ef6ea60..04461aeded 100644 --- a/samples/snippets/linear_regression_tutorial_test.py +++ b/samples/snippets/linear_regression_tutorial_test.py @@ -70,6 +70,12 @@ def test_linear_regression(random_model_id: str) -> None: result = model.predict(biscoe_data) # Expected output results: + # predicted_body_mass_g species island culmen_length_mm culmen_depth_mm flipper_length_mm sex + # 23 4681.782896 Gentoo penguin (Pygoscelis papua) Biscoe + # 332 4740.7907 Gentoo penguin (Pygoscelis papua) Biscoe 46.2 14.4 214.0 + # 160 4731.310452 Gentoo penguin (Pygoscelis papua) Biscoe 44.5 14.3 216.0 + + # # [END 
bigquery_dataframes_bqml_linear_predict] assert feature_columns is not None assert label_columns is not None From 1592db243e412e97e9710af5739f2eac5c9dc4e6 Mon Sep 17 00:00:00 2001 From: Daniela Date: Tue, 29 Oct 2024 21:17:09 +0000 Subject: [PATCH 5/9] replace model with dataframe --- samples/snippets/linear_regression_tutorial_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/snippets/linear_regression_tutorial_test.py b/samples/snippets/linear_regression_tutorial_test.py index 04461aeded..f67ff34921 100644 --- a/samples/snippets/linear_regression_tutorial_test.py +++ b/samples/snippets/linear_regression_tutorial_test.py @@ -65,12 +65,12 @@ def test_linear_regression(random_model_id: str) -> None: # Use 'contains' function to filter by island containing the string # "Biscoe". - biscoe_data = model.loc[model["island"].str.contains("Biscoe")] + biscoe_data = bq_df.loc[bq_df["island"].str.contains("Biscoe")] result = model.predict(biscoe_data) # Expected output results: - # predicted_body_mass_g species island culmen_length_mm culmen_depth_mm flipper_length_mm sex + # predicted_body_mass_g species island culmen_length_mm culmen_depth_mm flipper_length_mm sex # 23 4681.782896 Gentoo penguin (Pygoscelis papua) Biscoe # 332 4740.7907 Gentoo penguin (Pygoscelis papua) Biscoe 46.2 14.4 214.0 # 160 4731.310452 Gentoo penguin (Pygoscelis papua) Biscoe 44.5 14.3 216.0 From 1e2b4b2641052ae1101355e082da72a6ba02874b Mon Sep 17 00:00:00 2001 From: Daniela Date: Tue, 29 Oct 2024 22:43:53 +0000 Subject: [PATCH 6/9] update dataframe to drop nulls in body mass column --- .../snippets/linear_regression_tutorial_test.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/samples/snippets/linear_regression_tutorial_test.py b/samples/snippets/linear_regression_tutorial_test.py index f67ff34921..a9992e1668 100644 --- a/samples/snippets/linear_regression_tutorial_test.py +++ 
b/samples/snippets/linear_regression_tutorial_test.py @@ -63,6 +63,11 @@ def test_linear_regression(random_model_id: str) -> None: # For example: "bqml_tutorial.penguins_model", ) + bq_df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") + + # Drop rows with nulls to get training data + bq_df = bq_df.dropna(subset=["body_mass_g"]) + # Use 'contains' function to filter by island containing the string # "Biscoe". biscoe_data = bq_df.loc[bq_df["island"].str.contains("Biscoe")] @@ -70,12 +75,10 @@ def test_linear_regression(random_model_id: str) -> None: result = model.predict(biscoe_data) # Expected output results: - # predicted_body_mass_g species island culmen_length_mm culmen_depth_mm flipper_length_mm sex - # 23 4681.782896 Gentoo penguin (Pygoscelis papua) Biscoe - # 332 4740.7907 Gentoo penguin (Pygoscelis papua) Biscoe 46.2 14.4 214.0 - # 160 4731.310452 Gentoo penguin (Pygoscelis papua) Biscoe 44.5 14.3 216.0 - - # + # predicted_body_mass_g species island culmen_length_mm culmen_depth_mm body_mass_g flipper_length_mm sex + # 332 4740.7907 Gentoo penguin (Pygoscelis papua) Biscoe 46.2 14.4 214.0 4650.0 + # 235 4752.246604 Gentoo penguin (Pygoscelis papua) Biscoe 47.3 13.8 216.0 4725.0 + # 160 4731.310452 Gentoo penguin (Pygoscelis papua) Biscoe 44.5 14.3 216.0 4100.0 # [END bigquery_dataframes_bqml_linear_predict] assert feature_columns is not None assert label_columns is not None From 54a6def00fa19095135115581771e37e2d82e963 Mon Sep 17 00:00:00 2001 From: Daniela Date: Tue, 29 Oct 2024 23:14:39 +0000 Subject: [PATCH 7/9] update df --- samples/snippets/linear_regression_tutorial_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/snippets/linear_regression_tutorial_test.py b/samples/snippets/linear_regression_tutorial_test.py index a9992e1668..47b74164d3 100644 --- a/samples/snippets/linear_regression_tutorial_test.py +++ b/samples/snippets/linear_regression_tutorial_test.py @@ -63,9 +63,10 @@ def 
test_linear_regression(random_model_id: str) -> None: # For example: "bqml_tutorial.penguins_model", ) + # Load data from BigQuery bq_df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") - # Drop rows with nulls to get training data + # Drop nulls from body mass columns bq_df = bq_df.dropna(subset=["body_mass_g"]) # Use 'contains' function to filter by island containing the string From 9ece67aa1d6a4e56f8aa25241a824ff9fe3c3848 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 30 Oct 2024 08:44:42 -0500 Subject: [PATCH 8/9] Update samples/snippets/linear_regression_tutorial_test.py --- samples/snippets/linear_regression_tutorial_test.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/samples/snippets/linear_regression_tutorial_test.py b/samples/snippets/linear_regression_tutorial_test.py index 47b74164d3..6dd4e2fce2 100644 --- a/samples/snippets/linear_regression_tutorial_test.py +++ b/samples/snippets/linear_regression_tutorial_test.py @@ -66,9 +66,6 @@ def test_linear_regression(random_model_id: str) -> None: # Load data from BigQuery bq_df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") - # Drop nulls from body mass columns - bq_df = bq_df.dropna(subset=["body_mass_g"]) - # Use 'contains' function to filter by island containing the string # "Biscoe". 
biscoe_data = bq_df.loc[bq_df["island"].str.contains("Biscoe")] From 59b66096b4fb5440b05de22fdf82323b3470ca5c Mon Sep 17 00:00:00 2001 From: Daniela Date: Wed, 30 Oct 2024 15:34:18 +0000 Subject: [PATCH 9/9] complete snippet --- samples/snippets/linear_regression_tutorial_test.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/samples/snippets/linear_regression_tutorial_test.py b/samples/snippets/linear_regression_tutorial_test.py index 47b74164d3..452d88746d 100644 --- a/samples/snippets/linear_regression_tutorial_test.py +++ b/samples/snippets/linear_regression_tutorial_test.py @@ -66,9 +66,6 @@ def test_linear_regression(random_model_id: str) -> None: # Load data from BigQuery bq_df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") - # Drop nulls from body mass columns - bq_df = bq_df.dropna(subset=["body_mass_g"]) - # Use 'contains' function to filter by island containing the string # "Biscoe". biscoe_data = bq_df.loc[bq_df["island"].str.contains("Biscoe")] @@ -77,9 +74,9 @@ def test_linear_regression(random_model_id: str) -> None: # Expected output results: # predicted_body_mass_g species island culmen_length_mm culmen_depth_mm body_mass_g flipper_length_mm sex - # 332 4740.7907 Gentoo penguin (Pygoscelis papua) Biscoe 46.2 14.4 214.0 4650.0 - # 235 4752.246604 Gentoo penguin (Pygoscelis papua) Biscoe 47.3 13.8 216.0 4725.0 - # 160 4731.310452 Gentoo penguin (Pygoscelis papua) Biscoe 44.5 14.3 216.0 4100.0 + # 23 4681.782896 Gentoo penguin (Pygoscelis papua) Biscoe + # 332 4740.7907 Gentoo penguin (Pygoscelis papua) Biscoe 46.2 14.4 214.0 4650.0 + # 160 4731.310452 Gentoo penguin (Pygoscelis papua) Biscoe 44.5 14.3 216.0 4100.0 # [END bigquery_dataframes_bqml_linear_predict] assert feature_columns is not None assert label_columns is not None