From 20bda90eabd2bd96b4e8b03394b0c9307df26081 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Wed, 8 May 2024 17:12:42 +0000 Subject: [PATCH 1/3] docs: add code snippets for llm text generation --- samples/snippets/text_generation_test.py | 65 ++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 samples/snippets/text_generation_test.py diff --git a/samples/snippets/text_generation_test.py b/samples/snippets/text_generation_test.py new file mode 100644 index 0000000000..feb7862f48 --- /dev/null +++ b/samples/snippets/text_generation_test.py @@ -0,0 +1,65 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def test_llm_text_generation() -> None: + # Determine project id, in this case prefer the one set in the environment + # variable GOOGLE_CLOUD_PROJECT (if any) + import os + + PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT", "bigframes-dev") + LOCATION = "us" + + # [START bigquery_dataframes_create_remote_model] + import bigframes + from bigframes.ml.llm import PaLM2TextGenerator + + bigframes.options.bigquery.project = PROJECT_ID + bigframes.options.bigquery.location = LOCATION + + model = PaLM2TextGenerator() + # [END bigquery_dataframes_create_remote_model] + assert model is not None + + # [START bigquery_dataframes_perform_keyword_extraction] + import bigframes.pandas as bpd + + df = bpd.read_gbq("bigquery-public-data.imdb.reviews").head(n=5) + df_prompt_prefix = "Extract the key words from the text below: " + df_prompt = df_prompt_prefix + df["review"] + + # Predict using the model + df_pred = model.predict( + df_prompt.to_frame(), temperature=0.2, max_output_tokens=100 + ) + df_pred.peek(5) + # [END bigquery_dataframes_perform_keyword_extraction] + assert df_pred["ml_generate_text_llm_result"] is not None + assert df_pred["ml_generate_text_llm_result"].iloc[0] is not None + + # [START bigquery_dataframes_perform_sentiment_analysis] + import bigframes.pandas as bpd + + df = bpd.read_gbq("bigquery-public-data.imdb.reviews").head(n=5) + df_prompt_prefix = "perform sentiment analysis on the following text, return one the following categories: positive, negative: " + df_prompt = df_prompt_prefix + df["review"] + + # Predict using the model + df_pred = model.predict( + df_prompt.to_frame(), temperature=0.2, max_output_tokens=100 + ) + df_pred.peek(5) + # [END bigquery_dataframes_perform_sentiment_analysis] + assert df_pred["ml_generate_text_llm_result"] is not None + assert df_pred["ml_generate_text_llm_result"].iloc[0] is not None From bab075be980a42798d060e4fa5f9568dc4de460d Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Wed, 8 May 2024 18:35:25 +0000 Subject: 
[PATCH 2/3] address comments --- samples/snippets/text_generation_test.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/samples/snippets/text_generation_test.py b/samples/snippets/text_generation_test.py index feb7862f48..9c47233583 100644 --- a/samples/snippets/text_generation_test.py +++ b/samples/snippets/text_generation_test.py @@ -40,9 +40,7 @@ def test_llm_text_generation() -> None: df_prompt = df_prompt_prefix + df["review"] # Predict using the model - df_pred = model.predict( - df_prompt.to_frame(), temperature=0.2, max_output_tokens=100 - ) + df_pred = model.predict(df_prompt, temperature=0.2, max_output_tokens=100) df_pred.peek(5) # [END bigquery_dataframes_perform_keyword_extraction] assert df_pred["ml_generate_text_llm_result"] is not None @@ -56,9 +54,7 @@ def test_llm_text_generation() -> None: df_prompt = df_prompt_prefix + df["review"] # Predict using the model - df_pred = model.predict( - df_prompt.to_frame(), temperature=0.2, max_output_tokens=100 - ) + df_pred = model.predict(df_prompt, temperature=0.2, max_output_tokens=100) df_pred.peek(5) # [END bigquery_dataframes_perform_sentiment_analysis] assert df_pred["ml_generate_text_llm_result"] is not None From e846f2d2e4fb7248fc9e47dd251e4d5cb441acf6 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Thu, 9 May 2024 17:25:28 +0000 Subject: [PATCH 3/3] address more comments --- samples/snippets/text_generation_test.py | 25 +++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/samples/snippets/text_generation_test.py b/samples/snippets/text_generation_test.py index 9c47233583..c4df1dde3b 100644 --- a/samples/snippets/text_generation_test.py +++ b/samples/snippets/text_generation_test.py @@ -19,9 +19,9 @@ def test_llm_text_generation() -> None: import os PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT", "bigframes-dev") - LOCATION = "us" + LOCATION = "US" - # [START bigquery_dataframes_create_remote_model] + # [START 
bigquery_dataframes_generate_text_tutorial_create_remote_model] import bigframes from bigframes.ml.llm import PaLM2TextGenerator @@ -29,33 +29,40 @@ def test_llm_text_generation() -> None: bigframes.options.bigquery.location = LOCATION model = PaLM2TextGenerator() - # [END bigquery_dataframes_create_remote_model] + # [END bigquery_dataframes_generate_text_tutorial_create_remote_model] assert model is not None - # [START bigquery_dataframes_perform_keyword_extraction] + # [START bigquery_dataframes_generate_text_tutorial_perform_keyword_extraction] import bigframes.pandas as bpd - df = bpd.read_gbq("bigquery-public-data.imdb.reviews").head(n=5) + df = bpd.read_gbq("bigquery-public-data.imdb.reviews", max_results=5) df_prompt_prefix = "Extract the key words from the text below: " df_prompt = df_prompt_prefix + df["review"] # Predict using the model df_pred = model.predict(df_prompt, temperature=0.2, max_output_tokens=100) df_pred.peek(5) - # [END bigquery_dataframes_perform_keyword_extraction] + # [END bigquery_dataframes_generate_text_tutorial_perform_keyword_extraction] + # peek() is used to show a preview of the results. If the output + # of this sample changes, also update the screenshot for the associated + # tutorial on cloud.google.com. 
assert df_pred["ml_generate_text_llm_result"] is not None assert df_pred["ml_generate_text_llm_result"].iloc[0] is not None - # [START bigquery_dataframes_perform_sentiment_analysis] + # [START bigquery_dataframes_generate_text_tutorial_perform_sentiment_analysis] import bigframes.pandas as bpd - df = bpd.read_gbq("bigquery-public-data.imdb.reviews").head(n=5) + df = bpd.read_gbq("bigquery-public-data.imdb.reviews", max_results=5) df_prompt_prefix = "perform sentiment analysis on the following text, return one the following categories: positive, negative: " df_prompt = df_prompt_prefix + df["review"] # Predict using the model df_pred = model.predict(df_prompt, temperature=0.2, max_output_tokens=100) df_pred.peek(5) - # [END bigquery_dataframes_perform_sentiment_analysis] + # [END bigquery_dataframes_generate_text_tutorial_perform_sentiment_analysis] + # peek() is used to show a preview of the results. If the output + # of this sample changes, also update the screenshot for the associated + # tutorial on cloud.google.com. + assert df_pred["ml_generate_text_llm_result"] is not None assert df_pred["ml_generate_text_llm_result"].iloc[0] is not None