From 20bda90eabd2bd96b4e8b03394b0c9307df26081 Mon Sep 17 00:00:00 2001
From: Ashley Xu <ashleyxu@google.com>
Date: Wed, 8 May 2024 17:12:42 +0000
Subject: [PATCH 1/3] docs: add code snippets for llm text generation

---
 samples/snippets/text_generation_test.py | 65 ++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 samples/snippets/text_generation_test.py

diff --git a/samples/snippets/text_generation_test.py b/samples/snippets/text_generation_test.py
new file mode 100644
index 0000000000..feb7862f48
--- /dev/null
+++ b/samples/snippets/text_generation_test.py
@@ -0,0 +1,65 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def test_llm_text_generation() -> None:
+    # Determine project id, in this case prefer the one set in the environment
+    # variable GOOGLE_CLOUD_PROJECT (if any)
+    import os
+
+    PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT", "bigframes-dev")
+    LOCATION = "us"
+
+    # [START bigquery_dataframes_create_remote_model]
+    import bigframes
+    from bigframes.ml.llm import PaLM2TextGenerator
+
+    bigframes.options.bigquery.project = PROJECT_ID
+    bigframes.options.bigquery.location = LOCATION
+
+    model = PaLM2TextGenerator()
+    # [END bigquery_dataframes_create_remote_model]
+    assert model is not None
+
+    # [START bigquery_dataframes_perform_keyword_extraction]
+    import bigframes.pandas as bpd
+
+    df = bpd.read_gbq("bigquery-public-data.imdb.reviews").head(n=5)
+    df_prompt_prefix = "Extract the key words from the text below: "
+    df_prompt = df_prompt_prefix + df["review"]
+
+    # Predict using the model
+    df_pred = model.predict(
+        df_prompt.to_frame(), temperature=0.2, max_output_tokens=100
+    )
+    df_pred.peek(5)
+    # [END bigquery_dataframes_perform_keyword_extraction]
+    assert df_pred["ml_generate_text_llm_result"] is not None
+    assert df_pred["ml_generate_text_llm_result"].iloc[0] is not None
+
+    # [START bigquery_dataframes_perform_sentiment_analysis]
+    import bigframes.pandas as bpd
+
+    df = bpd.read_gbq("bigquery-public-data.imdb.reviews").head(n=5)
+    df_prompt_prefix = "perform sentiment analysis on the following text, return one the following categories: positive, negative: "
+    df_prompt = df_prompt_prefix + df["review"]
+
+    # Predict using the model
+    df_pred = model.predict(
+        df_prompt.to_frame(), temperature=0.2, max_output_tokens=100
+    )
+    df_pred.peek(5)
+    # [END bigquery_dataframes_perform_sentiment_analysis]
+    assert df_pred["ml_generate_text_llm_result"] is not None
+    assert df_pred["ml_generate_text_llm_result"].iloc[0] is not None

From bab075be980a42798d060e4fa5f9568dc4de460d Mon Sep 17 00:00:00 2001
From: Ashley Xu <ashleyxu@google.com>
Date: Wed, 8 May 2024 18:35:25 +0000
Subject: [PATCH 2/3] address comments

---
 samples/snippets/text_generation_test.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/samples/snippets/text_generation_test.py b/samples/snippets/text_generation_test.py
index feb7862f48..9c47233583 100644
--- a/samples/snippets/text_generation_test.py
+++ b/samples/snippets/text_generation_test.py
@@ -40,9 +40,7 @@ def test_llm_text_generation() -> None:
     df_prompt = df_prompt_prefix + df["review"]
 
     # Predict using the model
-    df_pred = model.predict(
-        df_prompt.to_frame(), temperature=0.2, max_output_tokens=100
-    )
+    df_pred = model.predict(df_prompt, temperature=0.2, max_output_tokens=100)
     df_pred.peek(5)
     # [END bigquery_dataframes_perform_keyword_extraction]
     assert df_pred["ml_generate_text_llm_result"] is not None
@@ -56,9 +54,7 @@ def test_llm_text_generation() -> None:
     df_prompt = df_prompt_prefix + df["review"]
 
     # Predict using the model
-    df_pred = model.predict(
-        df_prompt.to_frame(), temperature=0.2, max_output_tokens=100
-    )
+    df_pred = model.predict(df_prompt, temperature=0.2, max_output_tokens=100)
     df_pred.peek(5)
     # [END bigquery_dataframes_perform_sentiment_analysis]
     assert df_pred["ml_generate_text_llm_result"] is not None

From e846f2d2e4fb7248fc9e47dd251e4d5cb441acf6 Mon Sep 17 00:00:00 2001
From: Ashley Xu <ashleyxu@google.com>
Date: Thu, 9 May 2024 17:25:28 +0000
Subject: [PATCH 3/3] address more comments

---
 samples/snippets/text_generation_test.py | 25 +++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/samples/snippets/text_generation_test.py b/samples/snippets/text_generation_test.py
index 9c47233583..c4df1dde3b 100644
--- a/samples/snippets/text_generation_test.py
+++ b/samples/snippets/text_generation_test.py
@@ -19,9 +19,9 @@ def test_llm_text_generation() -> None:
     import os
 
     PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT", "bigframes-dev")
-    LOCATION = "us"
+    LOCATION = "US"
 
-    # [START bigquery_dataframes_create_remote_model]
+    # [START bigquery_dataframes_generate_text_tutorial_create_remote_model]
     import bigframes
     from bigframes.ml.llm import PaLM2TextGenerator
 
@@ -29,33 +29,40 @@ def test_llm_text_generation() -> None:
     bigframes.options.bigquery.location = LOCATION
 
     model = PaLM2TextGenerator()
-    # [END bigquery_dataframes_create_remote_model]
+    # [END bigquery_dataframes_generate_text_tutorial_create_remote_model]
     assert model is not None
 
-    # [START bigquery_dataframes_perform_keyword_extraction]
+    # [START bigquery_dataframes_generate_text_tutorial_perform_keyword_extraction]
     import bigframes.pandas as bpd
 
-    df = bpd.read_gbq("bigquery-public-data.imdb.reviews").head(n=5)
+    df = bpd.read_gbq("bigquery-public-data.imdb.reviews", max_results=5)
     df_prompt_prefix = "Extract the key words from the text below: "
     df_prompt = df_prompt_prefix + df["review"]
 
     # Predict using the model
     df_pred = model.predict(df_prompt, temperature=0.2, max_output_tokens=100)
     df_pred.peek(5)
-    # [END bigquery_dataframes_perform_keyword_extraction]
+    # [END bigquery_dataframes_generate_text_tutorial_perform_keyword_extraction]
+    # peek() is used to show a preview of the results. If the output
+    # of this sample changes, also update the screenshot for the associated
+    # tutorial on cloud.google.com.
     assert df_pred["ml_generate_text_llm_result"] is not None
     assert df_pred["ml_generate_text_llm_result"].iloc[0] is not None
 
-    # [START bigquery_dataframes_perform_sentiment_analysis]
+    # [START bigquery_dataframes_generate_text_tutorial_perform_sentiment_analysis]
     import bigframes.pandas as bpd
 
-    df = bpd.read_gbq("bigquery-public-data.imdb.reviews").head(n=5)
+    df = bpd.read_gbq("bigquery-public-data.imdb.reviews", max_results=5)
     df_prompt_prefix = "perform sentiment analysis on the following text, return one the following categories: positive, negative: "
     df_prompt = df_prompt_prefix + df["review"]
 
     # Predict using the model
     df_pred = model.predict(df_prompt, temperature=0.2, max_output_tokens=100)
     df_pred.peek(5)
-    # [END bigquery_dataframes_perform_sentiment_analysis]
+    # [END bigquery_dataframes_generate_text_tutorial_perform_sentiment_analysis]
+    # peek() is used to show a preview of the results. If the output
+    # of this sample changes, also update the screenshot for the associated
+    # tutorial on cloud.google.com.
+
     assert df_pred["ml_generate_text_llm_result"] is not None
     assert df_pred["ml_generate_text_llm_result"].iloc[0] is not None