From f17f75527ee8107e2c1dd05dc9de832d038a9803 Mon Sep 17 00:00:00 2001
From: Holt Skinner <holtskinner@google.com>
Date: Wed, 22 Jan 2025 13:22:00 -0600
Subject: [PATCH 01/10] docs(generative_ai): Update Chat Completions API
 samples

- Fix imports (tests were failing)
- Add authentication sample
- Combine credentials refresher region tags
- Add samples for self-deployed models
---
 .../chat_completions_authentication.py        | 50 +++++++++++++++++
 .../chat_completions_credentials_refresher.py | 17 +++---
 .../chat_completions_non_streaming_image.py   | 12 ++---
 .../chat_completions_non_streaming_text.py    | 13 ++---
 ...etions_non_streaming_text_self_deployed.py | 52 ++++++++++++++++++
 .../chat_completions_streaming_image.py       | 11 ++--
 .../chat_completions_streaming_text.py        | 11 ++--
 ...ompletions_streaming_text_self_deployed.py | 54 +++++++++++++++++++
 .../chat_completions/chat_completions_test.py | 24 +++++++++
 .../chat_completions/requirements-test.txt    |  2 +-
 .../chat_completions/requirements.txt         |  6 +--
 11 files changed, 209 insertions(+), 43 deletions(-)
 create mode 100644 generative_ai/chat_completions/chat_completions_authentication.py
 create mode 100644 generative_ai/chat_completions/chat_completions_non_streaming_text_self_deployed.py
 create mode 100644 generative_ai/chat_completions/chat_completions_streaming_text_self_deployed.py

diff --git a/generative_ai/chat_completions/chat_completions_authentication.py b/generative_ai/chat_completions/chat_completions_authentication.py
new file mode 100644
index 00000000000..765ba728743
--- /dev/null
+++ b/generative_ai/chat_completions/chat_completions_authentication.py
@@ -0,0 +1,50 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def generate_text(project_id: str, location: str = "us-central1") -> object:
+    # [START generativeaionvertexai_gemini_chat_completions_authentication]
+    import openai
+
+    from google.auth import default
+    import google.auth.transport.requests
+
+    # TODO(developer): Update and un-comment below lines
+    # project_id = "PROJECT_ID"
+    # location = "us-central1"
+
+    # Programmatically get an access token
+    credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
+    credentials.refresh(google.auth.transport.requests.Request())
+    # Note: the credential lives for 1 hour by default (https://cloud.google.com/docs/authentication/token-types#at-lifetime); after expiration, it must be refreshed.
+
+    ##############################
+    # Choose one of the following:
+    ##############################
+
+    # If you are calling a Gemini model, set the ENDPOINT_ID variable to use openapi.
+    ENDPOINT_ID = "openapi"
+
+    # If you are calling a self-deployed model from Model Garden, set the
+    # ENDPOINT_ID variable and set the client's base URL to use your endpoint.
+    ENDPOINT_ID = "YOUR_ENDPOINT_ID"
+
+    # OpenAI Client
+    client = openai.OpenAI(
+        base_url=f"https://{location}-aiplatform.googleapis.com/v1beta1/projects/{project_id}/locations/{location}/endpoints/{ENDPOINT_ID}",
+        api_key=credentials.token,
+    )
+    # [END generativeaionvertexai_gemini_chat_completions_authentication]
+
+    return client
diff --git a/generative_ai/chat_completions/chat_completions_credentials_refresher.py b/generative_ai/chat_completions/chat_completions_credentials_refresher.py
index 87df24838b6..9ca8c0dc749 100644
--- a/generative_ai/chat_completions/chat_completions_credentials_refresher.py
+++ b/generative_ai/chat_completions/chat_completions_credentials_refresher.py
@@ -15,7 +15,7 @@
 # Disable linting on `Any` type annotations (needed for OpenAI kwargs and attributes).
 # flake8: noqa ANN401
 
-# [START generativeaionvertexai_credentials_refresher_class]
+# [START generativeaionvertexai_credentials_refresher]
 from typing import Any
 
 import google.auth
@@ -25,16 +25,15 @@
 
 class OpenAICredentialsRefresher:
     def __init__(self, **kwargs: Any) -> None:
-        # Set a dummy key here
-        self.client = openai.OpenAI(**kwargs, api_key="DUMMY")
+        # Set a placeholder key here
+        self.client = openai.OpenAI(**kwargs, api_key="PLACEHOLDER")
         self.creds, self.project = google.auth.default(
             scopes=["https://www.googleapis.com/auth/cloud-platform"]
         )
 
     def __getattr__(self, name: str) -> Any:
         if not self.creds.valid:
-            auth_req = google.auth.transport.requests.Request()
-            self.creds.refresh(auth_req)
+            self.creds.refresh(google.auth.transport.requests.Request())
 
             if not self.creds.valid:
                 raise RuntimeError("Unable to refresh auth")
@@ -43,11 +42,9 @@ def __getattr__(self, name: str) -> Any:
         return getattr(self.client, name)
 
 
-# [END generativeaionvertexai_credentials_refresher_class]
-
-
+# [END generativeaionvertexai_credentials_refresher]
 def generate_text(project_id: str, location: str = "us-central1") -> object:
-    # [START generativeaionvertexai_credentials_refresher_usage]
+    # [START generativeaionvertexai_credentials_refresher]
 
     # TODO(developer): Update and un-comment below lines
     # project_id = "PROJECT_ID"
@@ -63,6 +60,6 @@ def generate_text(project_id: str, location: str = "us-central1") -> object:
     )
 
     print(response)
-    # [END generativeaionvertexai_credentials_refresher_usage]
+    # [END generativeaionvertexai_credentials_refresher]
 
     return response
diff --git a/generative_ai/chat_completions/chat_completions_non_streaming_image.py b/generative_ai/chat_completions/chat_completions_non_streaming_image.py
index 688063cf62a..98db2f45bfc 100644
--- a/generative_ai/chat_completions/chat_completions_non_streaming_image.py
+++ b/generative_ai/chat_completions/chat_completions_non_streaming_image.py
@@ -15,21 +15,19 @@
 
 def generate_text(project_id: str, location: str = "us-central1") -> object:
     # [START generativeaionvertexai_gemini_chat_completions_non_streaming_image]
-    import vertexai
-    import openai
 
-    from google.auth import default, transport
+    from google.auth import default
+    import google.auth.transport.requests
+
+    import openai
 
     # TODO(developer): Update and un-comment below lines
     # project_id = "PROJECT_ID"
     # location = "us-central1"
 
-    vertexai.init(project=project_id, location=location)
-
     # Programmatically get an access token
     credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
-    auth_request = transport.requests.Request()
-    credentials.refresh(auth_request)
+    credentials.refresh(google.auth.transport.requests.Request())
 
     # OpenAI Client
     client = openai.OpenAI(
diff --git a/generative_ai/chat_completions/chat_completions_non_streaming_text.py b/generative_ai/chat_completions/chat_completions_non_streaming_text.py
index 32fd0a4df2d..7991d95e986 100644
--- a/generative_ai/chat_completions/chat_completions_non_streaming_text.py
+++ b/generative_ai/chat_completions/chat_completions_non_streaming_text.py
@@ -15,23 +15,20 @@
 
 def generate_text(project_id: str, location: str = "us-central1") -> object:
     # [START generativeaionvertexai_gemini_chat_completions_non_streaming]
-    import vertexai
-    import openai
+    from google.auth import default
+    import google.auth.transport.requests
 
-    from google.auth import default, transport
+    import openai
 
     # TODO(developer): Update and un-comment below lines
     # project_id = "PROJECT_ID"
     # location = "us-central1"
 
-    vertexai.init(project=project_id, location=location)
-
     # Programmatically get an access token
     credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
-    auth_request = transport.requests.Request()
-    credentials.refresh(auth_request)
+    credentials.refresh(google.auth.transport.requests.Request())
 
-    # # OpenAI Client
+    # OpenAI Client
     client = openai.OpenAI(
         base_url=f"https://{location}-aiplatform.googleapis.com/v1beta1/projects/{project_id}/locations/{location}/endpoints/openapi",
         api_key=credentials.token,
diff --git a/generative_ai/chat_completions/chat_completions_non_streaming_text_self_deployed.py b/generative_ai/chat_completions/chat_completions_non_streaming_text_self_deployed.py
new file mode 100644
index 00000000000..c58b10f037e
--- /dev/null
+++ b/generative_ai/chat_completions/chat_completions_non_streaming_text_self_deployed.py
@@ -0,0 +1,52 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def generate_text(
+    project_id: str,
+    location: str = "us-central1",
+    model_id: str = "gemma-2-9b-it",
+    endpoint_id: str = "YOUR_ENDPOINT_ID",
+) -> object:
+    # [START generativeaionvertexai_gemini_chat_completions_non_streaming_self_deployed]
+    from google.auth import default
+    import google.auth.transport.requests
+
+    import openai
+
+    # TODO(developer): Update and un-comment below lines
+    # project_id = "PROJECT_ID"
+    # location = "us-central1"
+    # model_id = "gemma-2-9b-it"
+    # endpoint_id = "YOUR_ENDPOINT_ID"
+
+    # Programmatically get an access token
+    credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
+    credentials.refresh(google.auth.transport.requests.Request())
+
+    # OpenAI Client
+    client = openai.OpenAI(
+        base_url=f"https://{location}-aiplatform.googleapis.com/v1beta1/projects/{project_id}/locations/{location}/endpoints/{endpoint_id}",
+        api_key=credentials.token,
+    )
+
+    response = client.chat.completions.create(
+        model=model_id,
+        messages=[{"role": "user", "content": "Why is the sky blue?"}],
+    )
+    print(response)
+
+    # [END generativeaionvertexai_gemini_chat_completions_non_streaming_self_deployed]
+
+    return response
diff --git a/generative_ai/chat_completions/chat_completions_streaming_image.py b/generative_ai/chat_completions/chat_completions_streaming_image.py
index f35e33ceaac..0aeb9b1f558 100644
--- a/generative_ai/chat_completions/chat_completions_streaming_image.py
+++ b/generative_ai/chat_completions/chat_completions_streaming_image.py
@@ -15,21 +15,18 @@
 
 def generate_text(project_id: str, location: str = "us-central1") -> object:
     # [START generativeaionvertexai_gemini_chat_completions_streaming_image]
-    import vertexai
-    import openai
+    from google.auth import default
+    import google.auth.transport.requests
 
-    from google.auth import default, transport
+    import openai
 
     # TODO(developer): Update and un-comment below lines
     # project_id = "PROJECT_ID"
     # location = "us-central1"
 
-    vertexai.init(project=project_id, location=location)
-
     # Programmatically get an access token
     credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
-    auth_request = transport.requests.Request()
-    credentials.refresh(auth_request)
+    credentials.refresh(google.auth.transport.requests.Request())
 
     # OpenAI Client
     client = openai.OpenAI(
diff --git a/generative_ai/chat_completions/chat_completions_streaming_text.py b/generative_ai/chat_completions/chat_completions_streaming_text.py
index 76769746b3a..64a87dbceef 100644
--- a/generative_ai/chat_completions/chat_completions_streaming_text.py
+++ b/generative_ai/chat_completions/chat_completions_streaming_text.py
@@ -15,21 +15,18 @@
 
 def generate_text(project_id: str, location: str = "us-central1") -> object:
     # [START generativeaionvertexai_gemini_chat_completions_streaming]
-    import vertexai
-    import openai
+    from google.auth import default
+    import google.auth.transport.requests
 
-    from google.auth import default, transport
+    import openai
 
     # TODO(developer): Update and un-comment below lines
     # project_id = "PROJECT_ID"
     # location = "us-central1"
 
-    vertexai.init(project=project_id, location=location)
-
     # Programmatically get an access token
     credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
-    auth_request = transport.requests.Request()
-    credentials.refresh(auth_request)
+    credentials.refresh(google.auth.transport.requests.Request())
 
     # OpenAI Client
     client = openai.OpenAI(
diff --git a/generative_ai/chat_completions/chat_completions_streaming_text_self_deployed.py b/generative_ai/chat_completions/chat_completions_streaming_text_self_deployed.py
new file mode 100644
index 00000000000..6418920106d
--- /dev/null
+++ b/generative_ai/chat_completions/chat_completions_streaming_text_self_deployed.py
@@ -0,0 +1,54 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def generate_text(
+    project_id: str,
+    location: str = "us-central1",
+    model_id: str = "gemma-2-9b-it",
+    endpoint_id: str = "YOUR_ENDPOINT_ID",
+) -> object:
+    # [START generativeaionvertexai_gemini_chat_completions_streaming_self_deployed]
+    from google.auth import default
+    import google.auth.transport.requests
+
+    import openai
+
+    # TODO(developer): Update and un-comment below lines
+    # project_id = "PROJECT_ID"
+    # location = "us-central1"
+    # model_id = "gemma-2-9b-it"
+    # endpoint_id = "YOUR_ENDPOINT_ID"
+
+    # Programmatically get an access token
+    credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
+    credentials.refresh(google.auth.transport.requests.Request())
+
+    # OpenAI Client
+    client = openai.OpenAI(
+        base_url=f"https://{location}-aiplatform.googleapis.com/v1beta1/projects/{project_id}/locations/{location}/endpoints/{endpoint_id}",
+        api_key=credentials.token,
+    )
+
+    response = client.chat.completions.create(
+        model=model_id,
+        messages=[{"role": "user", "content": "Why is the sky blue?"}],
+        stream=True,
+    )
+    for chunk in response:
+        print(chunk)
+
+    # [END generativeaionvertexai_gemini_chat_completions_streaming_self_deployed]
+
+    return response
diff --git a/generative_ai/chat_completions/chat_completions_test.py b/generative_ai/chat_completions/chat_completions_test.py
index 064d66d553f..0b4d738d4fc 100644
--- a/generative_ai/chat_completions/chat_completions_test.py
+++ b/generative_ai/chat_completions/chat_completions_test.py
@@ -14,15 +14,25 @@
 
 import os
 
+import chat_completions_authentication
 import chat_completions_credentials_refresher
 import chat_completions_non_streaming_image
 import chat_completions_non_streaming_text
 import chat_completions_streaming_image
 import chat_completions_streaming_text
+import chat_completions_streaming_text_self_deployed
+import chat_completions_non_streaming_text_self_deployed
 
 
 PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
 LOCATION = "us-central1"
+SELF_HOSTED_MODEL_ID = "gemma2-9b-it-mg-one-click-deploy"
+ENDPOINT_ID = "6443623023395209216"
+
+
+def test_authentication() -> None:
+    response = chat_completions_authentication.generate_text(PROJECT_ID, LOCATION)
+    assert response
 
 
 def test_streaming_text() -> None:
@@ -50,3 +60,17 @@ def test_credentials_refresher() -> None:
         PROJECT_ID, LOCATION
     )
     assert response
+
+
+def test_streaming_text_self_deployed() -> None:
+    response = chat_completions_streaming_text_self_deployed.generate_text(
+        PROJECT_ID, LOCATION, SELF_HOSTED_MODEL_ID, ENDPOINT_ID
+    )
+    assert response
+
+
+def test_non_streaming_text_self_deployed() -> None:
+    response = chat_completions_non_streaming_text_self_deployed.generate_text(
+        PROJECT_ID, LOCATION, SELF_HOSTED_MODEL_ID, ENDPOINT_ID
+    )
+    assert response
diff --git a/generative_ai/chat_completions/requirements-test.txt b/generative_ai/chat_completions/requirements-test.txt
index 92281986e50..3b9949d8513 100644
--- a/generative_ai/chat_completions/requirements-test.txt
+++ b/generative_ai/chat_completions/requirements-test.txt
@@ -1,4 +1,4 @@
 backoff==2.2.1
-google-api-core==2.19.0
+google-api-core==2.24.0
 pytest==8.2.0
 pytest-asyncio==0.23.6
diff --git a/generative_ai/chat_completions/requirements.txt b/generative_ai/chat_completions/requirements.txt
index 5d8bc64d330..48369134bbf 100644
--- a/generative_ai/chat_completions/requirements.txt
+++ b/generative_ai/chat_completions/requirements.txt
@@ -3,12 +3,12 @@ pandas==2.0.3; python_version == '3.8'
 pandas==2.1.4; python_version > '3.8'
 pillow==10.3.0; python_version < '3.8'
 pillow==10.3.0; python_version >= '3.8'
-google-cloud-aiplatform[all]==1.69.0
+google-cloud-aiplatform[all]==1.78.0
 sentencepiece==0.2.0
-google-auth==2.29.0
+google-auth==2.37.0
 anthropic[vertex]==0.28.0
 langchain-core==0.2.11
 langchain-google-vertexai==1.0.6
 numpy<2
-openai==1.30.5
+openai==1.60.0
 immutabledict==4.2.0

From 161b97590850f4e1638ceb1f816ad57d7b449608 Mon Sep 17 00:00:00 2001
From: Holt Skinner <holtskinner@google.com>
Date: Wed, 22 Jan 2025 13:34:43 -0600
Subject: [PATCH 02/10] Fix lint error

---
 generative_ai/chat_completions/chat_completions_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generative_ai/chat_completions/chat_completions_test.py b/generative_ai/chat_completions/chat_completions_test.py
index 0b4d738d4fc..93e85923f68 100644
--- a/generative_ai/chat_completions/chat_completions_test.py
+++ b/generative_ai/chat_completions/chat_completions_test.py
@@ -18,10 +18,10 @@
 import chat_completions_credentials_refresher
 import chat_completions_non_streaming_image
 import chat_completions_non_streaming_text
+import chat_completions_non_streaming_text_self_deployed
 import chat_completions_streaming_image
 import chat_completions_streaming_text
 import chat_completions_streaming_text_self_deployed
-import chat_completions_non_streaming_text_self_deployed
 
 
 PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")

From 4b6cbd60e1f202876e18e053078324e999909189 Mon Sep 17 00:00:00 2001
From: Holt Skinner <holtskinner@google.com>
Date: Thu, 23 Jan 2025 10:40:00 -0600
Subject: [PATCH 03/10] Update model endpoint id

---
 generative_ai/chat_completions/chat_completions_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generative_ai/chat_completions/chat_completions_test.py b/generative_ai/chat_completions/chat_completions_test.py
index 93e85923f68..06f7c8fbdad 100644
--- a/generative_ai/chat_completions/chat_completions_test.py
+++ b/generative_ai/chat_completions/chat_completions_test.py
@@ -27,7 +27,7 @@
 PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
 LOCATION = "us-central1"
 SELF_HOSTED_MODEL_ID = "gemma2-9b-it-mg-one-click-deploy"
-ENDPOINT_ID = "6443623023395209216"
+ENDPOINT_ID = "5558665696616906752"
 
 
 def test_authentication() -> None:

From c4036ec66d8175fd1fe964b349b38a55b4594d6e Mon Sep 17 00:00:00 2001
From: Holt Skinner <13262395+holtskinner@users.noreply.github.com>
Date: Mon, 27 Jan 2025 09:49:56 -0600
Subject: [PATCH 04/10] Update
 generative_ai/chat_completions/chat_completions_authentication.py

---
 .../chat_completions/chat_completions_authentication.py         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generative_ai/chat_completions/chat_completions_authentication.py b/generative_ai/chat_completions/chat_completions_authentication.py
index 765ba728743..c494191a6cc 100644
--- a/generative_ai/chat_completions/chat_completions_authentication.py
+++ b/generative_ai/chat_completions/chat_completions_authentication.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Google LLC
+# Copyright 2025 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From 75eccb5046e0a668d1ad25d691efc868ced28f76 Mon Sep 17 00:00:00 2001
From: Holt Skinner <13262395+holtskinner@users.noreply.github.com>
Date: Mon, 27 Jan 2025 09:51:12 -0600
Subject: [PATCH 05/10] Update
 generative_ai/chat_completions/chat_completions_authentication.py


From 706b9eaadb5011524912b8ec54a4c467e1fe6a45 Mon Sep 17 00:00:00 2001
From: Holt Skinner <13262395+holtskinner@users.noreply.github.com>
Date: Mon, 27 Jan 2025 09:52:06 -0600
Subject: [PATCH 06/10] Update
 generative_ai/chat_completions/chat_completions_authentication.py


From b20542e260b39ba0e994a319362642c7a7acfa61 Mon Sep 17 00:00:00 2001
From: Holt Skinner <holtskinner@google.com>
Date: Mon, 27 Jan 2025 11:14:21 -0600
Subject: [PATCH 07/10] Update Test to include new model Endpoint ID

---
 generative_ai/chat_completions/chat_completions_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/generative_ai/chat_completions/chat_completions_test.py b/generative_ai/chat_completions/chat_completions_test.py
index 06f7c8fbdad..7a733df97c5 100644
--- a/generative_ai/chat_completions/chat_completions_test.py
+++ b/generative_ai/chat_completions/chat_completions_test.py
@@ -26,8 +26,8 @@
 
 PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
 LOCATION = "us-central1"
-SELF_HOSTED_MODEL_ID = "gemma2-9b-it-mg-one-click-deploy"
-ENDPOINT_ID = "5558665696616906752"
+SELF_HOSTED_MODEL_ID = "perm-gemma-2-9b-it-testing"
+ENDPOINT_ID = "2231912946873663488"
 
 
 def test_authentication() -> None:

From af4d5a2d5c1e78c169d1d0b326caf9c8a9ef195e Mon Sep 17 00:00:00 2001
From: Holt Skinner <holtskinner@google.com>
Date: Mon, 27 Jan 2025 11:56:48 -0600
Subject: [PATCH 08/10] Update Endpoint ID

---
 .../chat_completions/chat_completions_authentication.py         | 2 +-
 generative_ai/chat_completions/chat_completions_test.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/generative_ai/chat_completions/chat_completions_authentication.py b/generative_ai/chat_completions/chat_completions_authentication.py
index c494191a6cc..3cdae04e221 100644
--- a/generative_ai/chat_completions/chat_completions_authentication.py
+++ b/generative_ai/chat_completions/chat_completions_authentication.py
@@ -38,7 +38,7 @@ def generate_text(project_id: str, location: str = "us-central1") -> object:
 
     # If you are calling a self-deployed model from Model Garden, set the
     # ENDPOINT_ID variable and set the client's base URL to use your endpoint.
-    ENDPOINT_ID = "YOUR_ENDPOINT_ID"
+    # ENDPOINT_ID = "YOUR_ENDPOINT_ID"
 
     # OpenAI Client
     client = openai.OpenAI(
diff --git a/generative_ai/chat_completions/chat_completions_test.py b/generative_ai/chat_completions/chat_completions_test.py
index 7a733df97c5..9c462fb3b89 100644
--- a/generative_ai/chat_completions/chat_completions_test.py
+++ b/generative_ai/chat_completions/chat_completions_test.py
@@ -27,7 +27,7 @@
 PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
 LOCATION = "us-central1"
 SELF_HOSTED_MODEL_ID = "perm-gemma-2-9b-it-testing"
-ENDPOINT_ID = "2231912946873663488"
+ENDPOINT_ID = "6714120476014149632"
 
 
 def test_authentication() -> None:

From d7cd2a1555886bdc3f1d83252c4c7ee66792f41d Mon Sep 17 00:00:00 2001
From: Holt Skinner <holtskinner@google.com>
Date: Mon, 27 Jan 2025 13:52:57 -0600
Subject: [PATCH 09/10] Change Model ID

---
 generative_ai/chat_completions/chat_completions_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/generative_ai/chat_completions/chat_completions_test.py b/generative_ai/chat_completions/chat_completions_test.py
index 9c462fb3b89..56489b53fcf 100644
--- a/generative_ai/chat_completions/chat_completions_test.py
+++ b/generative_ai/chat_completions/chat_completions_test.py
@@ -26,7 +26,7 @@
 
 PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
 LOCATION = "us-central1"
-SELF_HOSTED_MODEL_ID = "perm-gemma-2-9b-it-testing"
+SELF_HOSTED_MODEL_ID = "google/gemma-2-9b-it"
 ENDPOINT_ID = "6714120476014149632"
 
 
From bd7500b5889d0ab24d456566dac8172a05b245b0 Mon Sep 17 00:00:00 2001
From: Holt Skinner <holtskinner@google.com>
Date: Mon, 27 Jan 2025 13:55:10 -0600
Subject: [PATCH 10/10] Update endpoint to v1

---
 .../chat_completions/chat_completions_authentication.py         | 2 +-
 .../chat_completions/chat_completions_credentials_refresher.py  | 2 +-
 .../chat_completions/chat_completions_non_streaming_image.py    | 2 +-
 .../chat_completions/chat_completions_non_streaming_text.py     | 2 +-
 .../chat_completions_non_streaming_text_self_deployed.py        | 2 +-
 .../chat_completions/chat_completions_streaming_image.py        | 2 +-
 .../chat_completions/chat_completions_streaming_text.py         | 2 +-
 .../chat_completions_streaming_text_self_deployed.py            | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/generative_ai/chat_completions/chat_completions_authentication.py b/generative_ai/chat_completions/chat_completions_authentication.py
index 3cdae04e221..aae029c2163 100644
--- a/generative_ai/chat_completions/chat_completions_authentication.py
+++ b/generative_ai/chat_completions/chat_completions_authentication.py
@@ -42,7 +42,7 @@ def generate_text(project_id: str, location: str = "us-central1") -> object:
 
     # OpenAI Client
     client = openai.OpenAI(
-        base_url=f"https://{location}-aiplatform.googleapis.com/v1beta1/projects/{project_id}/locations/{location}/endpoints/{ENDPOINT_ID}",
+        base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/{ENDPOINT_ID}",
         api_key=credentials.token,
     )
     # [END generativeaionvertexai_gemini_chat_completions_authentication]
diff --git a/generative_ai/chat_completions/chat_completions_credentials_refresher.py b/generative_ai/chat_completions/chat_completions_credentials_refresher.py
index 9ca8c0dc749..9a0956b1374 100644
--- a/generative_ai/chat_completions/chat_completions_credentials_refresher.py
+++ b/generative_ai/chat_completions/chat_completions_credentials_refresher.py
@@ -51,7 +51,7 @@ def generate_text(project_id: str, location: str = "us-central1") -> object:
     # location = "us-central1"
 
     client = OpenAICredentialsRefresher(
-        base_url=f"https://{location}-aiplatform.googleapis.com/v1beta1/projects/{project_id}/locations/{location}/endpoints/openapi",
+        base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi",
     )
 
     response = client.chat.completions.create(
diff --git a/generative_ai/chat_completions/chat_completions_non_streaming_image.py b/generative_ai/chat_completions/chat_completions_non_streaming_image.py
index 98db2f45bfc..0c94c071f46 100644
--- a/generative_ai/chat_completions/chat_completions_non_streaming_image.py
+++ b/generative_ai/chat_completions/chat_completions_non_streaming_image.py
@@ -31,7 +31,7 @@ def generate_text(project_id: str, location: str = "us-central1") -> object:
 
     # OpenAI Client
     client = openai.OpenAI(
-        base_url=f"https://{location}-aiplatform.googleapis.com/v1beta1/projects/{project_id}/locations/{location}/endpoints/openapi",
+        base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi",
         api_key=credentials.token,
     )
 
diff --git a/generative_ai/chat_completions/chat_completions_non_streaming_text.py b/generative_ai/chat_completions/chat_completions_non_streaming_text.py
index 7991d95e986..6906ee27392 100644
--- a/generative_ai/chat_completions/chat_completions_non_streaming_text.py
+++ b/generative_ai/chat_completions/chat_completions_non_streaming_text.py
@@ -30,7 +30,7 @@ def generate_text(project_id: str, location: str = "us-central1") -> object:
 
     # OpenAI Client
     client = openai.OpenAI(
-        base_url=f"https://{location}-aiplatform.googleapis.com/v1beta1/projects/{project_id}/locations/{location}/endpoints/openapi",
+        base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi",
         api_key=credentials.token,
     )
 
diff --git a/generative_ai/chat_completions/chat_completions_non_streaming_text_self_deployed.py b/generative_ai/chat_completions/chat_completions_non_streaming_text_self_deployed.py
index c58b10f037e..7789b85f599 100644
--- a/generative_ai/chat_completions/chat_completions_non_streaming_text_self_deployed.py
+++ b/generative_ai/chat_completions/chat_completions_non_streaming_text_self_deployed.py
@@ -37,7 +37,7 @@ def generate_text(
 
     # OpenAI Client
     client = openai.OpenAI(
-        base_url=f"https://{location}-aiplatform.googleapis.com/v1beta1/projects/{project_id}/locations/{location}/endpoints/{endpoint_id}",
+        base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/{endpoint_id}",
         api_key=credentials.token,
     )
 
diff --git a/generative_ai/chat_completions/chat_completions_streaming_image.py b/generative_ai/chat_completions/chat_completions_streaming_image.py
index 0aeb9b1f558..71d2897e018 100644
--- a/generative_ai/chat_completions/chat_completions_streaming_image.py
+++ b/generative_ai/chat_completions/chat_completions_streaming_image.py
@@ -30,7 +30,7 @@ def generate_text(project_id: str, location: str = "us-central1") -> object:
 
     # OpenAI Client
     client = openai.OpenAI(
-        base_url=f"https://{location}-aiplatform.googleapis.com/v1beta1/projects/{project_id}/locations/{location}/endpoints/openapi",
+        base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi",
         api_key=credentials.token,
     )
 
diff --git a/generative_ai/chat_completions/chat_completions_streaming_text.py b/generative_ai/chat_completions/chat_completions_streaming_text.py
index 64a87dbceef..f2506b79dbc 100644
--- a/generative_ai/chat_completions/chat_completions_streaming_text.py
+++ b/generative_ai/chat_completions/chat_completions_streaming_text.py
@@ -30,7 +30,7 @@ def generate_text(project_id: str, location: str = "us-central1") -> object:
 
     # OpenAI Client
     client = openai.OpenAI(
-        base_url=f"https://{location}-aiplatform.googleapis.com/v1beta1/projects/{project_id}/locations/{location}/endpoints/openapi",
+        base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/openapi",
         api_key=credentials.token,
     )
 
diff --git a/generative_ai/chat_completions/chat_completions_streaming_text_self_deployed.py b/generative_ai/chat_completions/chat_completions_streaming_text_self_deployed.py
index 6418920106d..5329984eeb7 100644
--- a/generative_ai/chat_completions/chat_completions_streaming_text_self_deployed.py
+++ b/generative_ai/chat_completions/chat_completions_streaming_text_self_deployed.py
@@ -37,7 +37,7 @@ def generate_text(
 
     # OpenAI Client
     client = openai.OpenAI(
-        base_url=f"https://{location}-aiplatform.googleapis.com/v1beta1/projects/{project_id}/locations/{location}/endpoints/{endpoint_id}",
+        base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/{endpoint_id}",
         api_key=credentials.token,
     )