chore: ensure colab sample notebooks are tested #351

Merged · 14 commits · Jan 30, 2024
Changes from all commits
15 changes: 12 additions & 3 deletions — notebooks/getting_started/getting_started_bq_dataframes.ipynb
@@ -532,6 +532,10 @@
},
"outputs": [],
"source": [
"# BigQuery DataFrames can read directly from GCS.\n",
"fn = 'gs://cloud-samples-data/vertex-ai/bigframe/penguins.csv'\n",
"\n",
"# Or from a local file.\n",
"# fn = 'penguins.csv'"
]
},
@@ -580,7 +584,9 @@
},
"outputs": [],
"source": [
"df_from_local = bf.read_csv(fn)"
"# If order is not important, use the \"bigquery\" engine to\n",
"# allow BigQuery DataFrames to read directly from GCS.\n",
"df_from_local = bf.read_csv(fn, engine=\"bigquery\")"
]
},
{
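A note on the engine change above — a minimal sketch of the two read paths, assuming bigframes.pandas is imported as bf, as elsewhere in this notebook:

import bigframes.pandas as bf

fn = "gs://cloud-samples-data/vertex-ai/bigframe/penguins.csv"

# Default engine: keeps row order, at the cost of reading the file
# through the client before loading it into BigQuery.
df_ordered = bf.read_csv(fn)

# "bigquery" engine: a BigQuery load job reads the file directly from
# GCS, avoiding the local download, but row order is not guaranteed.
df_direct = bf.read_csv(fn, engine="bigquery")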
@@ -658,7 +664,10 @@
},
"outputs": [],
"source": [
"df_from_local.to_gbq(PROJECT_ID + \".\" + DATASET_ID + \".penguins\")"
"df_from_local.to_gbq(\n",
" PROJECT_ID + \".\" + DATASET_ID + \".penguins\",\n",
" if_exists=\"replace\",\n",
")"
]
},
{
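The new if_exists="replace" argument makes reruns idempotent. A short sketch of the call this cell now makes (PROJECT_ID and DATASET_ID are the notebook's own placeholders):

# Build the fully qualified table ID and overwrite the table if it
# already exists, so repeated test runs don't fail on a leftover table.
table_id = f"{PROJECT_ID}.{DATASET_ID}.penguins"
df_from_local.to_gbq(table_id, if_exists="replace")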
@@ -771,7 +780,7 @@
},
"outputs": [],
"source": [
"bq_df[\"species\", \"body_mass_g\"].groupby(by=bq_df[\"species\"]).mean(numeric_only=True).head()"
"bq_df[[\"species\", \"body_mass_g\"]].groupby(by=bq_df[\"species\"]).mean(numeric_only=True).head()"
]
},
{
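The added brackets fix a real bug: in the pandas-style API, df["a", "b"] passes the tuple ("a", "b") as a single column key, while df[["a", "b"]] selects two columns. Illustrated here with plain pandas, which BigQuery DataFrames mirrors:

import pandas as pd

df = pd.DataFrame({"species": ["a", "a", "b"], "body_mass_g": [1.0, 2.0, 3.0]})

df[["species", "body_mass_g"]]  # DataFrame with both columns
# df["species", "body_mass_g"]  # KeyError: ('species', 'body_mass_g')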
78 changes: 56 additions & 22 deletions — noxfile.py
@@ -657,9 +657,23 @@ def system_prerelease(session: nox.sessions.Session):


@nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS)
def notebook(session):
def notebook(session: nox.Session):
GOOGLE_CLOUD_PROJECT = os.getenv("GOOGLE_CLOUD_PROJECT")
if not GOOGLE_CLOUD_PROJECT:
session.error(
"Set GOOGLE_CLOUD_PROJECT environment variable to run notebook session."
)

session.install("-e", ".[all]")
session.install("pytest", "pytest-xdist", "pytest-retry", "nbmake")
session.install(
"pytest",
"pytest-xdist",
"pytest-retry",
"nbmake",
"google-cloud-aiplatform",
"matplotlib",
"seaborn",
)

notebooks_list = list(Path("notebooks/").glob("*/*.ipynb"))

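With the new guard, the session fails fast when the project is not configured. A hypothetical local invocation (the project ID is a placeholder):

GOOGLE_CLOUD_PROJECT=my-test-project nox -s notebook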
@@ -669,19 +683,22 @@ def notebook(session):
# These notebooks contain special colab `param {type:"string"}`
# comments, which make it easy for customers to fill in their
# own information.
#
# With the notebooks_fill_params.py script, we are able to find and
# replace the PROJECT_ID parameter, but not the others.
#
# TODO(ashleyxu): Test these notebooks by replacing parameters with
# appropriate values and omitting cleanup logic that may break
# our test infrastructure.
"notebooks/getting_started/getting_started_bq_dataframes.ipynb",
"notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb",
"notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb",
"notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb",
Review thread:

ashleyxuu (Contributor, Jan 26, 2024): Do we still need DATASET when running bf.read_gbq()?

Author: That's a read, which doesn't need to be parametrized. DATASET is used as a destination in the other samples.

ashleyxuu: Thanks, got it!

"notebooks/regression/bq_dataframes_ml_linear_regression.ipynb",
"notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb",
"notebooks/vertex_sdk/sdk2_bigframes_pytorch.ipynb",
"notebooks/vertex_sdk/sdk2_bigframes_sklearn.ipynb",
"notebooks/vertex_sdk/sdk2_bigframes_tensorflow.ipynb",
"notebooks/visualization/bq_dataframes_covid_line_graphs.ipynb",
"notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb", # Needs DATASET.
"notebooks/regression/bq_dataframes_ml_linear_regression.ipynb", # Needs DATASET_ID.
"notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb", # Needs CONNECTION.
# TODO(swast): investigate why we get 404 errors, even though
# bq_dataframes_llm_code_generation creates a bucket in the sample.
"notebooks/generative_ai/bq_dataframes_llm_code_generation.ipynb", # Needs BUCKET_URI.
"notebooks/vertex_sdk/sdk2_bigframes_pytorch.ipynb", # Needs BUCKET_URI.
"notebooks/vertex_sdk/sdk2_bigframes_sklearn.ipynb", # Needs BUCKET_URI.
"notebooks/vertex_sdk/sdk2_bigframes_tensorflow.ipynb", # Needs BUCKET_URI.
# The experimental notebooks imagine features that don't yet
# exist or only exist as temporary prototypes.
"notebooks/experimental/longer_ml_demo.ipynb",
@@ -709,9 +726,9 @@ def notebook(session):
for nb, regions in notebooks_reg.items()
}

# For some reason nbmake exits silently with "no tests ran" message if
# The pytest --nbmake exits silently with "no tests ran" message if
# one of the notebook paths supplied does not exist. Let's make sure that
# each path exists
# each path exists.
for nb in notebooks + list(notebooks_reg):
assert os.path.exists(nb), nb

@@ -723,16 +740,33 @@ def notebook(session):
pytest_command = [
"py.test",
"--nbmake",
"--nbmake-timeout=600",
"--nbmake-timeout=900", # 15 minutes
Review thread:

Contributor: Is there a reason we increased the timeout limit specifically to 15 minutes? Do we test the boundary?

Author: The default was too slow for the LLM sample when I tested locally.

]
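For reference, the list above assembles into roughly this invocation once the -nauto flag and notebook paths are appended below (paths abbreviated for illustration):

py.test --nbmake --nbmake-timeout=900 -nauto notebooks/getting_started/getting_started_bq_dataframes.ipynb ...

The -nauto flag lets pytest-xdist choose a worker count automatically.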

# Run self-contained notebooks in single session.run
# achieve parallelization via -n
session.run(
*pytest_command,
"-nauto",
*notebooks,
)
try:
# Populate notebook parameters and make a backup so that the notebooks
# are runnable.
session.run(
"python",
CURRENT_DIRECTORY / "scripts" / "notebooks_fill_params.py",
*notebooks,
)

# Run self-contained notebooks in single session.run
# achieve parallelization via -n
session.run(
*pytest_command,
"-nauto",
*notebooks,
)
finally:
# Prevent our notebook changes from getting checked in to git
# accidentally.
session.run(
"python",
CURRENT_DIRECTORY / "scripts" / "notebooks_restore_from_backup.py",
*notebooks,
)

# Run regionalized notebooks in parallel session.run's, since each notebook
# takes a different region via env param.
65 changes: 65 additions & 0 deletions — scripts/notebooks_fill_params.py
@@ -0,0 +1,65 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
import re
import shutil
import sys

GOOGLE_CLOUD_PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"]


def make_backup(notebook_path: str):
shutil.copy(
notebook_path,
f"{notebook_path}.backup",
)


def replace_project(line):
"""
Notebooks contain special colab `param {type:"string"}`
comments, which make it easy for customers to fill in their
own information.
"""
# Make sure we're robust to whitespace differences.
cleaned = re.sub(r"\s", "", line)
if cleaned == 'PROJECT_ID=""#@param{type:"string"}':
return f'PROJECT_ID = "{GOOGLE_CLOUD_PROJECT}" # @param {{type:"string"}}\n'
else:
return line


def replace_params(notebook_path: str):
with open(notebook_path, "r", encoding="utf-8") as notebook_file:
notebook_json = json.load(notebook_file)

for cell in notebook_json["cells"]:
lines = cell.get("source", [])
new_lines = [replace_project(line) for line in lines]
cell["source"] = new_lines

with open(notebook_path, "w", encoding="utf-8") as notebook_file:
json.dump(notebook_json, notebook_file, indent=2, ensure_ascii=False)


def main(notebook_paths):
for notebook_path in notebook_paths:
make_backup(notebook_path)
replace_params(notebook_path)


if __name__ == "__main__":
main(sys.argv[1:])
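For illustration, the substitution replace_project performs on a matching notebook line (the project ID below is a placeholder, not from the diff):

# Before, as shipped in the notebook (colab param comment):
PROJECT_ID = ""  # @param {type:"string"}

# After running the script with GOOGLE_CLOUD_PROJECT=my-test-project:
PROJECT_ID = "my-test-project"  # @param {type:"string"}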
35 changes: 35 additions & 0 deletions — scripts/notebooks_restore_from_backup.py
@@ -0,0 +1,35 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pathlib
import shutil
import sys


def restore_from_backup(notebook_path):
backup_path = pathlib.Path(f"{notebook_path}.backup")
if backup_path.exists():
shutil.move(
backup_path,
notebook_path,
)


def main(notebook_paths):
for notebook_path in notebook_paths:
restore_from_backup(notebook_path)


if __name__ == "__main__":
main(sys.argv[1:])
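A hypothetical round trip with both scripts (the notebook path is illustrative):

GOOGLE_CLOUD_PROJECT=my-test-project python scripts/notebooks_fill_params.py notebooks/getting_started/getting_started_bq_dataframes.ipynb
python scripts/notebooks_restore_from_backup.py notebooks/getting_started/getting_started_bq_dataframes.ipynb

notebooks_fill_params.py writes a .backup copy next to each notebook before editing it; notebooks_restore_from_backup.py moves that backup over the edited file, so the parameter-filled version never gets committed.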