From dc5c3c59e46745f9efcb528e64cc6390152ef802 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 5 Sep 2024 22:39:34 +0000 Subject: [PATCH 1/5] chore: improve error handling, redefine recency to 6 hours --- scripts/manage_cloud_functions.py | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/scripts/manage_cloud_functions.py b/scripts/manage_cloud_functions.py index 6b69089089..f0fc418b35 100644 --- a/scripts/manage_cloud_functions.py +++ b/scripts/manage_cloud_functions.py @@ -13,7 +13,7 @@ # limitations under the License. import argparse -from datetime import datetime +import datetime as dt import sys import time @@ -61,6 +61,13 @@ GCF_CLIENT = functions_v2.FunctionServiceClient() +# Consider GCFs created in last 6 hours as recent +RECENCY_CUTOFF = dt.timedelta(hours=6).total_seconds() + + +def is_recent(age: dt.timedelta): + return age.total_seconds() <= RECENCY_CUTOFF + def get_bigframes_functions(project, region): parent = f"projects/{args.project_id}/locations/{region}" @@ -94,8 +101,10 @@ def summarize_gcfs(args): # Count how many GCFs are newer than a day recent = 0 for f in functions: - age = datetime.now() - datetime.fromtimestamp(f.update_time.timestamp()) - if age.days <= 0: + age = dt.datetime.now() - dt.datetime.fromtimestamp( + f.update_time.timestamp() + ) + if is_recent(age): recent += 1 region_counts[region] = (functions_count, recent) @@ -106,7 +115,7 @@ def summarize_gcfs(args): region = item[0] count, recent = item[1] print( - "{}: Total={}, Recent={}, OlderThanADay={}".format( + "{}: Total={}, Recent={}, Older={}".format( region, count, recent, count - recent ) ) @@ -120,8 +129,10 @@ def cleanup_gcfs(args): functions = get_bigframes_functions(args.project_id, region) count = 0 for f in functions: - age = datetime.now() - datetime.fromtimestamp(f.update_time.timestamp()) - if age.days > 0: + age = dt.datetime.now() - dt.datetime.fromtimestamp( + f.update_time.timestamp() + ) + if not is_recent(age): try: count += 1 GCF_CLIENT.delete_function(name=f.name) @@ -134,12 +145,15 @@ def cleanup_gcfs(args): # that for this clean-up, i.e. 6 mutations per minute. So wait for # 60/6 = 10 seconds time.sleep(10) + except google.api_core.exceptions.NotFound: + # Most likely the function was deleted otherwise + pass except google.api_core.exceptions.ResourceExhausted: # Stop deleting in this region for now print( - f"Cannot delete any more functions in region {region} due to quota exhaustion. Please try again later." + f"Failed to delete function in region {region} due to quota exhaustion. Pausing for 2 minutes." ) - break + time.sleep(120) def list_str(values): From aac5070a1723912b23008a4227f6e851818430e9 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Fri, 6 Sep 2024 07:26:10 +0000 Subject: [PATCH 2/5] make recency cutoff a script param --- scripts/manage_cloud_functions.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/scripts/manage_cloud_functions.py b/scripts/manage_cloud_functions.py index f0fc418b35..33af8463c9 100644 --- a/scripts/manage_cloud_functions.py +++ b/scripts/manage_cloud_functions.py @@ -61,13 +61,6 @@ GCF_CLIENT = functions_v2.FunctionServiceClient() -# Consider GCFs created in last 6 hours as recent -RECENCY_CUTOFF = dt.timedelta(hours=6).total_seconds() - - -def is_recent(age: dt.timedelta): - return age.total_seconds() <= RECENCY_CUTOFF - def get_bigframes_functions(project, region): parent = f"projects/{args.project_id}/locations/{region}" @@ -104,7 +97,7 @@ def summarize_gcfs(args): age = dt.datetime.now() - dt.datetime.fromtimestamp( f.update_time.timestamp() ) - if is_recent(age): + if age.total_seconds() < args.recency_cutoff: recent += 1 region_counts[region] = (functions_count, recent) @@ -132,7 +125,7 @@ def cleanup_gcfs(args): age = dt.datetime.now() - dt.datetime.fromtimestamp( f.update_time.timestamp() ) - if not is_recent(age): + if age.total_seconds() >= args.recency_cutoff: try: count += 1 GCF_CLIENT.delete_function(name=f.name) @@ -182,6 +175,19 @@ def list_str(values): help="Cloud functions region(s). If multiple regions, Specify comma separated (e.g. region1,region2)", ) + def hours_to_timedelta(hrs): + return dt.timedelta(hours=int(hrs)).total_seconds() + + parser.add_argument( + "-c", + "--recency-cutoff", + type=hours_to_timedelta, + required=False, + default=hours_to_timedelta("24"), + action="store", + help="Number of hours, cloud functions older than which should be considered stale (worthy of cleanup).", + ) + subparsers = parser.add_subparsers(title="subcommands", required=True) parser_summary = subparsers.add_parser( "summary", From c04420dfccc67ad82809ca7910dd18ad22910b8a Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Wed, 9 Oct 2024 01:12:16 +0000 Subject: [PATCH 3/5] add automatic cleanup in session and test build script --- .kokoro/build.sh | 16 ++++++++++++++++ bigframes/session/__init__.py | 4 ++++ 2 files changed, 20 insertions(+) diff --git a/.kokoro/build.sh b/.kokoro/build.sh index 58eaa7fedf..4a2c8a570e 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -50,3 +50,19 @@ if [[ -n "${NOX_SESSION:-}" ]]; then else python3 -m nox --stop-on-first-error fi + +# In the end, cleanup a few stale (more than 12 hours old) temporary cloud run +# functions created by bigframems. This will keep the test GCP project within +# the "Number of functions" quota +# https://cloud.google.com/functions/quotas#resource_limits +testGcpProject=${GOOGLE_CLOUD_PROJECT} +if [[ -z "${testGcpProject}" ]]; then + testGcpProject=`gcloud config get project` +fi +if [[ -n "${testGcpProject}" ]]; then + python3 scripts/manage_cloud_functions.py \ + --project-id ${testGcpProject} \ + --recency-cutoff 12 \ + cleanup \ + --number 10 +fi diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index e52e2ef17f..d8d491a97d 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -275,6 +275,10 @@ def __init__( metrics=self._metrics, ) + def __del__(self): + """Automatic cleanup of internal resources""" + self.close() + @property def bqclient(self): return self._clients_provider.bqclient From 6d81a35da61b540ca0a35e0eadddb489d00381a0 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 10 Oct 2024 00:30:02 +0000 Subject: [PATCH 4/5] move cleanup to a nox session --- .kokoro/build.sh | 16 ---------------- .kokoro/continuous/doctest.cfg | 2 +- .kokoro/continuous/notebook.cfg | 2 +- .kokoro/load/notebook.cfg | 2 +- .kokoro/presubmit/doctest.cfg | 2 +- .kokoro/presubmit/notebook.cfg | 2 +- noxfile.py | 33 +++++++++++++++++++++++++++++++-- 7 files changed, 36 insertions(+), 23 deletions(-) diff --git a/.kokoro/build.sh b/.kokoro/build.sh index 4a2c8a570e..58eaa7fedf 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -50,19 +50,3 @@ if [[ -n "${NOX_SESSION:-}" ]]; then else python3 -m nox --stop-on-first-error fi - -# In the end, cleanup a few stale (more than 12 hours old) temporary cloud run -# functions created by bigframems. This will keep the test GCP project within -# the "Number of functions" quota -# https://cloud.google.com/functions/quotas#resource_limits -testGcpProject=${GOOGLE_CLOUD_PROJECT} -if [[ -z "${testGcpProject}" ]]; then - testGcpProject=`gcloud config get project` -fi -if [[ -n "${testGcpProject}" ]]; then - python3 scripts/manage_cloud_functions.py \ - --project-id ${testGcpProject} \ - --recency-cutoff 12 \ - cleanup \ - --number 10 -fi diff --git a/.kokoro/continuous/doctest.cfg b/.kokoro/continuous/doctest.cfg index dca21d43fd..6016700408 100644 --- a/.kokoro/continuous/doctest.cfg +++ b/.kokoro/continuous/doctest.cfg @@ -3,7 +3,7 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "doctest" + value: "doctest cleanup" } env_vars: { diff --git a/.kokoro/continuous/notebook.cfg b/.kokoro/continuous/notebook.cfg index cc73c3bea4..718ff9fda6 100644 --- a/.kokoro/continuous/notebook.cfg +++ b/.kokoro/continuous/notebook.cfg @@ -3,7 +3,7 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "notebook" + value: "notebook cleanup" } env_vars: { diff --git a/.kokoro/load/notebook.cfg b/.kokoro/load/notebook.cfg index c14297019a..e4b86890a2 100644 --- a/.kokoro/load/notebook.cfg +++ b/.kokoro/load/notebook.cfg @@ -3,7 +3,7 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "notebook" + value: "notebook cleanup" } env_vars: { diff --git a/.kokoro/presubmit/doctest.cfg b/.kokoro/presubmit/doctest.cfg index dca21d43fd..6016700408 100644 --- a/.kokoro/presubmit/doctest.cfg +++ b/.kokoro/presubmit/doctest.cfg @@ -3,7 +3,7 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "doctest" + value: "doctest cleanup" } env_vars: { diff --git a/.kokoro/presubmit/notebook.cfg b/.kokoro/presubmit/notebook.cfg index cc73c3bea4..718ff9fda6 100644 --- a/.kokoro/presubmit/notebook.cfg +++ b/.kokoro/presubmit/notebook.cfg @@ -3,7 +3,7 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "notebook" + value: "notebook cleanup" } env_vars: { diff --git a/noxfile.py b/noxfile.py index 92f8acad7f..a5edb29ccc 100644 --- a/noxfile.py +++ b/noxfile.py @@ -105,6 +105,7 @@ "system-3.9", "system-3.12", "cover", + "cleanup", ] # Error if a python version is missing @@ -697,8 +698,8 @@ def system_prerelease(session: nox.sessions.Session): @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def notebook(session: nox.Session): - GOOGLE_CLOUD_PROJECT = os.getenv("GOOGLE_CLOUD_PROJECT") - if not GOOGLE_CLOUD_PROJECT: + google_cloud_project = os.getenv("GOOGLE_CLOUD_PROJECT") + if not google_cloud_project: session.error( "Set GOOGLE_CLOUD_PROJECT environment variable to run notebook session." ) @@ -937,3 +938,31 @@ def release_dry_run(session): ): env["PROJECT_ROOT"] = "." session.run(".kokoro/release-nightly.sh", "--dry-run", env=env) + + +@nox.session(python=DEFAULT_PYTHON_VERSION) +def cleanup(session): + """Clean up stale and/or temporary resources in the test project.""" + google_cloud_project = os.getenv("GOOGLE_CLOUD_PROJECT") + if not google_cloud_project: + session.error( + "Set GOOGLE_CLOUD_PROJECT environment variable to run notebook session." + ) + + # Cleanup a few stale (more than 12 hours old) temporary cloud run + # functions created by bigframems. This will help keeping the test GCP + # project within the "Number of functions" quota + # https://cloud.google.com/functions/quotas#resource_limits + recency_cutoff_hours = 12 + cleanup_count_per_location = 10 + + session.install("-e", ".") + + session.run( + "python", + "scripts/manage_cloud_functions.py", + f"--project-id={google_cloud_project}", + f"--recency-cutoff={recency_cutoff_hours}", + "cleanup", + f"--number={cleanup_count_per_location}", + ) From fdba42e6326c1e4f03d530de61ed3242f90a9962 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 10 Oct 2024 06:06:55 +0000 Subject: [PATCH 5/5] run cleanup nox session only with doctest as doctest finishes faster --- .kokoro/continuous/notebook.cfg | 2 +- .kokoro/load/notebook.cfg | 2 +- .kokoro/presubmit/notebook.cfg | 2 +- noxfile.py | 1 - 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.kokoro/continuous/notebook.cfg b/.kokoro/continuous/notebook.cfg index 718ff9fda6..cc73c3bea4 100644 --- a/.kokoro/continuous/notebook.cfg +++ b/.kokoro/continuous/notebook.cfg @@ -3,7 +3,7 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "notebook cleanup" + value: "notebook" } env_vars: { diff --git a/.kokoro/load/notebook.cfg b/.kokoro/load/notebook.cfg index e4b86890a2..c14297019a 100644 --- a/.kokoro/load/notebook.cfg +++ b/.kokoro/load/notebook.cfg @@ -3,7 +3,7 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "notebook cleanup" + value: "notebook" } env_vars: { diff --git a/.kokoro/presubmit/notebook.cfg b/.kokoro/presubmit/notebook.cfg index 718ff9fda6..cc73c3bea4 100644 --- a/.kokoro/presubmit/notebook.cfg +++ b/.kokoro/presubmit/notebook.cfg @@ -3,7 +3,7 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "notebook cleanup" + value: "notebook" } env_vars: { diff --git a/noxfile.py b/noxfile.py index a5edb29ccc..ef4bf1a37a 100644 --- a/noxfile.py +++ b/noxfile.py @@ -105,7 +105,6 @@ "system-3.9", "system-3.12", "cover", - "cleanup", ] # Error if a python version is missing