diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index 22ef11dd19..18cb83fa18 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ -19,6 +19,7 @@ import pandas as pd import bigframes.constants as constants +from bigframes.core import log_adapter import bigframes.core as core import bigframes.core.block_transforms as block_ops import bigframes.core.blocks as blocks @@ -33,6 +34,7 @@ import third_party.bigframes_vendored.pandas.core.groupby as vendored_pandas_groupby +@log_adapter.class_logger class DataFrameGroupBy(vendored_pandas_groupby.DataFrameGroupBy): __doc__ = vendored_pandas_groupby.GroupBy.__doc__ @@ -406,6 +408,7 @@ def _resolve_label(self, label: blocks.Label) -> str: return col_ids[0] +@log_adapter.class_logger class SeriesGroupBy(vendored_pandas_groupby.SeriesGroupBy): __doc__ = vendored_pandas_groupby.GroupBy.__doc__ diff --git a/bigframes/core/log_adapter.py b/bigframes/core/log_adapter.py new file mode 100644 index 0000000000..b790d19562 --- /dev/null +++ b/bigframes/core/log_adapter.py @@ -0,0 +1,61 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import threading +from typing import List + +_lock = threading.Lock() +MAX_LABELS_COUNT = 64 +_api_methods: List = [] + + +def class_logger(decorated_cls): + """Decorator that adds logging functionality to each method of the class.""" + for attr_name, attr_value in decorated_cls.__dict__.items(): + if callable(attr_value): + setattr(decorated_cls, attr_name, method_logger(attr_value)) + return decorated_cls + + +def method_logger(method): + """Decorator that adds logging functionality to a method.""" + + @functools.wraps(method) + def wrapper(*args, **kwargs): + api_method_name = str(method.__name__) + # Track regular and "dunder" methods + if api_method_name.startswith("__") or not api_method_name.startswith("_"): + add_api_method(api_method_name) + return method(*args, **kwargs) + + return wrapper + + +def add_api_method(api_method_name): + global _lock + global _api_methods + with _lock: + # Push the method to the front of the _api_methods list + _api_methods.insert(0, api_method_name) + # Keep the list length within the maximum limit (adjust MAX_LABELS_COUNT as needed) + _api_methods = _api_methods[:MAX_LABELS_COUNT] + + +def get_and_reset_api_methods(): + global _lock + with _lock: + previous_api_methods = list(_api_methods) + _api_methods.clear() + return previous_api_methods diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index 050d356239..44a8d808ff 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from __future__ import annotations from dataclasses import dataclass, field diff --git a/bigframes/core/window/__init__.py b/bigframes/core/window/__init__.py index d3d081124e..240715b6df 100644 --- a/bigframes/core/window/__init__.py +++ b/bigframes/core/window/__init__.py @@ -16,12 +16,14 @@ import typing +from bigframes.core import log_adapter import bigframes.core as core import bigframes.core.blocks as blocks import bigframes.operations.aggregations as agg_ops import third_party.bigframes_vendored.pandas.core.window.rolling as vendored_pandas_rolling +@log_adapter.class_logger class Window(vendored_pandas_rolling.Window): __doc__ = vendored_pandas_rolling.Window.__doc__ diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 4716de48d6..57b4ca42cf 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -41,6 +41,7 @@ import bigframes._config.display_options as display_options import bigframes.constants as constants import bigframes.core +from bigframes.core import log_adapter import bigframes.core.block_transforms as block_ops import bigframes.core.blocks as blocks import bigframes.core.groupby as groupby @@ -81,6 +82,7 @@ # Inherits from pandas DataFrame so that we can use the same docstrings. +@log_adapter.class_logger class DataFrame(vendored_pandas_frame.DataFrame): __doc__ = vendored_pandas_frame.DataFrame.__doc__ diff --git a/bigframes/operations/datetimes.py b/bigframes/operations/datetimes.py index 1b20c2d593..a8a33beb57 100644 --- a/bigframes/operations/datetimes.py +++ b/bigframes/operations/datetimes.py @@ -14,12 +14,14 @@ from __future__ import annotations +from bigframes.core import log_adapter import bigframes.operations as ops import bigframes.operations.base import bigframes.series as series import third_party.bigframes_vendored.pandas.core.indexes.accessor as vendordt +@log_adapter.class_logger class DatetimeMethods( bigframes.operations.base.SeriesMethods, vendordt.DatetimeProperties ): diff --git a/bigframes/operations/strings.py b/bigframes/operations/strings.py index 0545ea34d6..201b19abe8 100644 --- a/bigframes/operations/strings.py +++ b/bigframes/operations/strings.py @@ -18,6 +18,7 @@ from typing import cast, Literal, Optional, Union import bigframes.constants as constants +from bigframes.core import log_adapter import bigframes.dataframe as df import bigframes.operations as ops import bigframes.operations.base @@ -32,6 +33,7 @@ } +@log_adapter.class_logger class StringMethods(bigframes.operations.base.SeriesMethods, vendorstr.StringMethods): __doc__ = vendorstr.StringMethods.__doc__ diff --git a/bigframes/operations/structs.py b/bigframes/operations/structs.py index 506a557709..b2ae98f378 100644 --- a/bigframes/operations/structs.py +++ b/bigframes/operations/structs.py @@ -18,6 +18,7 @@ import ibis.expr.types as ibis_types +from bigframes.core import log_adapter import bigframes.dataframe import bigframes.operations import bigframes.operations.base @@ -38,6 +39,7 @@ def _as_ibis(self, x: ibis_types.Value): return struct_value[name].name(name) +@log_adapter.class_logger class StructAccessor( bigframes.operations.base.SeriesMethods, vendoracessors.StructAccessor ): diff --git a/bigframes/series.py b/bigframes/series.py index 2cffdd5fce..c929775a00 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -30,6 +30,7 @@ import bigframes.constants as constants import bigframes.core +from bigframes.core import log_adapter import bigframes.core.block_transforms as block_ops import bigframes.core.blocks as blocks import bigframes.core.groupby as groupby @@ -55,6 +56,7 @@ LevelsType = typing.Union[LevelType, typing.Sequence[LevelType]] +@log_adapter.class_logger class Series(bigframes.operations.base.SeriesMethods, vendored_pandas_series.Series): def __init__(self, *args, **kwargs): self._query_job: Optional[bigquery.QueryJob] = None diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 1b12e5deeb..069bd5d260 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -64,6 +64,7 @@ import bigframes._config.bigquery_options as bigquery_options import bigframes.constants as constants +from bigframes.core import log_adapter import bigframes.core as core import bigframes.core.blocks as blocks import bigframes.core.guid as guid @@ -1347,6 +1348,10 @@ def _start_query( Starts query job and waits for results. """ job_config = self._prepare_job_config(job_config) + api_methods = log_adapter.get_and_reset_api_methods() + job_config.labels = bigframes_io.create_job_configs_labels( + job_configs_labels=job_config.labels, api_methods=api_methods + ) query_job = self.bqclient.query(sql, job_config=job_config) opts = bigframes.options.display @@ -1381,6 +1386,8 @@ def _prepare_job_config( ) -> bigquery.QueryJobConfig: if job_config is None: job_config = self.bqclient.default_query_job_config + if job_config is None: + job_config = bigquery.QueryJobConfig() if bigframes.options.compute.maximum_bytes_billed is not None: job_config.maximum_bytes_billed = ( bigframes.options.compute.maximum_bytes_billed diff --git a/bigframes/session/_io/bigquery.py b/bigframes/session/_io/bigquery.py index badc91e356..dae73301e7 100644 --- a/bigframes/session/_io/bigquery.py +++ b/bigframes/session/_io/bigquery.py @@ -17,17 +17,36 @@ from __future__ import annotations import datetime +import itertools import textwrap import types -from typing import Dict, Iterable, Optional, Union +from typing import Dict, Iterable, Optional, Sequence, Union import uuid import google.cloud.bigquery as bigquery IO_ORDERING_ID = "bqdf_row_nums" +MAX_LABELS_COUNT = 64 TEMP_TABLE_PREFIX = "bqdf{date}_{random_id}" +def create_job_configs_labels( + job_configs_labels: Optional[Dict[str, str]], + api_methods: Sequence[str], +) -> Dict[str, str]: + if job_configs_labels is None: + job_configs_labels = {} + + labels = list( + itertools.chain( + job_configs_labels.keys(), + (f"recent-bigframes-api-{i}" for i in range(len(api_methods))), + ) + ) + values = list(itertools.chain(job_configs_labels.values(), api_methods)) + return dict(zip(labels[:MAX_LABELS_COUNT], values[:MAX_LABELS_COUNT])) + + def create_export_csv_statement( table_id: str, uri: str, field_delimiter: str, header: bool ) -> str: diff --git a/tests/unit/core/test_log_adapter.py b/tests/unit/core/test_log_adapter.py new file mode 100644 index 0000000000..376b7f2075 --- /dev/null +++ b/tests/unit/core/test_log_adapter.py @@ -0,0 +1,60 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from bigframes.core import log_adapter + +MAX_LABELS_COUNT = 64 + + +@pytest.fixture +def test_instance(): + # Create a simple class for testing + @log_adapter.class_logger + class TestClass: + def method1(self): + pass + + def method2(self): + pass + + return TestClass() + + +def test_method_logging(test_instance): + test_instance.method1() + test_instance.method2() + + # Check if the methods were added to the _api_methods list + api_methods = log_adapter.get_and_reset_api_methods() + assert api_methods is not None + assert "method1" in api_methods + assert "method2" in api_methods + + +def test_add_api_method_limit(test_instance): + # Ensure that add_api_method correctly adds a method to _api_methods + for i in range(70): + test_instance.method2() + assert len(log_adapter._api_methods) == MAX_LABELS_COUNT + + +def test_get_and_reset_api_methods(test_instance): + # Ensure that get_and_reset_api_methods returns a copy and resets the list + test_instance.method1() + test_instance.method2() + previous_methods = log_adapter.get_and_reset_api_methods() + assert previous_methods is not None + assert log_adapter._api_methods == [] diff --git a/tests/unit/session/test_io_bigquery.py b/tests/unit/session/test_io_bigquery.py index 03470208e4..e1481d3f05 100644 --- a/tests/unit/session/test_io_bigquery.py +++ b/tests/unit/session/test_io_bigquery.py @@ -19,7 +19,113 @@ import google.cloud.bigquery as bigquery import pytest -import bigframes.session._io.bigquery +import bigframes +from bigframes.core import log_adapter +import bigframes.pandas as bpd +import bigframes.session._io.bigquery as io_bq + + +def test_create_job_configs_labels_is_none(): + api_methods = ["agg", "series-mode"] + labels = io_bq.create_job_configs_labels( + job_configs_labels=None, api_methods=api_methods + ) + expected_dict = { + "recent-bigframes-api-0": "agg", + "recent-bigframes-api-1": "series-mode", + } + assert labels is not None + assert labels == expected_dict + + +def test_create_job_configs_labels_length_limit_not_met(): + cur_labels = { + "bigframes-api": "read_pandas", + "source": "bigquery-dataframes-temp", + } + api_methods = ["agg", "series-mode"] + labels = io_bq.create_job_configs_labels( + job_configs_labels=cur_labels, api_methods=api_methods + ) + expected_dict = { + "bigframes-api": "read_pandas", + "source": "bigquery-dataframes-temp", + "recent-bigframes-api-0": "agg", + "recent-bigframes-api-1": "series-mode", + } + assert labels is not None + assert len(labels) == 4 + assert labels == expected_dict + + +def test_create_job_configs_labels_log_adaptor_call_method_under_length_limit(): + cur_labels = { + "bigframes-api": "read_pandas", + "source": "bigquery-dataframes-temp", + } + df = bpd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) + # Test running two methods + df.head() + df.max() + api_methods = log_adapter._api_methods + + labels = io_bq.create_job_configs_labels( + job_configs_labels=cur_labels, api_methods=api_methods + ) + expected_dict = { + "bigframes-api": "read_pandas", + "source": "bigquery-dataframes-temp", + "recent-bigframes-api-0": "__init__", + "recent-bigframes-api-1": "max", + "recent-bigframes-api-2": "__init__", + "recent-bigframes-api-3": "head", + "recent-bigframes-api-4": "__init__", + } + assert labels is not None + assert len(labels) == 7 + assert labels == expected_dict + + +def test_create_job_configs_labels_length_limit_met_and_labels_is_none(): + df = bpd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) + # Test running methods more than the labels' length limit + for i in range(66): + df.head() + api_methods = log_adapter._api_methods + + labels = io_bq.create_job_configs_labels( + job_configs_labels=None, api_methods=api_methods + ) + assert labels is not None + assert len(labels) == 64 + assert "head" in labels.values() + + +def test_create_job_configs_labels_length_limit_met(): + cur_labels = { + "bigframes-api": "read_pandas", + "source": "bigquery-dataframes-temp", + } + for i in range(60): + key = f"bigframes-api-test-{i}" + value = f"test{i}" + cur_labels[key] = value + # If cur_labels length is 62, we can only add one label from api_methods + df = bpd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) + # Test running two methods + df.head() + df.max() + api_methods = log_adapter._api_methods + + labels = io_bq.create_job_configs_labels( + job_configs_labels=cur_labels, api_methods=api_methods + ) + assert labels is not None + assert len(labels) == 64 + assert "max" in labels.values() + assert "head" not in labels.values() + assert "bigframes-api" in labels.keys() + assert "source" in labels.keys() def test_create_snapshot_sql_doesnt_timetravel_anonymous_datasets(): @@ -125,5 +231,5 @@ def test_create_temp_table_default_expiration(): ), ) def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str): - sql = bigframes.session._io.bigquery.bq_schema_to_sql(schema) + sql = io_bq.bq_schema_to_sql(schema) assert sql == expected diff --git a/tests/unit/test_compute_options.py b/tests/unit/test_compute_options.py index 499a0a5fef..a613bca7b9 100644 --- a/tests/unit/test_compute_options.py +++ b/tests/unit/test_compute_options.py @@ -18,13 +18,9 @@ def test_maximum_bytes_option(): session = resources.create_bigquery_session() - num_query_calls = 0 with bf.option_context("compute.maximum_bytes_billed", 10000): - # clear initial method calls - session.bqclient.method_calls = [] + session.bqclient.query.reset_mock() session._start_query("query") - for call in session.bqclient.method_calls: - _, _, kwargs = call - num_query_calls += 1 - assert kwargs["job_config"].maximum_bytes_billed == 10000 - assert num_query_calls > 0 + call = session.bqclient.query.call_args + assert call.kwargs["job_config"].maximum_bytes_billed == 10000 + session.bqclient.query.assert_called_once()