Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

fix!: make dataset and name params mandatory in udf #1619

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We'll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions 1 README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Version 2.0 introduces breaking changes for improved security and performance. K
``cloud_function_service_account="default"``. And network ingress now defaults to ``"internal-only"``.
* **@remote_function Argument Passing:** Arguments other than ``input_types``, ``output_type``, and ``dataset``
to ``remote_function`` must now be passed using keyword syntax, as positional arguments are no longer supported.
* **@udf Argument Passing:** Arguments ``dataset`` and ``name`` to ``udf`` are now mandatory.
* **Endpoint Connections:** Automatic fallback to locational endpoints in certain regions is removed.
* **LLM Updates (Gemini Integration):** Integrations now default to the ``gemini-2.0-flash-001`` model.
PaLM2 support has been removed; please migrate any existing PaLM2 usage to Gemini. **Note:** The current default
Expand Down
2 changes: 1 addition & 1 deletion 2 bigframes/functions/_function_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,7 @@ def udf(
``bigframes.pandas.reset_session``/
``bigframes.pandas.clean_up_by_session_id``) does not clean up
the function, and leaves it for the user to manage the function
and the associated cloud function directly.
directly.
packages (str[], Optional):
Explicit name of the external package dependencies. Each
dependency is added to the `requirements.txt` as is, and can be
Expand Down
4 changes: 2 additions & 2 deletions 4 bigframes/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,9 @@ def udf(
*,
input_types: Union[None, type, Sequence[type]] = None,
output_type: Optional[type] = None,
dataset: Optional[str] = None,
dataset: str,
bigquery_connection: Optional[str] = None,
name: Optional[str] = None,
name: str,
packages: Optional[Sequence[str]] = None,
):
return global_session.with_default_session(
Expand Down
19 changes: 9 additions & 10 deletions 19 bigframes/session/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1444,9 +1444,9 @@ def udf(
*,
input_types: Union[None, type, Sequence[type]] = None,
output_type: Optional[type] = None,
dataset: Optional[str] = None,
dataset: str,
bigquery_connection: Optional[str] = None,
name: Optional[str] = None,
name: str,
packages: Optional[Sequence[str]] = None,
):
"""Decorator to turn a Python user defined function (udf) into a
Expand All @@ -1473,11 +1473,10 @@ def udf(
be specified. The supported output types are `bool`, `bytes`,
`float`, `int`, `str`, `list[bool]`, `list[float]`, `list[int]`
and `list[str]`.
dataset (str, Optional):
dataset (str):
Dataset in which to create a BigQuery managed function. It
should be in `<project_id>.<dataset_name>` or `<dataset_name>`
format. If this parameter is not provided then session dataset
id is used.
format.
bigquery_connection (str, Optional):
Name of the BigQuery connection. It is used to provide an
identity to the serverless instances running the user code. It
Expand All @@ -1489,18 +1488,18 @@ def udf(
will be created without any connection. A udf without a
connection has no internet access and no access to other GCP
services.
name (str, Optional):
name (str):
Explicit name of the persisted BigQuery managed function. Use it
with caution, because more than one users working in the same
project and dataset could overwrite each other's managed
functions if they use the same persistent name. When an explicit
name is provided, any session specific clean up (
functions if they use the same persistent name. Please note that
any session specific clean up (
``bigframes.session.Session.close``/
``bigframes.pandas.close_session``/
``bigframes.pandas.reset_session``/
``bigframes.pandas.clean_up_by_session_id``) does not clean up
the function, and leaves it for the user to manage the function
and the associated cloud function directly.
this function, and leaves it for the user to manage the function
directly.
packages (str[], Optional):
Explicit name of the external package dependencies. Each
dependency is added to the `requirements.txt` as is, and can be
Expand Down
55 changes: 39 additions & 16 deletions 55 tests/system/large/functions/test_managed_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,15 @@
import pandas
import pyarrow
import pytest
import test_utils.prefixer

import bigframes
import bigframes.exceptions as bfe
import bigframes.pandas as bpd
from tests.system.utils import cleanup_function_assets

prefixer = test_utils.prefixer.Prefixer("bigframes", "")


def test_managed_function_multiply_with_ibis(
session,
Expand All @@ -37,6 +40,7 @@ def test_managed_function_multiply_with_ibis(
input_types=[int, int],
output_type=int,
dataset=dataset_id,
name=prefixer.create_prefix(),
)
def multiply(x, y):
return x * y
Expand Down Expand Up @@ -87,6 +91,7 @@ def test_managed_function_stringify_with_ibis(
input_types=[int],
output_type=str,
dataset=dataset_id,
name=prefixer.create_prefix(),
)
def stringify(x):
return f"I got {x}"
Expand Down Expand Up @@ -123,7 +128,10 @@ def stringify(x):
def test_managed_function_array_output(session, scalars_dfs, dataset_id):
try:

@session.udf(dataset=dataset_id)
@session.udf(
dataset=dataset_id,
name=prefixer.create_prefix(),
)
def featurize(x: int) -> list[float]:
return [float(i) for i in [x, x + 1, x + 2]]

Expand Down Expand Up @@ -160,10 +168,10 @@ def featurize(x: int) -> list[float]:
cleanup_function_assets(featurize, session.bqclient, ignore_failures=False)


def test_managed_function_series_apply(session, scalars_dfs):
def test_managed_function_series_apply(session, dataset_id, scalars_dfs):
try:

@session.udf()
@session.udf(dataset=dataset_id, name=prefixer.create_prefix())
def foo(x: int) -> bytes:
return bytes(abs(x))

Expand Down Expand Up @@ -214,13 +222,14 @@ def foo(x: int) -> bytes:

def test_managed_function_series_apply_array_output(
session,
dataset_id,
scalars_dfs,
):
try:

with pytest.warns(bfe.PreviewWarning, match="udf is in preview."):

@session.udf()
@session.udf(dataset=dataset_id, name=prefixer.create_prefix())
def foo_list(x: int) -> list[float]:
return [float(abs(x)), float(abs(x) + 1)]

Expand All @@ -243,7 +252,7 @@ def foo_list(x: int) -> list[float]:
cleanup_function_assets(foo_list, session.bqclient, ignore_failures=False)


def test_managed_function_series_combine(session, scalars_dfs):
def test_managed_function_series_combine(session, dataset_id, scalars_dfs):
try:
# This function is deliberately written to not work with NA input.
def add(x: int, y: int) -> int:
Expand All @@ -258,7 +267,9 @@ def add(x: int, y: int) -> int:
# make sure there are NA values in the test column.
assert any([pandas.isna(val) for val in bf_df[int_col_name_with_nulls]])

add_managed_func = session.udf()(add)
add_managed_func = session.udf(
dataset=dataset_id, name=prefixer.create_prefix()
)(add)

# with nulls in the series the managed function application would fail.
with pytest.raises(
Expand Down Expand Up @@ -301,7 +312,7 @@ def add(x: int, y: int) -> int:
)


def test_managed_function_series_combine_array_output(session, scalars_dfs):
def test_managed_function_series_combine_array_output(session, dataset_id, scalars_dfs):
try:

def add_list(x: int, y: int) -> list[int]:
Expand All @@ -316,7 +327,9 @@ def add_list(x: int, y: int) -> list[int]:
# Make sure there are NA values in the test column.
assert any([pandas.isna(val) for val in bf_df[int_col_name_with_nulls]])

add_list_managed_func = session.udf()(add_list)
add_list_managed_func = session.udf(
dataset=dataset_id, name=prefixer.create_prefix()
)(add_list)

# After filtering out nulls the managed function application should work
# similar to pandas.
Expand Down Expand Up @@ -364,7 +377,7 @@ def add_list(x: int, y: int) -> list[int]:
)


def test_managed_function_dataframe_map(session, scalars_dfs):
def test_managed_function_dataframe_map(session, dataset_id, scalars_dfs):
try:

def add_one(x):
Expand All @@ -373,6 +386,8 @@ def add_one(x):
mf_add_one = session.udf(
input_types=[int],
output_type=int,
dataset=dataset_id,
name=prefixer.create_prefix(),
)(add_one)

scalars_df, scalars_pandas_df = scalars_dfs
Expand All @@ -398,9 +413,7 @@ def add_one(x):
cleanup_function_assets(mf_add_one, session.bqclient, ignore_failures=False)


def test_managed_function_dataframe_map_array_output(
session, scalars_dfs, dataset_id_permanent
):
def test_managed_function_dataframe_map_array_output(session, scalars_dfs, dataset_id):
try:

def add_one_list(x):
Expand All @@ -409,6 +422,8 @@ def add_one_list(x):
mf_add_one_list = session.udf(
input_types=[int],
output_type=list[int],
dataset=dataset_id,
name=prefixer.create_prefix(),
)(add_one_list)

scalars_df, scalars_pandas_df = scalars_dfs
Expand Down Expand Up @@ -439,7 +454,7 @@ def add_one_list(x):
)


def test_managed_function_dataframe_apply_axis_1(session, scalars_dfs):
def test_managed_function_dataframe_apply_axis_1(session, dataset_id, scalars_dfs):
try:
scalars_df, scalars_pandas_df = scalars_dfs
series = scalars_df["int64_too"]
Expand All @@ -451,6 +466,8 @@ def add_ints(x, y):
add_ints_mf = session.udf(
input_types=[int, int],
output_type=int,
dataset=dataset_id,
name=prefixer.create_prefix(),
)(add_ints)
assert add_ints_mf.bigframes_bigquery_function # type: ignore

Expand All @@ -475,7 +492,7 @@ def add_ints(x, y):
cleanup_function_assets(add_ints_mf, session.bqclient, ignore_failures=False)


def test_managed_function_dataframe_apply_axis_1_array_output(session):
def test_managed_function_dataframe_apply_axis_1_array_output(session, dataset_id):
bf_df = bigframes.dataframe.DataFrame(
{
"Id": [1, 2, 3],
Expand All @@ -498,6 +515,8 @@ def test_managed_function_dataframe_apply_axis_1_array_output(session):
@session.udf(
input_types=[int, float, str],
output_type=list[str],
dataset=dataset_id,
name=prefixer.create_prefix(),
)
def foo(x, y, z):
return [str(x), str(y), z]
Expand Down Expand Up @@ -591,12 +610,16 @@ def foo(x, y, z):
],
)
def test_managed_function_with_connection(
session, scalars_dfs, request, connection_fixture
session, scalars_dfs, dataset_id, request, connection_fixture
):
try:
bigquery_connection = request.getfixturevalue(connection_fixture)

@session.udf(bigquery_connection=bigquery_connection)
@session.udf(
bigquery_connection=bigquery_connection,
dataset=dataset_id,
name=prefixer.create_prefix(),
)
def foo(x: int) -> int:
return x + 10

Expand Down
Morty Proxy This is a proxified and sanitized view of the page, visit original site.