Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

fix!: make dataset and name params mandatory in udf #1619

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We'll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions 1 README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Version 2.0 introduces breaking changes for improved security and performance. K
``cloud_function_service_account="default"``. And network ingress now defaults to ``"internal-only"``.
* **@remote_function Argument Passing:** Arguments other than ``input_types``, ``output_type``, and ``dataset``
to ``remote_function`` must now be passed using keyword syntax, as positional arguments are no longer supported.
* **@udf Argument Passing:** Arguments ``dataset`` and ``name`` to ``udf`` are now mandatory.
* **Endpoint Connections:** Automatic fallback to locational endpoints in certain regions is removed.
* **LLM Updates (Gemini Integration):** Integrations now default to the ``gemini-2.0-flash-001`` model.
PaLM2 support has been removed; please migrate any existing PaLM2 usage to Gemini. **Note:** The current default
Expand Down
2 changes: 1 addition & 1 deletion 2 bigframes/functions/_function_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,7 @@ def udf(
``bigframes.pandas.reset_session``/
``bigframes.pandas.clean_up_by_session_id``) does not clean up
the function, and leaves it for the user to manage the function
and the associated cloud function directly.
directly.
packages (str[], Optional):
Explicit name of the external package dependencies. Each
dependency is added to the `requirements.txt` as is, and can be
Expand Down
4 changes: 2 additions & 2 deletions 4 bigframes/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,9 @@ def udf(
*,
input_types: Union[None, type, Sequence[type]] = None,
output_type: Optional[type] = None,
dataset: Optional[str] = None,
dataset: str,
bigquery_connection: Optional[str] = None,
name: Optional[str] = None,
name: str,
packages: Optional[Sequence[str]] = None,
):
return global_session.with_default_session(
Expand Down
19 changes: 9 additions & 10 deletions 19 bigframes/session/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1444,9 +1444,9 @@ def udf(
*,
input_types: Union[None, type, Sequence[type]] = None,
output_type: Optional[type] = None,
dataset: Optional[str] = None,
dataset: str,
bigquery_connection: Optional[str] = None,
name: Optional[str] = None,
name: str,
packages: Optional[Sequence[str]] = None,
):
"""Decorator to turn a Python user defined function (udf) into a
Expand All @@ -1473,11 +1473,10 @@ def udf(
be specified. The supported output types are `bool`, `bytes`,
`float`, `int`, `str`, `list[bool]`, `list[float]`, `list[int]`
and `list[str]`.
dataset (str, Optional):
dataset (str):
Dataset in which to create a BigQuery managed function. It
should be in `<project_id>.<dataset_name>` or `<dataset_name>`
format. If this parameter is not provided then session dataset
id is used.
format.
bigquery_connection (str, Optional):
Name of the BigQuery connection. It is used to provide an
identity to the serverless instances running the user code. It
Expand All @@ -1489,18 +1488,18 @@ def udf(
will be created without any connection. A udf without a
connection has no internet access and no access to other GCP
services.
name (str, Optional):
name (str):
Explicit name of the persisted BigQuery managed function. Use it
with caution, because more than one users working in the same
project and dataset could overwrite each other's managed
functions if they use the same persistent name. When an explicit
name is provided, any session specific clean up (
functions if they use the same persistent name. Please note that
any session specific clean up (
``bigframes.session.Session.close``/
``bigframes.pandas.close_session``/
``bigframes.pandas.reset_session``/
``bigframes.pandas.clean_up_by_session_id``) does not clean up
the function, and leaves it for the user to manage the function
and the associated cloud function directly.
this function, and leaves it for the user to manage the function
directly.
packages (str[], Optional):
Explicit name of the external package dependencies. Each
dependency is added to the `requirements.txt` as is, and can be
Expand Down
55 changes: 39 additions & 16 deletions 55 tests/system/large/functions/test_managed_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,15 @@
import pandas
import pyarrow
import pytest
import test_utils.prefixer

import bigframes
import bigframes.exceptions as bfe
import bigframes.pandas as bpd
from tests.system.utils import cleanup_function_assets

prefixer = test_utils.prefixer.Prefixer("bigframes", "")


def test_managed_function_multiply_with_ibis(
session,
Expand All @@ -37,6 +40,7 @@ def test_managed_function_multiply_with_ibis(
input_types=[int, int],
output_type=int,
dataset=dataset_id,
name=prefixer.create_prefix(),
)
def multiply(x, y):
return x * y
Expand Down Expand Up @@ -87,6 +91,7 @@ def test_managed_function_stringify_with_ibis(
input_types=[int],
output_type=str,
dataset=dataset_id,
name=prefixer.create_prefix(),
)
def stringify(x):
return f"I got {x}"
Expand Down Expand Up @@ -123,7 +128,10 @@ def stringify(x):
def test_managed_function_array_output(session, scalars_dfs, dataset_id):
try:

@session.udf(dataset=dataset_id)
@session.udf(
dataset=dataset_id,
name=prefixer.create_prefix(),
)
def featurize(x: int) -> list[float]:
return [float(i) for i in [x, x + 1, x + 2]]

Expand Down Expand Up @@ -160,10 +168,10 @@ def featurize(x: int) -> list[float]:
cleanup_function_assets(featurize, session.bqclient, ignore_failures=False)


def test_managed_function_series_apply(session, scalars_dfs):
def test_managed_function_series_apply(session, dataset_id, scalars_dfs):
try:

@session.udf()
@session.udf(dataset=dataset_id, name=prefixer.create_prefix())
def foo(x: int) -> bytes:
return bytes(abs(x))

Expand Down Expand Up @@ -214,13 +222,14 @@ def foo(x: int) -> bytes:

def test_managed_function_series_apply_array_output(
session,
dataset_id,
scalars_dfs,
):
try:

with pytest.warns(bfe.PreviewWarning, match="udf is in preview."):

@session.udf()
@session.udf(dataset=dataset_id, name=prefixer.create_prefix())
def foo_list(x: int) -> list[float]:
return [float(abs(x)), float(abs(x) + 1)]

Expand All @@ -243,7 +252,7 @@ def foo_list(x: int) -> list[float]:
cleanup_function_assets(foo_list, session.bqclient, ignore_failures=False)


def test_managed_function_series_combine(session, scalars_dfs):
def test_managed_function_series_combine(session, dataset_id, scalars_dfs):
try:
# This function is deliberately written to not work with NA input.
def add(x: int, y: int) -> int:
Expand All @@ -258,7 +267,9 @@ def add(x: int, y: int) -> int:
# make sure there are NA values in the test column.
assert any([pandas.isna(val) for val in bf_df[int_col_name_with_nulls]])

add_managed_func = session.udf()(add)
add_managed_func = session.udf(
dataset=dataset_id, name=prefixer.create_prefix()
)(add)

# with nulls in the series the managed function application would fail.
with pytest.raises(
Expand Down Expand Up @@ -301,7 +312,7 @@ def add(x: int, y: int) -> int:
)


def test_managed_function_series_combine_array_output(session, scalars_dfs):
def test_managed_function_series_combine_array_output(session, dataset_id, scalars_dfs):
try:

def add_list(x: int, y: int) -> list[int]:
Expand All @@ -316,7 +327,9 @@ def add_list(x: int, y: int) -> list[int]:
# Make sure there are NA values in the test column.
assert any([pandas.isna(val) for val in bf_df[int_col_name_with_nulls]])

add_list_managed_func = session.udf()(add_list)
add_list_managed_func = session.udf(
dataset=dataset_id, name=prefixer.create_prefix()
)(add_list)

# After filtering out nulls the managed function application should work
# similar to pandas.
Expand Down Expand Up @@ -364,7 +377,7 @@ def add_list(x: int, y: int) -> list[int]:
)


def test_managed_function_dataframe_map(session, scalars_dfs):
def test_managed_function_dataframe_map(session, dataset_id, scalars_dfs):
try:

def add_one(x):
Expand All @@ -373,6 +386,8 @@ def add_one(x):
mf_add_one = session.udf(
input_types=[int],
output_type=int,
dataset=dataset_id,
name=prefixer.create_prefix(),
)(add_one)

scalars_df, scalars_pandas_df = scalars_dfs
Expand All @@ -398,9 +413,7 @@ def add_one(x):
cleanup_function_assets(mf_add_one, session.bqclient, ignore_failures=False)


def test_managed_function_dataframe_map_array_output(
session, scalars_dfs, dataset_id_permanent
):
def test_managed_function_dataframe_map_array_output(session, scalars_dfs, dataset_id):
try:

def add_one_list(x):
Expand All @@ -409,6 +422,8 @@ def add_one_list(x):
mf_add_one_list = session.udf(
input_types=[int],
output_type=list[int],
dataset=dataset_id,
name=prefixer.create_prefix(),
)(add_one_list)

scalars_df, scalars_pandas_df = scalars_dfs
Expand Down Expand Up @@ -439,7 +454,7 @@ def add_one_list(x):
)


def test_managed_function_dataframe_apply_axis_1(session, scalars_dfs):
def test_managed_function_dataframe_apply_axis_1(session, dataset_id, scalars_dfs):
try:
scalars_df, scalars_pandas_df = scalars_dfs
series = scalars_df["int64_too"]
Expand All @@ -451,6 +466,8 @@ def add_ints(x, y):
add_ints_mf = session.udf(
input_types=[int, int],
output_type=int,
dataset=dataset_id,
name=prefixer.create_prefix(),
)(add_ints)
assert add_ints_mf.bigframes_bigquery_function # type: ignore

Expand All @@ -475,7 +492,7 @@ def add_ints(x, y):
cleanup_function_assets(add_ints_mf, session.bqclient, ignore_failures=False)


def test_managed_function_dataframe_apply_axis_1_array_output(session):
def test_managed_function_dataframe_apply_axis_1_array_output(session, dataset_id):
bf_df = bigframes.dataframe.DataFrame(
{
"Id": [1, 2, 3],
Expand All @@ -498,6 +515,8 @@ def test_managed_function_dataframe_apply_axis_1_array_output(session):
@session.udf(
input_types=[int, float, str],
output_type=list[str],
dataset=dataset_id,
name=prefixer.create_prefix(),
)
def foo(x, y, z):
return [str(x), str(y), z]
Expand Down Expand Up @@ -591,12 +610,16 @@ def foo(x, y, z):
],
)
def test_managed_function_with_connection(
session, scalars_dfs, request, connection_fixture
session, scalars_dfs, dataset_id, request, connection_fixture
):
try:
bigquery_connection = request.getfixturevalue(connection_fixture)

@session.udf(bigquery_connection=bigquery_connection)
@session.udf(
bigquery_connection=bigquery_connection,
dataset=dataset_id,
name=prefixer.create_prefix(),
)
def foo(x: int) -> int:
return x + 10

Expand Down
Morty Proxy This is a proxified and sanitized view of the page, visit original site.