From 494e35dbd187e0758834d16e790ca7202cbcc4eb Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Thu, 26 Sep 2024 19:08:03 +0000
Subject: [PATCH 1/9] refactor: move reader functions from __init__.py to a
 separate file under pandas package

---
 bigframes/pandas/__init__.py | 331 ++-------------------------------
 bigframes/pandas/readers.py  | 347 +++++++++++++++++++++++++++++++++++
 2 files changed, 364 insertions(+), 314 deletions(-)
 create mode 100644 bigframes/pandas/readers.py

diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py
index 1bdf49eaf5..89bf6c8047 100644
--- a/bigframes/pandas/__init__.py
+++ b/bigframes/pandas/__init__.py
@@ -21,20 +21,7 @@
 import inspect
 import sys
 import typing
-from typing import (
-    Any,
-    Callable,
-    Dict,
-    IO,
-    Iterable,
-    List,
-    Literal,
-    MutableSequence,
-    Optional,
-    Sequence,
-    Tuple,
-    Union,
-)
+from typing import Any, Iterable, List, Literal, Optional, Sequence, Tuple, Union
 
 import bigframes_vendored.constants as constants
 import bigframes_vendored.pandas.core.reshape.concat as vendored_pandas_concat
@@ -42,16 +29,7 @@
 import bigframes_vendored.pandas.core.reshape.merge as vendored_pandas_merge
 import bigframes_vendored.pandas.core.reshape.tile as vendored_pandas_tile
 import bigframes_vendored.pandas.core.tools.datetimes as vendored_pandas_datetimes
-import bigframes_vendored.pandas.io.gbq as vendored_pandas_gbq
-from google.cloud import bigquery
-import numpy
 import pandas
-from pandas._typing import (
-    CompressionOptions,
-    FilePath,
-    ReadPickleBuffer,
-    StorageOptions,
-)
 
 import bigframes._config as config
 import bigframes.core.blocks
@@ -65,6 +43,18 @@
 import bigframes.enums
 import bigframes.functions._utils as functions_utils
 import bigframes.operations as ops
+from bigframes.pandas.readers import (
+    read_csv,
+    read_gbq,
+    read_gbq_function,
+    read_gbq_model,
+    read_gbq_query,
+    read_gbq_table,
+    read_json,
+    read_pandas,
+    read_parquet,
+    read_pickle,
+)
 import bigframes.series
 import bigframes.session
 import bigframes.session._io.bigquery
@@ -373,286 +363,6 @@ def merge(
 merge.__doc__ = vendored_pandas_merge.merge.__doc__
 
 
-def _set_default_session_location_if_possible(query):
-    # Set the location as per the query if this is the first query the user is
-    # running and:
-    # (1) Default session has not started yet, and
-    # (2) Location is not set yet, and
-    # (3) Use of regional endpoints is not set.
-    # If query is a table name, then it would be the location of the table.
-    # If query is a SQL with a table, then it would be table's location.
-    # If query is a SQL with no table, then it would be the BQ default location.
-    if (
-        options.bigquery._session_started
-        or options.bigquery.location
-        or options.bigquery.use_regional_endpoints
-    ):
-        return
-
-    clients_provider = bigframes.session.clients.ClientsProvider(
-        project=options.bigquery.project,
-        location=options.bigquery.location,
-        use_regional_endpoints=options.bigquery.use_regional_endpoints,
-        credentials=options.bigquery.credentials,
-        application_name=options.bigquery.application_name,
-        bq_kms_key_name=options.bigquery.kms_key_name,
-    )
-
-    bqclient = clients_provider.bqclient
-
-    if bigframes.session._io.bigquery.is_query(query):
-        # Intentionally run outside of the session so that we can detect the
-        # location before creating the session. Since it's a dry_run, labels
-        # aren't necessary.
-        job = bqclient.query(query, bigquery.QueryJobConfig(dry_run=True))
-        options.bigquery.location = job.location
-    else:
-        table = bqclient.get_table(query)
-        options.bigquery.location = table.location
-
-
-# Note: the following methods are duplicated from Session. This duplication
-# enables the following:
-#
-# 1. Static type checking knows the argument and return types, which is
-#    difficult to do with decorators. Aside: When we require Python 3.10, we
-#    can use Concatenate for generic typing in decorators. See:
-#    https://stackoverflow.com/a/68290080/101923
-# 2. docstrings get processed by static processing tools, such as VS Code's
-#    autocomplete.
-# 3. Positional arguments function as expected. If we were to pull in the
-#    methods directly from Session, a Session object would need to be the first
-#    argument, even if we allow a default value.
-# 4. Allows to set BigQuery options for the BigFrames session based on the
-#    method and its arguments.
-
-
-def read_csv(
-    filepath_or_buffer: str | IO["bytes"],
-    *,
-    sep: Optional[str] = ",",
-    header: Optional[int] = 0,
-    names: Optional[
-        Union[MutableSequence[Any], numpy.ndarray[Any, Any], Tuple[Any, ...], range]
-    ] = None,
-    index_col: Optional[
-        Union[
-            int,
-            str,
-            Sequence[Union[str, int]],
-            bigframes.enums.DefaultIndexKind,
-            Literal[False],
-        ]
-    ] = None,
-    usecols: Optional[
-        Union[
-            MutableSequence[str],
-            Tuple[str, ...],
-            Sequence[int],
-            pandas.Series,
-            pandas.Index,
-            numpy.ndarray[Any, Any],
-            Callable[[Any], bool],
-        ]
-    ] = None,
-    dtype: Optional[Dict] = None,
-    engine: Optional[
-        Literal["c", "python", "pyarrow", "python-fwf", "bigquery"]
-    ] = None,
-    encoding: Optional[str] = None,
-    **kwargs,
-) -> bigframes.dataframe.DataFrame:
-    return global_session.with_default_session(
-        bigframes.session.Session.read_csv,
-        filepath_or_buffer=filepath_or_buffer,
-        sep=sep,
-        header=header,
-        names=names,
-        index_col=index_col,
-        usecols=usecols,
-        dtype=dtype,
-        engine=engine,
-        encoding=encoding,
-        **kwargs,
-    )
-
-
-read_csv.__doc__ = inspect.getdoc(bigframes.session.Session.read_csv)
-
-
-def read_json(
-    path_or_buf: str | IO["bytes"],
-    *,
-    orient: Literal[
-        "split", "records", "index", "columns", "values", "table"
-    ] = "columns",
-    dtype: Optional[Dict] = None,
-    encoding: Optional[str] = None,
-    lines: bool = False,
-    engine: Literal["ujson", "pyarrow", "bigquery"] = "ujson",
-    **kwargs,
-) -> bigframes.dataframe.DataFrame:
-    return global_session.with_default_session(
-        bigframes.session.Session.read_json,
-        path_or_buf=path_or_buf,
-        orient=orient,
-        dtype=dtype,
-        encoding=encoding,
-        lines=lines,
-        engine=engine,
-        **kwargs,
-    )
-
-
-read_json.__doc__ = inspect.getdoc(bigframes.session.Session.read_json)
-
-
-def read_gbq(
-    query_or_table: str,
-    *,
-    index_col: Iterable[str] | str | bigframes.enums.DefaultIndexKind = (),
-    columns: Iterable[str] = (),
-    configuration: Optional[Dict] = None,
-    max_results: Optional[int] = None,
-    filters: vendored_pandas_gbq.FiltersType = (),
-    use_cache: Optional[bool] = None,
-    col_order: Iterable[str] = (),
-) -> bigframes.dataframe.DataFrame:
-    _set_default_session_location_if_possible(query_or_table)
-    return global_session.with_default_session(
-        bigframes.session.Session.read_gbq,
-        query_or_table,
-        index_col=index_col,
-        columns=columns,
-        configuration=configuration,
-        max_results=max_results,
-        filters=filters,
-        use_cache=use_cache,
-        col_order=col_order,
-    )
-
-
-read_gbq.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq)
-
-
-def read_gbq_model(model_name: str):
-    return global_session.with_default_session(
-        bigframes.session.Session.read_gbq_model,
-        model_name,
-    )
-
-
-read_gbq_model.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_model)
-
-
-def read_gbq_query(
-    query: str,
-    *,
-    index_col: Iterable[str] | str | bigframes.enums.DefaultIndexKind = (),
-    columns: Iterable[str] = (),
-    configuration: Optional[Dict] = None,
-    max_results: Optional[int] = None,
-    use_cache: Optional[bool] = None,
-    col_order: Iterable[str] = (),
-    filters: vendored_pandas_gbq.FiltersType = (),
-) -> bigframes.dataframe.DataFrame:
-    _set_default_session_location_if_possible(query)
-    return global_session.with_default_session(
-        bigframes.session.Session.read_gbq_query,
-        query,
-        index_col=index_col,
-        columns=columns,
-        configuration=configuration,
-        max_results=max_results,
-        use_cache=use_cache,
-        col_order=col_order,
-        filters=filters,
-    )
-
-
-read_gbq_query.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_query)
-
-
-def read_gbq_table(
-    query: str,
-    *,
-    index_col: Iterable[str] | str | bigframes.enums.DefaultIndexKind = (),
-    columns: Iterable[str] = (),
-    max_results: Optional[int] = None,
-    filters: vendored_pandas_gbq.FiltersType = (),
-    use_cache: bool = True,
-    col_order: Iterable[str] = (),
-) -> bigframes.dataframe.DataFrame:
-    _set_default_session_location_if_possible(query)
-    return global_session.with_default_session(
-        bigframes.session.Session.read_gbq_table,
-        query,
-        index_col=index_col,
-        columns=columns,
-        max_results=max_results,
-        filters=filters,
-        use_cache=use_cache,
-        col_order=col_order,
-    )
-
-
-read_gbq_table.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_table)
-
-
-@typing.overload
-def read_pandas(pandas_dataframe: pandas.DataFrame) -> bigframes.dataframe.DataFrame:
-    ...
-
-
-@typing.overload
-def read_pandas(pandas_dataframe: pandas.Series) -> bigframes.series.Series:
-    ...
-
-
-@typing.overload
-def read_pandas(pandas_dataframe: pandas.Index) -> bigframes.core.indexes.Index:
-    ...
-
-
-def read_pandas(pandas_dataframe: Union[pandas.DataFrame, pandas.Series, pandas.Index]):
-    return global_session.with_default_session(
-        bigframes.session.Session.read_pandas,
-        pandas_dataframe,
-    )
-
-
-read_pandas.__doc__ = inspect.getdoc(bigframes.session.Session.read_pandas)
-
-
-def read_pickle(
-    filepath_or_buffer: FilePath | ReadPickleBuffer,
-    compression: CompressionOptions = "infer",
-    storage_options: StorageOptions = None,
-):
-    return global_session.with_default_session(
-        bigframes.session.Session.read_pickle,
-        filepath_or_buffer=filepath_or_buffer,
-        compression=compression,
-        storage_options=storage_options,
-    )
-
-
-read_pickle.__doc__ = inspect.getdoc(bigframes.session.Session.read_pickle)
-
-
-def read_parquet(
-    path: str | IO["bytes"], *, engine: str = "auto"
-) -> bigframes.dataframe.DataFrame:
-    return global_session.with_default_session(
-        bigframes.session.Session.read_parquet,
-        path,
-        engine=engine,
-    )
-
-
-read_parquet.__doc__ = inspect.getdoc(bigframes.session.Session.read_parquet)
-
-
 def remote_function(
     input_types: Union[None, type, Sequence[type]] = None,
     output_type: Optional[type] = None,
@@ -697,17 +407,6 @@ def remote_function(
 remote_function.__doc__ = inspect.getdoc(bigframes.session.Session.remote_function)
 
 
-def read_gbq_function(function_name: str, is_row_processor: bool = False):
-    return global_session.with_default_session(
-        bigframes.session.Session.read_gbq_function,
-        function_name=function_name,
-        is_row_processor=is_row_processor,
-    )
-
-
-read_gbq_function.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_function)
-
-
 @typing.overload
 def to_datetime(
     arg: Union[
@@ -901,7 +600,11 @@ def reset_session():
     "read_gbq",
     "read_gbq_function",
     "read_gbq_model",
+    "read_gbq_query",
+    "read_gbq_table",
+    "read_json",
     "read_pandas",
+    "read_parquet",
     "read_pickle",
     "remote_function",
     "to_datetime",
diff --git a/bigframes/pandas/readers.py b/bigframes/pandas/readers.py
new file mode 100644
index 0000000000..4e08b3ef5e
--- /dev/null
+++ b/bigframes/pandas/readers.py
@@ -0,0 +1,347 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import inspect
+import typing
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    IO,
+    Iterable,
+    Literal,
+    MutableSequence,
+    Optional,
+    Sequence,
+    Tuple,
+    Union,
+)
+
+import bigframes_vendored.pandas.io.gbq as vendored_pandas_gbq
+from google.cloud import bigquery
+import numpy
+import pandas
+from pandas._typing import (
+    CompressionOptions,
+    FilePath,
+    ReadPickleBuffer,
+    StorageOptions,
+)
+
+import bigframes._config as config
+import bigframes.core.blocks
+import bigframes.core.global_session as global_session
+import bigframes.core.indexes
+import bigframes.core.joins
+import bigframes.core.reshape
+import bigframes.core.tools
+import bigframes.dataframe
+import bigframes.enums
+import bigframes.series
+import bigframes.session
+import bigframes.session._io.bigquery
+import bigframes.session.clients
+import bigframes.version
+
+# Note: the following methods are duplicated from Session. This duplication
+# enables the following:
+#
+# 1. Static type checking knows the argument and return types, which is
+#    difficult to do with decorators. Aside: When we require Python 3.10, we
+#    can use Concatenate for generic typing in decorators. See:
+#    https://stackoverflow.com/a/68290080/101923
+# 2. docstrings get processed by static processing tools, such as VS Code's
+#    autocomplete.
+# 3. Positional arguments function as expected. If we were to pull in the
+#    methods directly from Session, a Session object would need to be the first
+#    argument, even if we allow a default value.
+# 4. Allows setting BigQuery options for the BigFrames session based on the
+#    method and its arguments.
+
+
+def read_csv(
+    filepath_or_buffer: str | IO["bytes"],
+    *,
+    sep: Optional[str] = ",",
+    header: Optional[int] = 0,
+    names: Optional[
+        Union[MutableSequence[Any], numpy.ndarray[Any, Any], Tuple[Any, ...], range]
+    ] = None,
+    index_col: Optional[
+        Union[
+            int,
+            str,
+            Sequence[Union[str, int]],
+            bigframes.enums.DefaultIndexKind,
+            Literal[False],
+        ]
+    ] = None,
+    usecols: Optional[
+        Union[
+            MutableSequence[str],
+            Tuple[str, ...],
+            Sequence[int],
+            pandas.Series,
+            pandas.Index,
+            numpy.ndarray[Any, Any],
+            Callable[[Any], bool],
+        ]
+    ] = None,
+    dtype: Optional[Dict] = None,
+    engine: Optional[
+        Literal["c", "python", "pyarrow", "python-fwf", "bigquery"]
+    ] = None,
+    encoding: Optional[str] = None,
+    **kwargs,
+) -> bigframes.dataframe.DataFrame:
+    return global_session.with_default_session(
+        bigframes.session.Session.read_csv,
+        filepath_or_buffer=filepath_or_buffer,
+        sep=sep,
+        header=header,
+        names=names,
+        index_col=index_col,
+        usecols=usecols,
+        dtype=dtype,
+        engine=engine,
+        encoding=encoding,
+        **kwargs,
+    )
+
+
+read_csv.__doc__ = inspect.getdoc(bigframes.session.Session.read_csv)
+
+
+def read_json(
+    path_or_buf: str | IO["bytes"],
+    *,
+    orient: Literal[
+        "split", "records", "index", "columns", "values", "table"
+    ] = "columns",
+    dtype: Optional[Dict] = None,
+    encoding: Optional[str] = None,
+    lines: bool = False,
+    engine: Literal["ujson", "pyarrow", "bigquery"] = "ujson",
+    **kwargs,
+) -> bigframes.dataframe.DataFrame:
+    return global_session.with_default_session(
+        bigframes.session.Session.read_json,
+        path_or_buf=path_or_buf,
+        orient=orient,
+        dtype=dtype,
+        encoding=encoding,
+        lines=lines,
+        engine=engine,
+        **kwargs,
+    )
+
+
+read_json.__doc__ = inspect.getdoc(bigframes.session.Session.read_json)
+
+
+def read_gbq(
+    query_or_table: str,
+    *,
+    index_col: Iterable[str] | str | bigframes.enums.DefaultIndexKind = (),
+    columns: Iterable[str] = (),
+    configuration: Optional[Dict] = None,
+    max_results: Optional[int] = None,
+    filters: vendored_pandas_gbq.FiltersType = (),
+    use_cache: Optional[bool] = None,
+    col_order: Iterable[str] = (),
+) -> bigframes.dataframe.DataFrame:
+    _set_default_session_location_if_possible(query_or_table)
+    return global_session.with_default_session(
+        bigframes.session.Session.read_gbq,
+        query_or_table,
+        index_col=index_col,
+        columns=columns,
+        configuration=configuration,
+        max_results=max_results,
+        filters=filters,
+        use_cache=use_cache,
+        col_order=col_order,
+    )
+
+
+read_gbq.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq)
+
+
+def read_gbq_model(model_name: str):
+    return global_session.with_default_session(
+        bigframes.session.Session.read_gbq_model,
+        model_name,
+    )
+
+
+read_gbq_model.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_model)
+
+
+def read_gbq_query(
+    query: str,
+    *,
+    index_col: Iterable[str] | str | bigframes.enums.DefaultIndexKind = (),
+    columns: Iterable[str] = (),
+    configuration: Optional[Dict] = None,
+    max_results: Optional[int] = None,
+    use_cache: Optional[bool] = None,
+    col_order: Iterable[str] = (),
+    filters: vendored_pandas_gbq.FiltersType = (),
+) -> bigframes.dataframe.DataFrame:
+    _set_default_session_location_if_possible(query)
+    return global_session.with_default_session(
+        bigframes.session.Session.read_gbq_query,
+        query,
+        index_col=index_col,
+        columns=columns,
+        configuration=configuration,
+        max_results=max_results,
+        use_cache=use_cache,
+        col_order=col_order,
+        filters=filters,
+    )
+
+
+read_gbq_query.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_query)
+
+
+def read_gbq_table(
+    query: str,
+    *,
+    index_col: Iterable[str] | str | bigframes.enums.DefaultIndexKind = (),
+    columns: Iterable[str] = (),
+    max_results: Optional[int] = None,
+    filters: vendored_pandas_gbq.FiltersType = (),
+    use_cache: bool = True,
+    col_order: Iterable[str] = (),
+) -> bigframes.dataframe.DataFrame:
+    _set_default_session_location_if_possible(query)
+    return global_session.with_default_session(
+        bigframes.session.Session.read_gbq_table,
+        query,
+        index_col=index_col,
+        columns=columns,
+        max_results=max_results,
+        filters=filters,
+        use_cache=use_cache,
+        col_order=col_order,
+    )
+
+
+read_gbq_table.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_table)
+
+
+@typing.overload
+def read_pandas(pandas_dataframe: pandas.DataFrame) -> bigframes.dataframe.DataFrame:
+    ...
+
+
+@typing.overload
+def read_pandas(pandas_dataframe: pandas.Series) -> bigframes.series.Series:
+    ...
+
+
+@typing.overload
+def read_pandas(pandas_dataframe: pandas.Index) -> bigframes.core.indexes.Index:
+    ...
+
+
+def read_pandas(pandas_dataframe: Union[pandas.DataFrame, pandas.Series, pandas.Index]):
+    return global_session.with_default_session(
+        bigframes.session.Session.read_pandas,
+        pandas_dataframe,
+    )
+
+
+read_pandas.__doc__ = inspect.getdoc(bigframes.session.Session.read_pandas)
+
+
+def read_pickle(
+    filepath_or_buffer: FilePath | ReadPickleBuffer,
+    compression: CompressionOptions = "infer",
+    storage_options: StorageOptions = None,
+):
+    return global_session.with_default_session(
+        bigframes.session.Session.read_pickle,
+        filepath_or_buffer=filepath_or_buffer,
+        compression=compression,
+        storage_options=storage_options,
+    )
+
+
+read_pickle.__doc__ = inspect.getdoc(bigframes.session.Session.read_pickle)
+
+
+def read_parquet(
+    path: str | IO["bytes"], *, engine: str = "auto"
+) -> bigframes.dataframe.DataFrame:
+    return global_session.with_default_session(
+        bigframes.session.Session.read_parquet,
+        path,
+        engine=engine,
+    )
+
+
+read_parquet.__doc__ = inspect.getdoc(bigframes.session.Session.read_parquet)
+
+
+def read_gbq_function(function_name: str, is_row_processor: bool = False):
+    return global_session.with_default_session(
+        bigframes.session.Session.read_gbq_function,
+        function_name=function_name,
+        is_row_processor=is_row_processor,
+    )
+
+
+read_gbq_function.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_function)
+
+
+def _set_default_session_location_if_possible(query):
+    # Set the location as per the query if this is the first query the user is
+    # running and:
+    # (1) Default session has not started yet, and
+    # (2) Location is not set yet, and
+    # (3) Use of regional endpoints is not set.
+    # If query is a table name, then it would be the location of the table.
+    # If query is a SQL with a table, then it would be table's location.
+    # If query is a SQL with no table, then it would be the BQ default location.
+    if (
+        config.options.bigquery._session_started
+        or config.options.bigquery.location
+        or config.options.bigquery.use_regional_endpoints
+    ):
+        return
+
+    clients_provider = bigframes.session.clients.ClientsProvider(
+        project=config.options.bigquery.project,
+        location=config.options.bigquery.location,
+        use_regional_endpoints=config.options.bigquery.use_regional_endpoints,
+        credentials=config.options.bigquery.credentials,
+        application_name=config.options.bigquery.application_name,
+        bq_kms_key_name=config.options.bigquery.kms_key_name,
+    )
+
+    bqclient = clients_provider.bqclient
+
+    if bigframes.session._io.bigquery.is_query(query):
+        # Intentionally run outside of the session so that we can detect the
+        # location before creating the session. Since it's a dry_run, labels
+        # aren't necessary.
+        job = bqclient.query(query, bigquery.QueryJobConfig(dry_run=True))
+        config.options.bigquery.location = job.location
+    else:
+        table = bqclient.get_table(query)
+        config.options.bigquery.location = table.location

From fa385644ccdb810eef9d8301056c029551d20612 Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Thu, 26 Sep 2024 19:17:55 +0000
Subject: [PATCH 2/9] fix typo in __all__

---
 bigframes/pandas/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py
index 89bf6c8047..68eb0deddb 100644
--- a/bigframes/pandas/__init__.py
+++ b/bigframes/pandas/__init__.py
@@ -592,7 +592,7 @@ def reset_session():
             pass
 
 # Use __all__ to let type checkers know what is part of the public API.
-__all___ = [
+__all__ = [
     # Functions
     "concat",
     "merge",

From ed79ff2a73b2506ce957f658c7e5986da93ae294 Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Thu, 26 Sep 2024 20:10:39 +0000
Subject: [PATCH 3/9] move read functions under bigframes.pandas.io.api to
 reflect the structure of pandas

---
 bigframes/pandas/__init__.py               |  2 +-
 bigframes/pandas/io/__init__.py            | 14 ++++++++++++++
 bigframes/pandas/{readers.py => io/api.py} |  0
 3 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 bigframes/pandas/io/__init__.py
 rename bigframes/pandas/{readers.py => io/api.py} (100%)

diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py
index 68eb0deddb..eaf80bb444 100644
--- a/bigframes/pandas/__init__.py
+++ b/bigframes/pandas/__init__.py
@@ -43,7 +43,7 @@
 import bigframes.enums
 import bigframes.functions._utils as functions_utils
 import bigframes.operations as ops
-from bigframes.pandas.readers import (
+from bigframes.pandas.io.api import (
     read_csv,
     read_gbq,
     read_gbq_function,
diff --git a/bigframes/pandas/io/__init__.py b/bigframes/pandas/io/__init__.py
new file mode 100644
index 0000000000..c38dc3b14a
--- /dev/null
+++ b/bigframes/pandas/io/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
diff --git a/bigframes/pandas/readers.py b/bigframes/pandas/io/api.py
similarity index 100%
rename from bigframes/pandas/readers.py
rename to bigframes/pandas/io/api.py

From 18e52cc961c2f6b40c0a1a6b0c2b5208f7b0e8a5 Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Thu, 26 Sep 2024 20:10:39 +0000
Subject: [PATCH 4/9] move read functions under bigframes.pandas.io.api to
 reflect the structure of pandas

---
 bigframes/pandas/__init__.py               |  2 +-
 bigframes/pandas/io/__init__.py            | 13 +++++++++++++
 bigframes/pandas/{readers.py => io/api.py} |  0
 3 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 bigframes/pandas/io/__init__.py
 rename bigframes/pandas/{readers.py => io/api.py} (100%)

diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py
index 68eb0deddb..eaf80bb444 100644
--- a/bigframes/pandas/__init__.py
+++ b/bigframes/pandas/__init__.py
@@ -43,7 +43,7 @@
 import bigframes.enums
 import bigframes.functions._utils as functions_utils
 import bigframes.operations as ops
-from bigframes.pandas.readers import (
+from bigframes.pandas.io.api import (
     read_csv,
     read_gbq,
     read_gbq_function,
diff --git a/bigframes/pandas/io/__init__.py b/bigframes/pandas/io/__init__.py
new file mode 100644
index 0000000000..6d5e14bcf4
--- /dev/null
+++ b/bigframes/pandas/io/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/bigframes/pandas/readers.py b/bigframes/pandas/io/api.py
similarity index 100%
rename from bigframes/pandas/readers.py
rename to bigframes/pandas/io/api.py

From 10c6533ea0a1ff580739a98312eeed1ec259572a Mon Sep 17 00:00:00 2001
From: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Date: Thu, 26 Sep 2024 20:13:10 +0000
Subject: [PATCH 5/9] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?=
 =?UTF-8?q?st-processor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
---
 bigframes/pandas/io/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bigframes/pandas/io/__init__.py b/bigframes/pandas/io/__init__.py
index c38dc3b14a..6d5e14bcf4 100644
--- a/bigframes/pandas/io/__init__.py
+++ b/bigframes/pandas/io/__init__.py
@@ -11,4 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-

From 288eee413b0ce549efebc10b83e32e97bdd657c7 Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Thu, 26 Sep 2024 21:07:11 +0000
Subject: [PATCH 6/9] update function import

---
 bigframes/streaming/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bigframes/streaming/__init__.py b/bigframes/streaming/__init__.py
index 66f345f0ab..d439d622a2 100644
--- a/bigframes/streaming/__init__.py
+++ b/bigframes/streaming/__init__.py
@@ -15,13 +15,13 @@
 import inspect
 
 import bigframes.core.global_session as global_session
-import bigframes.pandas as bpd
+from bigframes.pandas.io.api import _set_default_session_location_if_possible
 import bigframes.session
 import bigframes.streaming.dataframe as streaming_dataframe
 
 
 def read_gbq_table(table: str) -> streaming_dataframe.StreamingDataFrame:
-    bpd._set_default_session_location_if_possible(table)
+    _set_default_session_location_if_possible(table)
     return global_session.with_default_session(
         bigframes.session.Session.read_gbq_table_streaming, table
     )

From 0016b7e75fa63b37cc4ae91a7ea2408a34d9199d Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Thu, 26 Sep 2024 22:49:29 +0000
Subject: [PATCH 7/9] fix missing comma

---
 bigframes/pandas/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py
index eaf80bb444..98da6d826c 100644
--- a/bigframes/pandas/__init__.py
+++ b/bigframes/pandas/__init__.py
@@ -614,7 +614,7 @@ def reset_session():
     "Float64Dtype",
     "Int64Dtype",
     "StringDtype",
-    "ArrowDtype"
+    "ArrowDtype",
     # Class aliases
     "DataFrame",
     "Index",

From 1b3fca6bcd84cc8d9b8b102494987dc17d670394 Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Thu, 26 Sep 2024 22:55:04 +0000
Subject: [PATCH 8/9] try to fix dup doc by directly importing types from
 pandas

---
 bigframes/pandas/__init__.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py
index 98da6d826c..dabb56a33a 100644
--- a/bigframes/pandas/__init__.py
+++ b/bigframes/pandas/__init__.py
@@ -30,6 +30,7 @@
 import bigframes_vendored.pandas.core.reshape.tile as vendored_pandas_tile
 import bigframes_vendored.pandas.core.tools.datetimes as vendored_pandas_datetimes
 import pandas
+from pandas import ArrowDtype, BooleanDtype, Float64Dtype, Int64Dtype, NA, StringDtype
 
 import bigframes._config as config
 import bigframes.core.blocks
@@ -527,14 +528,6 @@ def clean_up_by_session_id(
     )
 
 
-# pandas dtype attributes
-NA = pandas.NA
-BooleanDtype = pandas.BooleanDtype
-Float64Dtype = pandas.Float64Dtype
-Int64Dtype = pandas.Int64Dtype
-StringDtype = pandas.StringDtype
-ArrowDtype = pandas.ArrowDtype
-
 # Class aliases
 # TODO(swast): Make these real classes so we can refer to these in type
 # checking and docstrings.

From 91865c9371759e03cd1f1fd5e3b4ef754e54eae9 Mon Sep 17 00:00:00 2001
From: Shenyang Cai <sycai@google.com>
Date: Thu, 26 Sep 2024 23:43:03 +0000
Subject: [PATCH 9/9] fix doc generation

---
 bigframes/pandas/__init__.py                          | 9 ++++++++-
 docs/reference/bigframes.pandas/general_functions.rst | 1 +
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py
index dabb56a33a..98da6d826c 100644
--- a/bigframes/pandas/__init__.py
+++ b/bigframes/pandas/__init__.py
@@ -30,7 +30,6 @@
 import bigframes_vendored.pandas.core.reshape.tile as vendored_pandas_tile
 import bigframes_vendored.pandas.core.tools.datetimes as vendored_pandas_datetimes
 import pandas
-from pandas import ArrowDtype, BooleanDtype, Float64Dtype, Int64Dtype, NA, StringDtype
 
 import bigframes._config as config
 import bigframes.core.blocks
@@ -528,6 +527,14 @@ def clean_up_by_session_id(
     )
 
 
+# pandas dtype attributes
+NA = pandas.NA
+BooleanDtype = pandas.BooleanDtype
+Float64Dtype = pandas.Float64Dtype
+Int64Dtype = pandas.Int64Dtype
+StringDtype = pandas.StringDtype
+ArrowDtype = pandas.ArrowDtype
+
 # Class aliases
 # TODO(swast): Make these real classes so we can refer to these in type
 # checking and docstrings.
diff --git a/docs/reference/bigframes.pandas/general_functions.rst b/docs/reference/bigframes.pandas/general_functions.rst
index 4fff9aabf8..fff1a9ef59 100644
--- a/docs/reference/bigframes.pandas/general_functions.rst
+++ b/docs/reference/bigframes.pandas/general_functions.rst
@@ -6,3 +6,4 @@ General functions
 .. automodule:: bigframes.pandas
     :members:
     :undoc-members:
+    :noindex: