From 494e35dbd187e0758834d16e790ca7202cbcc4eb Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 26 Sep 2024 19:08:03 +0000 Subject: [PATCH 1/9] refactor: move reader functions from __init__.py to a separate file under pandas package --- bigframes/pandas/__init__.py | 331 ++------------------------------- bigframes/pandas/readers.py | 347 +++++++++++++++++++++++++++++++++++ 2 files changed, 364 insertions(+), 314 deletions(-) create mode 100644 bigframes/pandas/readers.py diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 1bdf49eaf5..89bf6c8047 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -21,20 +21,7 @@ import inspect import sys import typing -from typing import ( - Any, - Callable, - Dict, - IO, - Iterable, - List, - Literal, - MutableSequence, - Optional, - Sequence, - Tuple, - Union, -) +from typing import Any, Iterable, List, Literal, Optional, Sequence, Tuple, Union import bigframes_vendored.constants as constants import bigframes_vendored.pandas.core.reshape.concat as vendored_pandas_concat @@ -42,16 +29,7 @@ import bigframes_vendored.pandas.core.reshape.merge as vendored_pandas_merge import bigframes_vendored.pandas.core.reshape.tile as vendored_pandas_tile import bigframes_vendored.pandas.core.tools.datetimes as vendored_pandas_datetimes -import bigframes_vendored.pandas.io.gbq as vendored_pandas_gbq -from google.cloud import bigquery -import numpy import pandas -from pandas._typing import ( - CompressionOptions, - FilePath, - ReadPickleBuffer, - StorageOptions, -) import bigframes._config as config import bigframes.core.blocks @@ -65,6 +43,18 @@ import bigframes.enums import bigframes.functions._utils as functions_utils import bigframes.operations as ops +from bigframes.pandas.readers import ( + read_csv, + read_gbq, + read_gbq_function, + read_gbq_model, + read_gbq_query, + read_gbq_table, + read_json, + read_pandas, + read_parquet, + read_pickle, +) import bigframes.series import 
bigframes.session import bigframes.session._io.bigquery @@ -373,286 +363,6 @@ def merge( merge.__doc__ = vendored_pandas_merge.merge.__doc__ -def _set_default_session_location_if_possible(query): - # Set the location as per the query if this is the first query the user is - # running and: - # (1) Default session has not started yet, and - # (2) Location is not set yet, and - # (3) Use of regional endpoints is not set. - # If query is a table name, then it would be the location of the table. - # If query is a SQL with a table, then it would be table's location. - # If query is a SQL with no table, then it would be the BQ default location. - if ( - options.bigquery._session_started - or options.bigquery.location - or options.bigquery.use_regional_endpoints - ): - return - - clients_provider = bigframes.session.clients.ClientsProvider( - project=options.bigquery.project, - location=options.bigquery.location, - use_regional_endpoints=options.bigquery.use_regional_endpoints, - credentials=options.bigquery.credentials, - application_name=options.bigquery.application_name, - bq_kms_key_name=options.bigquery.kms_key_name, - ) - - bqclient = clients_provider.bqclient - - if bigframes.session._io.bigquery.is_query(query): - # Intentionally run outside of the session so that we can detect the - # location before creating the session. Since it's a dry_run, labels - # aren't necessary. - job = bqclient.query(query, bigquery.QueryJobConfig(dry_run=True)) - options.bigquery.location = job.location - else: - table = bqclient.get_table(query) - options.bigquery.location = table.location - - -# Note: the following methods are duplicated from Session. This duplication -# enables the following: -# -# 1. Static type checking knows the argument and return types, which is -# difficult to do with decorators. Aside: When we require Python 3.10, we -# can use Concatenate for generic typing in decorators. See: -# https://stackoverflow.com/a/68290080/101923 -# 2. 
docstrings get processed by static processing tools, such as VS Code's -# autocomplete. -# 3. Positional arguments function as expected. If we were to pull in the -# methods directly from Session, a Session object would need to be the first -# argument, even if we allow a default value. -# 4. Allows to set BigQuery options for the BigFrames session based on the -# method and its arguments. - - -def read_csv( - filepath_or_buffer: str | IO["bytes"], - *, - sep: Optional[str] = ",", - header: Optional[int] = 0, - names: Optional[ - Union[MutableSequence[Any], numpy.ndarray[Any, Any], Tuple[Any, ...], range] - ] = None, - index_col: Optional[ - Union[ - int, - str, - Sequence[Union[str, int]], - bigframes.enums.DefaultIndexKind, - Literal[False], - ] - ] = None, - usecols: Optional[ - Union[ - MutableSequence[str], - Tuple[str, ...], - Sequence[int], - pandas.Series, - pandas.Index, - numpy.ndarray[Any, Any], - Callable[[Any], bool], - ] - ] = None, - dtype: Optional[Dict] = None, - engine: Optional[ - Literal["c", "python", "pyarrow", "python-fwf", "bigquery"] - ] = None, - encoding: Optional[str] = None, - **kwargs, -) -> bigframes.dataframe.DataFrame: - return global_session.with_default_session( - bigframes.session.Session.read_csv, - filepath_or_buffer=filepath_or_buffer, - sep=sep, - header=header, - names=names, - index_col=index_col, - usecols=usecols, - dtype=dtype, - engine=engine, - encoding=encoding, - **kwargs, - ) - - -read_csv.__doc__ = inspect.getdoc(bigframes.session.Session.read_csv) - - -def read_json( - path_or_buf: str | IO["bytes"], - *, - orient: Literal[ - "split", "records", "index", "columns", "values", "table" - ] = "columns", - dtype: Optional[Dict] = None, - encoding: Optional[str] = None, - lines: bool = False, - engine: Literal["ujson", "pyarrow", "bigquery"] = "ujson", - **kwargs, -) -> bigframes.dataframe.DataFrame: - return global_session.with_default_session( - bigframes.session.Session.read_json, - path_or_buf=path_or_buf, - 
orient=orient, - dtype=dtype, - encoding=encoding, - lines=lines, - engine=engine, - **kwargs, - ) - - -read_json.__doc__ = inspect.getdoc(bigframes.session.Session.read_json) - - -def read_gbq( - query_or_table: str, - *, - index_col: Iterable[str] | str | bigframes.enums.DefaultIndexKind = (), - columns: Iterable[str] = (), - configuration: Optional[Dict] = None, - max_results: Optional[int] = None, - filters: vendored_pandas_gbq.FiltersType = (), - use_cache: Optional[bool] = None, - col_order: Iterable[str] = (), -) -> bigframes.dataframe.DataFrame: - _set_default_session_location_if_possible(query_or_table) - return global_session.with_default_session( - bigframes.session.Session.read_gbq, - query_or_table, - index_col=index_col, - columns=columns, - configuration=configuration, - max_results=max_results, - filters=filters, - use_cache=use_cache, - col_order=col_order, - ) - - -read_gbq.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq) - - -def read_gbq_model(model_name: str): - return global_session.with_default_session( - bigframes.session.Session.read_gbq_model, - model_name, - ) - - -read_gbq_model.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_model) - - -def read_gbq_query( - query: str, - *, - index_col: Iterable[str] | str | bigframes.enums.DefaultIndexKind = (), - columns: Iterable[str] = (), - configuration: Optional[Dict] = None, - max_results: Optional[int] = None, - use_cache: Optional[bool] = None, - col_order: Iterable[str] = (), - filters: vendored_pandas_gbq.FiltersType = (), -) -> bigframes.dataframe.DataFrame: - _set_default_session_location_if_possible(query) - return global_session.with_default_session( - bigframes.session.Session.read_gbq_query, - query, - index_col=index_col, - columns=columns, - configuration=configuration, - max_results=max_results, - use_cache=use_cache, - col_order=col_order, - filters=filters, - ) - - -read_gbq_query.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_query) - - -def 
read_gbq_table( - query: str, - *, - index_col: Iterable[str] | str | bigframes.enums.DefaultIndexKind = (), - columns: Iterable[str] = (), - max_results: Optional[int] = None, - filters: vendored_pandas_gbq.FiltersType = (), - use_cache: bool = True, - col_order: Iterable[str] = (), -) -> bigframes.dataframe.DataFrame: - _set_default_session_location_if_possible(query) - return global_session.with_default_session( - bigframes.session.Session.read_gbq_table, - query, - index_col=index_col, - columns=columns, - max_results=max_results, - filters=filters, - use_cache=use_cache, - col_order=col_order, - ) - - -read_gbq_table.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_table) - - -@typing.overload -def read_pandas(pandas_dataframe: pandas.DataFrame) -> bigframes.dataframe.DataFrame: - ... - - -@typing.overload -def read_pandas(pandas_dataframe: pandas.Series) -> bigframes.series.Series: - ... - - -@typing.overload -def read_pandas(pandas_dataframe: pandas.Index) -> bigframes.core.indexes.Index: - ... 
- - -def read_pandas(pandas_dataframe: Union[pandas.DataFrame, pandas.Series, pandas.Index]): - return global_session.with_default_session( - bigframes.session.Session.read_pandas, - pandas_dataframe, - ) - - -read_pandas.__doc__ = inspect.getdoc(bigframes.session.Session.read_pandas) - - -def read_pickle( - filepath_or_buffer: FilePath | ReadPickleBuffer, - compression: CompressionOptions = "infer", - storage_options: StorageOptions = None, -): - return global_session.with_default_session( - bigframes.session.Session.read_pickle, - filepath_or_buffer=filepath_or_buffer, - compression=compression, - storage_options=storage_options, - ) - - -read_pickle.__doc__ = inspect.getdoc(bigframes.session.Session.read_pickle) - - -def read_parquet( - path: str | IO["bytes"], *, engine: str = "auto" -) -> bigframes.dataframe.DataFrame: - return global_session.with_default_session( - bigframes.session.Session.read_parquet, - path, - engine=engine, - ) - - -read_parquet.__doc__ = inspect.getdoc(bigframes.session.Session.read_parquet) - - def remote_function( input_types: Union[None, type, Sequence[type]] = None, output_type: Optional[type] = None, @@ -697,17 +407,6 @@ def remote_function( remote_function.__doc__ = inspect.getdoc(bigframes.session.Session.remote_function) -def read_gbq_function(function_name: str, is_row_processor: bool = False): - return global_session.with_default_session( - bigframes.session.Session.read_gbq_function, - function_name=function_name, - is_row_processor=is_row_processor, - ) - - -read_gbq_function.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_function) - - @typing.overload def to_datetime( arg: Union[ @@ -901,7 +600,11 @@ def reset_session(): "read_gbq", "read_gbq_function", "read_gbq_model", + "read_gbq_query", + "read_gbq_table", + "read_json", "read_pandas", + "read_parquet", "read_pickle", "remote_function", "to_datetime", diff --git a/bigframes/pandas/readers.py b/bigframes/pandas/readers.py new file mode 100644 index 
0000000000..4e08b3ef5e --- /dev/null +++ b/bigframes/pandas/readers.py @@ -0,0 +1,347 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import inspect +import typing +from typing import ( + Any, + Callable, + Dict, + IO, + Iterable, + Literal, + MutableSequence, + Optional, + Sequence, + Tuple, + Union, +) + +import bigframes_vendored.pandas.io.gbq as vendored_pandas_gbq +from google.cloud import bigquery +import numpy +import pandas +from pandas._typing import ( + CompressionOptions, + FilePath, + ReadPickleBuffer, + StorageOptions, +) + +import bigframes._config as config +import bigframes.core.blocks +import bigframes.core.global_session as global_session +import bigframes.core.indexes +import bigframes.core.joins +import bigframes.core.reshape +import bigframes.core.tools +import bigframes.dataframe +import bigframes.enums +import bigframes.series +import bigframes.session +import bigframes.session._io.bigquery +import bigframes.session.clients +import bigframes.version + +# Note: the following methods are duplicated from Session. This duplication +# enables the following: +# +# 1. Static type checking knows the argument and return types, which is +# difficult to do with decorators. Aside: When we require Python 3.10, we +# can use Concatenate for generic typing in decorators. See: +# https://stackoverflow.com/a/68290080/101923 +# 2. 
docstrings get processed by static processing tools, such as VS Code's +# autocomplete. +# 3. Positional arguments function as expected. If we were to pull in the +# methods directly from Session, a Session object would need to be the first +# argument, even if we allow a default value. +# 4. Allows to set BigQuery options for the BigFrames session based on the +# method and its arguments. + + +def read_csv( + filepath_or_buffer: str | IO["bytes"], + *, + sep: Optional[str] = ",", + header: Optional[int] = 0, + names: Optional[ + Union[MutableSequence[Any], numpy.ndarray[Any, Any], Tuple[Any, ...], range] + ] = None, + index_col: Optional[ + Union[ + int, + str, + Sequence[Union[str, int]], + bigframes.enums.DefaultIndexKind, + Literal[False], + ] + ] = None, + usecols: Optional[ + Union[ + MutableSequence[str], + Tuple[str, ...], + Sequence[int], + pandas.Series, + pandas.Index, + numpy.ndarray[Any, Any], + Callable[[Any], bool], + ] + ] = None, + dtype: Optional[Dict] = None, + engine: Optional[ + Literal["c", "python", "pyarrow", "python-fwf", "bigquery"] + ] = None, + encoding: Optional[str] = None, + **kwargs, +) -> bigframes.dataframe.DataFrame: + return global_session.with_default_session( + bigframes.session.Session.read_csv, + filepath_or_buffer=filepath_or_buffer, + sep=sep, + header=header, + names=names, + index_col=index_col, + usecols=usecols, + dtype=dtype, + engine=engine, + encoding=encoding, + **kwargs, + ) + + +read_csv.__doc__ = inspect.getdoc(bigframes.session.Session.read_csv) + + +def read_json( + path_or_buf: str | IO["bytes"], + *, + orient: Literal[ + "split", "records", "index", "columns", "values", "table" + ] = "columns", + dtype: Optional[Dict] = None, + encoding: Optional[str] = None, + lines: bool = False, + engine: Literal["ujson", "pyarrow", "bigquery"] = "ujson", + **kwargs, +) -> bigframes.dataframe.DataFrame: + return global_session.with_default_session( + bigframes.session.Session.read_json, + path_or_buf=path_or_buf, + 
orient=orient, + dtype=dtype, + encoding=encoding, + lines=lines, + engine=engine, + **kwargs, + ) + + +read_json.__doc__ = inspect.getdoc(bigframes.session.Session.read_json) + + +def read_gbq( + query_or_table: str, + *, + index_col: Iterable[str] | str | bigframes.enums.DefaultIndexKind = (), + columns: Iterable[str] = (), + configuration: Optional[Dict] = None, + max_results: Optional[int] = None, + filters: vendored_pandas_gbq.FiltersType = (), + use_cache: Optional[bool] = None, + col_order: Iterable[str] = (), +) -> bigframes.dataframe.DataFrame: + _set_default_session_location_if_possible(query_or_table) + return global_session.with_default_session( + bigframes.session.Session.read_gbq, + query_or_table, + index_col=index_col, + columns=columns, + configuration=configuration, + max_results=max_results, + filters=filters, + use_cache=use_cache, + col_order=col_order, + ) + + +read_gbq.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq) + + +def read_gbq_model(model_name: str): + return global_session.with_default_session( + bigframes.session.Session.read_gbq_model, + model_name, + ) + + +read_gbq_model.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_model) + + +def read_gbq_query( + query: str, + *, + index_col: Iterable[str] | str | bigframes.enums.DefaultIndexKind = (), + columns: Iterable[str] = (), + configuration: Optional[Dict] = None, + max_results: Optional[int] = None, + use_cache: Optional[bool] = None, + col_order: Iterable[str] = (), + filters: vendored_pandas_gbq.FiltersType = (), +) -> bigframes.dataframe.DataFrame: + _set_default_session_location_if_possible(query) + return global_session.with_default_session( + bigframes.session.Session.read_gbq_query, + query, + index_col=index_col, + columns=columns, + configuration=configuration, + max_results=max_results, + use_cache=use_cache, + col_order=col_order, + filters=filters, + ) + + +read_gbq_query.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_query) + + +def 
read_gbq_table( + query: str, + *, + index_col: Iterable[str] | str | bigframes.enums.DefaultIndexKind = (), + columns: Iterable[str] = (), + max_results: Optional[int] = None, + filters: vendored_pandas_gbq.FiltersType = (), + use_cache: bool = True, + col_order: Iterable[str] = (), +) -> bigframes.dataframe.DataFrame: + _set_default_session_location_if_possible(query) + return global_session.with_default_session( + bigframes.session.Session.read_gbq_table, + query, + index_col=index_col, + columns=columns, + max_results=max_results, + filters=filters, + use_cache=use_cache, + col_order=col_order, + ) + + +read_gbq_table.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_table) + + +@typing.overload +def read_pandas(pandas_dataframe: pandas.DataFrame) -> bigframes.dataframe.DataFrame: + ... + + +@typing.overload +def read_pandas(pandas_dataframe: pandas.Series) -> bigframes.series.Series: + ... + + +@typing.overload +def read_pandas(pandas_dataframe: pandas.Index) -> bigframes.core.indexes.Index: + ... 
+ + +def read_pandas(pandas_dataframe: Union[pandas.DataFrame, pandas.Series, pandas.Index]): + return global_session.with_default_session( + bigframes.session.Session.read_pandas, + pandas_dataframe, + ) + + +read_pandas.__doc__ = inspect.getdoc(bigframes.session.Session.read_pandas) + + +def read_pickle( + filepath_or_buffer: FilePath | ReadPickleBuffer, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, +): + return global_session.with_default_session( + bigframes.session.Session.read_pickle, + filepath_or_buffer=filepath_or_buffer, + compression=compression, + storage_options=storage_options, + ) + + +read_pickle.__doc__ = inspect.getdoc(bigframes.session.Session.read_pickle) + + +def read_parquet( + path: str | IO["bytes"], *, engine: str = "auto" +) -> bigframes.dataframe.DataFrame: + return global_session.with_default_session( + bigframes.session.Session.read_parquet, + path, + engine=engine, + ) + + +read_parquet.__doc__ = inspect.getdoc(bigframes.session.Session.read_parquet) + + +def read_gbq_function(function_name: str, is_row_processor: bool = False): + return global_session.with_default_session( + bigframes.session.Session.read_gbq_function, + function_name=function_name, + is_row_processor=is_row_processor, + ) + + +read_gbq_function.__doc__ = inspect.getdoc(bigframes.session.Session.read_gbq_function) + + +def _set_default_session_location_if_possible(query): + # Set the location as per the query if this is the first query the user is + # running and: + # (1) Default session has not started yet, and + # (2) Location is not set yet, and + # (3) Use of regional endpoints is not set. + # If query is a table name, then it would be the location of the table. + # If query is a SQL with a table, then it would be table's location. + # If query is a SQL with no table, then it would be the BQ default location. 
+ if ( + config.options.bigquery._session_started + or config.options.bigquery.location + or config.options.bigquery.use_regional_endpoints + ): + return + + clients_provider = bigframes.session.clients.ClientsProvider( + project=config.options.bigquery.project, + location=config.options.bigquery.location, + use_regional_endpoints=config.options.bigquery.use_regional_endpoints, + credentials=config.options.bigquery.credentials, + application_name=config.options.bigquery.application_name, + bq_kms_key_name=config.options.bigquery.kms_key_name, + ) + + bqclient = clients_provider.bqclient + + if bigframes.session._io.bigquery.is_query(query): + # Intentionally run outside of the session so that we can detect the + # location before creating the session. Since it's a dry_run, labels + # aren't necessary. + job = bqclient.query(query, bigquery.QueryJobConfig(dry_run=True)) + config.options.bigquery.location = job.location + else: + table = bqclient.get_table(query) + config.options.bigquery.location = table.location From fa385644ccdb810eef9d8301056c029551d20612 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 26 Sep 2024 19:17:55 +0000 Subject: [PATCH 2/9] fix typo in __all__ --- bigframes/pandas/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 89bf6c8047..68eb0deddb 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -592,7 +592,7 @@ def reset_session(): pass # Use __all__ to let type checkers know what is part of the public API. 
-__all___ = [ +__all__ = [ # Functions "concat", "merge", From ed79ff2a73b2506ce957f658c7e5986da93ae294 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 26 Sep 2024 20:10:39 +0000 Subject: [PATCH 3/9] move read functions under bigframes.pandas.io.api to reflect the structure of pandas --- bigframes/pandas/__init__.py | 2 +- bigframes/pandas/io/__init__.py | 14 ++++++++++++++ bigframes/pandas/{readers.py => io/api.py} | 0 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 bigframes/pandas/io/__init__.py rename bigframes/pandas/{readers.py => io/api.py} (100%) diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 68eb0deddb..eaf80bb444 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -43,7 +43,7 @@ import bigframes.enums import bigframes.functions._utils as functions_utils import bigframes.operations as ops -from bigframes.pandas.readers import ( +from bigframes.pandas.io.api import ( read_csv, read_gbq, read_gbq_function, diff --git a/bigframes/pandas/io/__init__.py b/bigframes/pandas/io/__init__.py new file mode 100644 index 0000000000..c38dc3b14a --- /dev/null +++ b/bigframes/pandas/io/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/bigframes/pandas/readers.py b/bigframes/pandas/io/api.py similarity index 100% rename from bigframes/pandas/readers.py rename to bigframes/pandas/io/api.py From 18e52cc961c2f6b40c0a1a6b0c2b5208f7b0e8a5 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 26 Sep 2024 20:10:39 +0000 Subject: [PATCH 4/9] move read functions under bigframes.pandas.io.api to reflect the structure of pandas --- bigframes/pandas/__init__.py | 2 +- bigframes/pandas/io/__init__.py | 13 +++++++++++++ bigframes/pandas/{readers.py => io/api.py} | 0 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 bigframes/pandas/io/__init__.py rename bigframes/pandas/{readers.py => io/api.py} (100%) diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 68eb0deddb..eaf80bb444 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -43,7 +43,7 @@ import bigframes.enums import bigframes.functions._utils as functions_utils import bigframes.operations as ops -from bigframes.pandas.readers import ( +from bigframes.pandas.io.api import ( read_csv, read_gbq, read_gbq_function, diff --git a/bigframes/pandas/io/__init__.py b/bigframes/pandas/io/__init__.py new file mode 100644 index 0000000000..6d5e14bcf4 --- /dev/null +++ b/bigframes/pandas/io/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/bigframes/pandas/readers.py b/bigframes/pandas/io/api.py similarity index 100% rename from bigframes/pandas/readers.py rename to bigframes/pandas/io/api.py From 10c6533ea0a1ff580739a98312eeed1ec259572a Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 26 Sep 2024 20:13:10 +0000 Subject: [PATCH 5/9] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- bigframes/pandas/io/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bigframes/pandas/io/__init__.py b/bigframes/pandas/io/__init__.py index c38dc3b14a..6d5e14bcf4 100644 --- a/bigframes/pandas/io/__init__.py +++ b/bigframes/pandas/io/__init__.py @@ -11,4 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- From 288eee413b0ce549efebc10b83e32e97bdd657c7 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 26 Sep 2024 21:07:11 +0000 Subject: [PATCH 6/9] update function import --- bigframes/streaming/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigframes/streaming/__init__.py b/bigframes/streaming/__init__.py index 66f345f0ab..d439d622a2 100644 --- a/bigframes/streaming/__init__.py +++ b/bigframes/streaming/__init__.py @@ -15,13 +15,13 @@ import inspect import bigframes.core.global_session as global_session -import bigframes.pandas as bpd +from bigframes.pandas.io.api import _set_default_session_location_if_possible import bigframes.session import bigframes.streaming.dataframe as streaming_dataframe def read_gbq_table(table: str) -> streaming_dataframe.StreamingDataFrame: - bpd._set_default_session_location_if_possible(table) + _set_default_session_location_if_possible(table) return global_session.with_default_session( bigframes.session.Session.read_gbq_table_streaming, table ) From 0016b7e75fa63b37cc4ae91a7ea2408a34d9199d Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 26 Sep 2024 22:49:29 +0000 Subject: [PATCH 7/9] fix missing comma --- bigframes/pandas/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index eaf80bb444..98da6d826c 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -614,7 +614,7 @@ def reset_session(): "Float64Dtype", "Int64Dtype", "StringDtype", - "ArrowDtype" + "ArrowDtype", # Class aliases "DataFrame", "Index", From 1b3fca6bcd84cc8d9b8b102494987dc17d670394 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 26 Sep 2024 22:55:04 +0000 Subject: [PATCH 8/9] try to fix dup doc by directly importing types from pandas --- bigframes/pandas/__init__.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 
98da6d826c..dabb56a33a 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -30,6 +30,7 @@ import bigframes_vendored.pandas.core.reshape.tile as vendored_pandas_tile import bigframes_vendored.pandas.core.tools.datetimes as vendored_pandas_datetimes import pandas +from pandas import ArrowDtype, BooleanDtype, Float64Dtype, Int64Dtype, NA, StringDtype import bigframes._config as config import bigframes.core.blocks @@ -527,14 +528,6 @@ def clean_up_by_session_id( ) -# pandas dtype attributes -NA = pandas.NA -BooleanDtype = pandas.BooleanDtype -Float64Dtype = pandas.Float64Dtype -Int64Dtype = pandas.Int64Dtype -StringDtype = pandas.StringDtype -ArrowDtype = pandas.ArrowDtype - # Class aliases # TODO(swast): Make these real classes so we can refer to these in type # checking and docstrings. From 91865c9371759e03cd1f1fd5e3b4ef754e54eae9 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Thu, 26 Sep 2024 23:43:03 +0000 Subject: [PATCH 9/9] fix doc generation --- bigframes/pandas/__init__.py | 9 ++++++++- docs/reference/bigframes.pandas/general_functions.rst | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index dabb56a33a..98da6d826c 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -30,7 +30,6 @@ import bigframes_vendored.pandas.core.reshape.tile as vendored_pandas_tile import bigframes_vendored.pandas.core.tools.datetimes as vendored_pandas_datetimes import pandas -from pandas import ArrowDtype, BooleanDtype, Float64Dtype, Int64Dtype, NA, StringDtype import bigframes._config as config import bigframes.core.blocks @@ -528,6 +527,14 @@ def clean_up_by_session_id( ) +# pandas dtype attributes +NA = pandas.NA +BooleanDtype = pandas.BooleanDtype +Float64Dtype = pandas.Float64Dtype +Int64Dtype = pandas.Int64Dtype +StringDtype = pandas.StringDtype +ArrowDtype = pandas.ArrowDtype + # Class aliases # TODO(swast): Make these real classes so we can 
refer to these in type # checking and docstrings. diff --git a/docs/reference/bigframes.pandas/general_functions.rst b/docs/reference/bigframes.pandas/general_functions.rst index 4fff9aabf8..fff1a9ef59 100644 --- a/docs/reference/bigframes.pandas/general_functions.rst +++ b/docs/reference/bigframes.pandas/general_functions.rst @@ -6,3 +6,4 @@ General functions .. automodule:: bigframes.pandas :members: :undoc-members: + :noindex: