diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index 9032993452..e4a60e08e1 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -69,7 +69,7 @@ def from_ibis( return cls(node) @classmethod - def from_pandas(cls, pd_df: pandas.DataFrame): + def from_pandas(cls, pd_df: pandas.DataFrame, session: bigframes.Session): iobytes = io.BytesIO() # Use alphanumeric identifiers, to avoid downstream problems with escaping. as_ids = [ @@ -78,7 +78,7 @@ def from_pandas(cls, pd_df: pandas.DataFrame): ] unique_ids = tuple(bigframes.core.utils.disambiguate_ids(as_ids)) pd_df.reset_index(drop=True).set_axis(unique_ids, axis=1).to_feather(iobytes) - node = nodes.ReadLocalNode(iobytes.getvalue()) + node = nodes.ReadLocalNode(feather_bytes=iobytes.getvalue(), session=session) return cls(node) @property diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 93dcd1d691..375ce7e7e0 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -140,7 +140,7 @@ def __init__( self._stats_cache[" ".join(self.index_columns)] = {} @classmethod - def from_local(cls, data) -> Block: + def from_local(cls, data, session: bigframes.Session) -> Block: pd_data = pd.DataFrame(data) columns = pd_data.columns @@ -162,7 +162,7 @@ def from_local(cls, data) -> Block: ) index_ids = pd_data.columns[: len(index_labels)] - keys_expr = core.ArrayValue.from_pandas(pd_data) + keys_expr = core.ArrayValue.from_pandas(pd_data, session) return cls( keys_expr, column_labels=columns, diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index 1cd3277cbc..9da535e15f 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -155,6 +155,7 @@ def __hash__(self): @dataclass(frozen=True) class ReadLocalNode(BigFrameNode): feather_bytes: bytes + session: typing.Optional[bigframes.session.Session] = None def __hash__(self): return self._node_hash diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 6ed882987c..5dae7a82f9 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -1646,7 +1646,7 @@ def _reindex_rows( raise NotImplementedError( "Cannot reindex with index with different nlevels" ) - new_indexer = DataFrame(index=index)[[]] + new_indexer = DataFrame(index=index, session=self._session)[[]] # multiindex join is senstive to index names, so we will set all these result = new_indexer.rename_axis(range(new_indexer.index.nlevels)).join( self.rename_axis(range(self.index.nlevels)), diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index e3c392cd2f..5266267a22 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -916,7 +916,7 @@ def _read_pandas( def _read_pandas_inline( self, pandas_dataframe: pandas.DataFrame ) -> dataframe.DataFrame: - return dataframe.DataFrame(blocks.Block.from_local(pandas_dataframe)) + return dataframe.DataFrame(blocks.Block.from_local(pandas_dataframe, self)) def _read_pandas_load_job( self, pandas_dataframe: pandas.DataFrame, api_name: str diff --git a/notebooks/location/regionalized.ipynb b/notebooks/location/regionalized.ipynb index a7ff5db84e..86f43b1dd6 100644 --- a/notebooks/location/regionalized.ipynb +++ b/notebooks/location/regionalized.ipynb @@ -2791,7 +2791,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.9" }, "orig_nbformat": 4 }, diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 9f4e138b73..61dcd778ef 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -93,6 +93,23 @@ def test_df_construct_from_dict(): ) +def test_df_construct_inline_respects_location(): + import bigframes.pandas as bpd + + bpd.close_session() + bpd.options.bigquery.location = "europe-west1" + + df = bpd.DataFrame([[1, 2, 3], [4, 5, 6]]) + repr(df) + + table = bpd.get_global_session().bqclient.get_table(df.query_job.destination) + assert table.location == "europe-west1" + + # Reset global session + bpd.close_session() + bpd.options.bigquery.location = "us" + + def test_get_column(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "int64_col" diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index 2e2252be06..aba4a52c43 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -369,6 +369,17 @@ def test_read_pandas(session, scalars_dfs): pd.testing.assert_frame_equal(result, expected) +def test_read_pandas_inline_respects_location(): + options = bigframes.BigQueryOptions(location="europe-west1") + session = bigframes.Session(options) + + df = session.read_pandas(pd.DataFrame([[1, 2, 3], [4, 5, 6]])) + repr(df) + + table = session.bqclient.get_table(df.query_job.destination) + assert table.location == "europe-west1" + + def test_read_pandas_col_label_w_space(session: bigframes.Session): expected = pd.DataFrame( { diff --git a/tests/unit/core/test_blocks.py b/tests/unit/core/test_blocks.py index 5a4f0951d3..0bb5e0101a 100644 --- a/tests/unit/core/test_blocks.py +++ b/tests/unit/core/test_blocks.py @@ -12,10 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from unittest import mock + import pandas import pandas.testing import pytest +import bigframes import bigframes.core.blocks as blocks @@ -74,8 +77,12 @@ ) def test_block_from_local(data): expected = pandas.DataFrame(data) + mock_session = mock.create_autospec(spec=bigframes.Session) + + # hard-coded the returned dimension of the session for that each of the test case contains 3 rows. + mock_session._execute.return_value = (iter([[3]]), None) - block = blocks.Block.from_local(data) + block = blocks.Block.from_local(data, mock_session) pandas.testing.assert_index_equal(block.column_labels, expected.columns) assert tuple(block.index.names) == tuple(expected.index.names)