diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index 63f36d4ddd..b640692bc8 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -165,7 +165,7 @@ def cached(self, cluster_cols: typing.Sequence[str]) -> ArrayValue: ibis_expr = compiled_value._to_ibis_expr( ordering_mode="unordered", expose_hidden_cols=True ) - tmp_table = self.session._ibis_to_session_table( + tmp_table = self.session._ibis_to_temp_table( ibis_expr, cluster_cols=cluster_cols, api_name="cached" ) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index bd5845631b..9b881de9a0 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -36,7 +36,6 @@ Tuple, Union, ) -import uuid import warnings import google.api_core.client_info @@ -836,11 +835,20 @@ def _read_bigquery_load_job( ) self._start_generic_job(load_job) + table_id = f"{table.project}.{table.dataset_id}.{table.table_id}" + + # Update the table expiration so we aren't limited to the default 24 + # hours of the anonymous dataset. + table_expiration = bigquery.Table(table_id) + table_expiration.expires = ( + datetime.datetime.now(datetime.timezone.utc) + constants.DEFAULT_EXPIRATION + ) + self.bqclient.update_table(table_expiration, ["expires"]) # The BigQuery REST API for tables.get doesn't take a session ID, so we # can't get the schema for a temp table that way. return self.read_gbq_table( - f"{table.project}.{table.dataset_id}.{table.table_id}", + table_id, index_col=index_col, col_order=col_order, ) @@ -977,7 +985,7 @@ def _read_pandas( job_config.clustering_fields = cluster_cols job_config.labels = {"bigframes-api": api_name} - load_table_destination = self._create_session_table() + load_table_destination = bigframes_io.random_table(self._anonymous_dataset) load_job = self.bqclient.load_table_from_dataframe( pandas_dataframe_copy, load_table_destination, @@ -990,8 +998,9 @@ def _read_pandas( total_ordering_columns=frozenset([ordering_col]), integer_encoding=IntegerEncoding(True, is_sequential=True), ) - table_expression = self.ibis_client.sql( - f"SELECT * FROM `{load_table_destination.table_id}`" + table_expression = self.ibis_client.table( + load_table_destination.table_id, + database=f"{load_table_destination.project}.{load_table_destination.dataset_id}", ) # b/297590178 Potentially a bug in bqclient.load_table_from_dataframe(), that only when the DF is empty, the index columns disappear in table_expression. @@ -1269,13 +1278,6 @@ def _check_file_size(self, filepath: str): "for large files to avoid loading the file into local memory." ) - def _create_session_table(self) -> bigquery.TableReference: - table_name = f"{uuid.uuid4().hex}" - dataset = bigquery.Dataset( - bigquery.DatasetReference(self.bqclient.project, "_SESSION") - ) - return dataset.table(table_name) - def _create_empty_temp_table( self, schema: Iterable[bigquery.SchemaField], @@ -1310,7 +1312,7 @@ def _create_sequential_ordering( ibis.row_number().cast(ibis_dtypes.int64).name(default_ordering_name) ) table = table.mutate(**{default_ordering_name: default_ordering_col}) - table_ref = self._ibis_to_session_table( + table_ref = self._ibis_to_temp_table( table, cluster_cols=list(index_cols) + [default_ordering_name], api_name=api_name, @@ -1326,7 +1328,7 @@ def _create_sequential_ordering( ) return table, ordering - def _ibis_to_session_table( + def _ibis_to_temp_table( self, table: ibis_types.Table, cluster_cols: Iterable[str],