googleapis · tswast · Oct 22, 2025 · Oct 21, 2025 · Oct 21, 2025
@@ -967,7 +967,7 @@ def _compute_dry_run(
        }

        dry_run_stats = dry_runs.get_query_stats_with_dtypes(
-            query_job, column_dtypes, self.index.dtypes
+            query_job, column_dtypes, self.index.dtypes, self.expr.node
        )
        return dry_run_stats, query_job


@@ -20,6 +20,7 @@
 import pandas

 from bigframes import dtypes
+from bigframes.core import bigframe_node, nodes


 def get_table_stats(table: bigquery.Table) -> pandas.Series:
@@ -86,13 +87,26 @@ def get_query_stats_with_dtypes(
    query_job: bigquery.QueryJob,
    column_dtypes: Dict[str, dtypes.Dtype],
    index_dtypes: Sequence[dtypes.Dtype],
+    expr_root: bigframe_node.BigFrameNode | None = None,
 ) -> pandas.Series:
+    """
+    Returns important stats from the query job as a Pandas Series. The dtypes information is added too.
+
+    Args:
+        expr_root (Optional):
+            The root of the expression tree that may contain local data, whose size is added to the
+            total bytes count if available.
+
+    """
    index = ["columnCount", "columnDtypes", "indexLevel", "indexDtypes"]
    values = [len(column_dtypes), column_dtypes, len(index_dtypes), index_dtypes]

    s = pandas.Series(values, index=index)

-    return pandas.concat([s, get_query_stats(query_job)])
+    result = pandas.concat([s, get_query_stats(query_job)])
+    if expr_root is not None:
+        result["totalBytesProcessed"] += get_local_bytes(expr_root)
+    return result


 def get_query_stats(
@@ -145,4 +159,24 @@ def get_query_stats(
        else None
    )

-    return pandas.Series(values, index=index)
+    result = pandas.Series(values, index=index)
+    if result["totalBytesProcessed"] is None:
+        result["totalBytesProcessed"] = 0
+    else:
+        result["totalBytesProcessed"] = int(result["totalBytesProcessed"])
+
+    return result
+
+
+def get_local_bytes(root: bigframe_node.BigFrameNode) -> int:
+    def get_total_bytes(
+        root: bigframe_node.BigFrameNode, child_results: tuple[int, ...]
+    ) -> int:
+        child_bytes = sum(child_results)
+
+        if isinstance(root, nodes.ReadLocalNode):
+            return child_bytes + root.local_data_source.data.get_total_buffer_size()
+
+        return child_bytes
+
+    return root.reduce_up(get_total_bytes)
@@ -2173,6 +2173,22 @@ def test_read_gbq_query_dry_run(scalars_table_id, session):
    _assert_query_dry_run_stats_are_valid(result)


+def test_block_dry_run_includes_local_data(session):
+    df1 = bigframes.dataframe.DataFrame({"col_1": [1, 2, 3]}, session=session)
+    df2 = bigframes.dataframe.DataFrame({"col_2": [1, 2, 3]}, session=session)
+
+    result = df1.merge(df2, how="cross").to_pandas(dry_run=True)
+
+    assert isinstance(result, pd.Series)
+    _assert_query_dry_run_stats_are_valid(result)
+    assert result["totalBytesProcessed"] > 0
+    assert (
+        df1.to_pandas(dry_run=True)["totalBytesProcessed"]
+        + df2.to_pandas(dry_run=True)["totalBytesProcessed"]
+        == result["totalBytesProcessed"]
+    )
+
+
 def _assert_query_dry_run_stats_are_valid(result: pd.Series):
    expected_index = pd.Index(
        [