googleapis · gcf-merge-on-green · Oct 27, 2023 · Oct 26, 2023 · Oct 26, 2023 · Oct 26, 2023
@@ -1110,19 +1110,18 @@ def _assign_single_item(
            # local_df is likely (but not guarunteed) to be cached locally
            # since the original list came from memory and so is probably < MAX_INLINE_DF_SIZE

-            this_offsets_col_id = bigframes.core.guid.generate_guid()
-            this_expr = self._get_block()._expr.promote_offsets(this_offsets_col_id)
-            block = blocks.Block(
-                expr=this_expr,
-                index_labels=self.index.names,
-                index_columns=self._block.index_columns,
-                column_labels=[this_offsets_col_id] + list(self._block.value_columns),
-            )  # offsets are temporarily the first value column, label set to id
-            this_df_with_offsets = DataFrame(data=block)
-            join_result = this_df_with_offsets.join(
-                other=local_df, on=this_offsets_col_id, how="left"
+            new_column_block = local_df._block
+            original_index_column_ids = self._block.index_columns
+            self_block = self._block.reset_index(drop=False)
+            result_index, (get_column_left, get_column_right) = self_block.index.join(
+                new_column_block.index, how="left", block_identity_join=True
            )
-            return join_result.drop(columns=[this_offsets_col_id])
+            result_block = result_index._block
+            result_block = result_block.set_index(
+                [get_column_left[col_id] for col_id in original_index_column_ids],
+                index_labels=self._block.index_labels,
+            )
+            return DataFrame(result_block)
        else:
            return self._assign_scalar(k, v)


@@ -505,14 +505,32 @@ def test_assign_new_column_w_setitem_list(scalars_dfs):
    pd.testing.assert_frame_equal(bf_result, pd_result)


+def test_assign_new_column_w_setitem_list_repeated(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_df = scalars_df.copy()
+    pd_df = scalars_pandas_df.copy()
+    bf_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1]  # first list assignment
+    pd_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1]
+    bf_df["new_col_2"] = [1, 3, 2, 5, 4, 7, 6, 9, 8]  # assign again on same frame
+    pd_df["new_col_2"] = [1, 3, 2, 5, 4, 7, 6, 9, 8]
+    bf_result = bf_df.to_pandas()
+    pd_result = pd_df  # same object as pd_df, not a copy
+
+    # Cast pandas' default `int64` to `Int64` to match BigQuery DataFrames dtypes.
+    pd_result["new_col"] = pd_result["new_col"].astype("Int64")
+    pd_result["new_col_2"] = pd_result["new_col_2"].astype("Int64")
+
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
 def test_assign_new_column_w_setitem_list_custom_index(scalars_dfs):
    scalars_df, scalars_pandas_df = scalars_dfs
    bf_df = scalars_df.copy()
    pd_df = scalars_pandas_df.copy()

    # set the custom index
-    pd_df = pd_df.set_index("string_col")
-    bf_df = bf_df.set_index("string_col")
+    pd_df = pd_df.set_index(["string_col", "int64_col"])
+    bf_df = bf_df.set_index(["string_col", "int64_col"])

    bf_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1]
    pd_df["new_col"] = [9, 8, 7, 6, 5, 4, 3, 2, 1]