From 4a8e7f3805756b790e4385363d51ba8b0f2bebf1 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Thu, 29 Aug 2024 22:56:36 +0000 Subject: [PATCH 1/2] docs:update struct examples. --- .../pandas/core/arrays/arrow/accessors.py | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py index 4cba928bb6..bdf5276843 100644 --- a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py +++ b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py @@ -22,12 +22,12 @@ def field(self, name_or_index: str | int): >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... [ - ... {"version": 1, "project": "pandas"}, - ... {"version": 2, "project": "pandas"}, - ... {"version": 1, "project": "numpy"}, + ... {"project": "pandas", "version": 1}, + ... {"project": "pandas", "version": 2}, + ... {"project": "numpy", "version": 1}, ... ], ... dtype=bpd.ArrowDtype(pa.struct( - ... [("version", pa.int64()), ("project", pa.string())] + ... [("project", pa.string()), ("version", pa.int64())] ... )) ... ) @@ -41,7 +41,7 @@ def field(self, name_or_index: str | int): Extract by field index. - >>> s.struct.field(0) + >>> s.struct.field(1) 0 1 1 2 2 1 @@ -68,22 +68,22 @@ def explode(self): >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... [ - ... {"version": 1, "project": "pandas"}, - ... {"version": 2, "project": "pandas"}, - ... {"version": 1, "project": "numpy"}, + ... {"project": "pandas", "version": 1}, + ... {"project": "pandas", "version": 2}, + ... {"project": "numpy", "version": 1}, ... ], ... dtype=bpd.ArrowDtype(pa.struct( - ... [("version", pa.int64()), ("project", pa.string())] + ... [("project", pa.string()), ("version", pa.int64())] ... )) ... ) Extract all child fields. >>> s.struct.explode() - version project - 0 1 pandas - 1 2 pandas - 2 1 numpy + project version + 0 pandas 1 + 1 pandas 2 + 2 numpy 1 [3 rows x 2 columns] @@ -113,8 +113,8 @@ def dtypes(self): ... )) ... ) >>> s.struct.dtypes() - version Int64 project string[pyarrow] + version Int64 dtype: object Returns: @@ -140,21 +140,21 @@ def explode(self, column, *, separator: str = "."): >>> countries = bpd.Series(["cn", "es", "us"]) >>> files = bpd.Series( ... [ - ... {"version": 1, "project": "pandas"}, - ... {"version": 2, "project": "pandas"}, - ... {"version": 1, "project": "numpy"}, + ... {"project": "pandas", "version": 1}, + ... {"project": "pandas", "version": 2}, + ... {"project": "numpy", "version": 1}, ... ], ... dtype=bpd.ArrowDtype(pa.struct( - ... [("version", pa.int64()), ("project", pa.string())] + ... [("project", pa.string()), ("version", pa.int64())] ... )) ... ) >>> downloads = bpd.Series([100, 200, 300]) >>> df = bpd.DataFrame({"country": countries, "file": files, "download_count": downloads}) >>> df.struct.explode("file") - country file.version file.project download_count - 0 cn 1 pandas 100 - 1 es 2 pandas 200 - 2 us 1 numpy 300 + country file.project file.version download_count + 0 cn pandas 1 100 + 1 es pandas 2 200 + 2 us numpy 1 300 [3 rows x 4 columns] From d23a7021a7a192ca9ccd9c0e4c1d21a9e7a26c24 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Wed, 4 Sep 2024 14:47:46 +0000 Subject: [PATCH 2/2] format fix --- .../bigframes_vendored/pandas/core/arrays/arrow/accessors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py index bdf5276843..ab199d53bd 100644 --- a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py +++ b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py @@ -80,9 +80,9 @@ def explode(self): Extract all child fields. >>> s.struct.explode() - project version + project version 0 pandas 1 - 1 pandas 2 + 1 pandas 2 2 numpy 1 [3 rows x 2 columns]