feat: make plotly-express dataframe agnostic via narwhals #4790
Changes to `build_dataframe` in plotly/express/_core.py:

```diff
@@ -1421,48 +1421,41 @@ def build_dataframe(args, constructor):
     # Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.)
     df_provided = args["data_frame"] is not None
     is_pd_like = False
+    needs_interchanging = False
     if df_provided:
-        if nw.dependencies.is_polars_dataframe(
-            args["data_frame"]
-        ) or nw.dependencies.is_pyarrow_table(args["data_frame"]):
-            args["data_frame"] = nw.from_native(args["data_frame"], eager_only=True)
-            columns = args["data_frame"].columns
-        elif nw.dependencies.is_polars_series(
-            args["data_frame"]
-        ) or nw.dependencies.is_pyarrow_chunked_array(args["data_frame"]):
-            args["data_frame"] = nw.from_native(
-                args["data_frame"],
-                series_only=True,
-            ).to_frame()
-            columns = args["data_frame"].columns
-        elif nw.dependencies.is_pandas_like_dataframe(args["data_frame"]):
+        if nw.dependencies.is_pandas_like_dataframe(args["data_frame"]):
```
A review thread is attached to this line:

- Reviewer: "@FBruzzesi This … For example it was not obvious to me at first glance that …"
- Author: "That's valid, thanks - I've heard from someone else too that …"
- Reviewer: "I'd agree with that, I'm curious what's the use case for the pass-through functionality -- what can a program do with an object that might be a Narwhals dataframe, but might be some other kind of dataframe that Narwhals was not able to wrap? Is it primarily intended for checking whether Narwhals can handle a specific dataframe type? If so, couldn't you just catch the thrown error? (Not really relevant to this PR, I'm just curious.)"
- Author: "First example I can think of in scikit-lego add_lags, we allow for numpy arrays or dataframes, and use native methods via separate paths if the result from …"
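The pass-through behaviour the thread is asking about is `nw.from_native(..., strict=False)`: instead of raising on input that narwhals cannot wrap, it returns the object unchanged, so callers can dispatch on the result — the dispatching pattern described in the scikit-lego reply. A minimal sketch, assuming narwhals v1 as pinned by this PR (later releases expose the same behaviour via a `pass_through` argument):

```python
import narwhals as nw

data = {"x": [1, 2, 3]}  # a plain dict: not something narwhals can wrap

# strict=False returns the input unchanged instead of raising...
obj = nw.from_native(data, strict=False)
assert obj is data

# ...so a library can branch on the result:
if isinstance(obj, nw.DataFrame):
    pass  # dataframe-agnostic path
else:
    pass  # native/numpy fallback path

# The default (strict=True) raises instead:
# nw.from_native(data)  # TypeError
```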
The hunk continues:

```diff
         columns = args["data_frame"].columns  # This can be multi index
-        args["data_frame"] = nw.from_native(args["data_frame"])
+        args["data_frame"] = nw.from_native(args["data_frame"], eager_only=True)
         is_pd_like = True
     elif nw.dependencies.is_pandas_like_series(args["data_frame"]):
         args["data_frame"] = nw.from_native(
-            args["data_frame"],
-            series_only=True,
+            args["data_frame"], series_only=True
         ).to_frame()
         columns = args["data_frame"].columns
         is_pd_like = True
-    elif hasattr(args["data_frame"], "__dataframe__"):
-        # data_frame supports interchange protocol
-        args["data_frame"] = nw.from_native(
-            nw.from_native(
-                args["data_frame"], eager_or_interchange_only=True
-            ).to_pandas(),  # Converts to pandas
-            eager_only=True,
-        )
+    elif isinstance(
+        data_frame := nw.from_native(
+            args["data_frame"], eager_or_interchange_only=True, strict=False
+        ),
+        nw.DataFrame,
+    ):
+        args["data_frame"] = data_frame
+        needs_interchanging = nw.get_level(data_frame) == "interchange"
+        columns = args["data_frame"].columns
+    elif isinstance(
+        series := nw.from_native(
+            args["data_frame"], series_only=True, strict=False
+        ),
+        nw.Series,
+    ):
+        args["data_frame"] = series.to_frame()
+        columns = args["data_frame"].columns
+        is_pd_like = True
     elif hasattr(args["data_frame"], "toPandas"):
         # data_frame is PySpark: it does not support interchange and it is not
```
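This rewrite is also what lets the earlier explicit Polars/PyArrow branches go away: both libraries are "full"-level in narwhals, so they are picked up by the generic `isinstance` branch rather than needing dedicated `nw.dependencies` checks. An illustrative sketch of that invariant (not from the PR; assumes narwhals v1 plus the polars/pyarrow test dependencies):

```python
import narwhals as nw
import polars as pl
import pyarrow as pa

for native in (pl.DataFrame({"x": [1, 2]}), pa.table({"x": [1, 2]})):
    wrapped = nw.from_native(native, eager_or_interchange_only=True, strict=False)
    assert isinstance(wrapped, nw.DataFrame)  # caught by the first elif
    assert nw.get_level(wrapped) == "full"    # so needs_interchanging stays False
```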
```diff
@@ -1498,11 +1491,16 @@ def build_dataframe(args, constructor):
         columns = None  # no data_frame

     df_input: nw.DataFrame | None = args["data_frame"]
-    index = nw.maybe_get_index(df_input) if df_provided else None
-
-    # This is safe since at this point `_compliant_frame` is one of the "full" level
-    # support dataframe(s)
-    native_namespace = nw.get_native_namespace(df_input) if df_provided else None
+    index = (
+        nw.maybe_get_index(df_input)
+        if df_provided and not needs_interchanging
+        else None
+    )
+    native_namespace = (
+        nw.get_native_namespace(df_input)
+        if df_provided and not needs_interchanging
+        else None
+    )

     # now we handle special cases like wide-mode or x-xor-y specification
     # by rearranging args to tee things up for process_args_into_dataframe to work
```
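The dropped comment explained why `nw.get_native_namespace` used to be unconditionally safe: every frame reaching this point was "full" level. Now interchange-level frames can reach it too, so both lookups are skipped for them (the namespace is set to pyarrow later, after materialisation). For a full-level pandas frame the two helpers behave like this (illustrative, assuming narwhals v1):

```python
import narwhals as nw
import pandas as pd

df = nw.from_native(pd.DataFrame({"x": [1, 2]}, index=["a", "b"]))
print(nw.maybe_get_index(df))       # Index(['a', 'b'], ...); None for non-pandas-like input
print(nw.get_native_namespace(df))  # <module 'pandas'>
```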
```diff
@@ -1575,6 +1573,32 @@ def build_dataframe(args, constructor):
     value_name = _escape_col_name(columns, "value", [])
     var_name = _escape_col_name(columns, var_name, [])

+    if isinstance(args["data_frame"], nw.DataFrame) and needs_interchanging:
+        # Interchange to PyArrow
+        if wide_mode:
+            args["data_frame"] = nw.from_native(
+                args["data_frame"].to_arrow(), eager_only=True
+            )
+        else:
+            # Save precious resources by only interchanging columns that are
+            # actually going to be plotted. This is tricky to do in the general case,
+            # because Plotly allows calls like `px.line(df, x='x', y=['y1', df['y1']])`,
+            # but interchange-only objects (e.g. DuckDB) don't typically have a concept
+            # of self-standing Series. It's more important to perform project pushdown
+            # here seeing as we're materialising to an (eager) PyArrow table.
+            necessary_columns = {
+                i for i in args.values() if isinstance(i, str) and i in columns
+            }
+            for field in args:
+                if args[field] is not None and field in array_attrables:
+                    necessary_columns.update(i for i in args[field] if i in columns)
+            columns = list(necessary_columns)
+            args["data_frame"] = nw.from_native(
+                args["data_frame"].select(columns).to_arrow(), eager_only=True
+            )
+        import pyarrow as pa
+
+        native_namespace = pa
     missing_bar_dim = None
     if (
         constructor in [go.Scatter, go.Bar, go.Funnel] + hist2d_types
```
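Taken together with the earlier dispatch branch, this is what the interchange path enables end to end: an interchange-only object can be handed directly to Plotly Express, and in long-form mode only the plotted columns are materialised to PyArrow. A hypothetical usage sketch (assumes a DuckDB version whose relations implement `__dataframe__`, with pyarrow installed):

```python
import duckdb
import plotly.express as px

# DuckDBPyRelation is interchange-level for narwhals: there is no notion of a
# self-standing Series, but columns can be exported via the interchange protocol.
rel = duckdb.sql("SELECT range AS x, range * range AS y FROM range(10)")

# Long-form call: only 'x' and 'y' are projected and converted to a PyArrow table.
fig = px.line(rel, x="x", y="y")
```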
The remaining changes bump the narwhals version floor from 1.11.0 to 1.12.0 in each of the test requirements files:

```diff
@@ -1,3 +1,3 @@
 requests==2.25.1
 pytest==7.4.4
-narwhals>=1.11.0
+narwhals>=1.12.0
```

```diff
@@ -21,4 +21,4 @@ kaleido
 orjson==3.8.12
 polars[timezone]
 pyarrow
-narwhals>=1.11.0
+narwhals>=1.12.0
```

```diff
@@ -1,3 +1,3 @@
 requests==2.25.1
 pytest==7.4.4
-narwhals>=1.11.0
+narwhals>=1.12.0
```

```diff
@@ -21,4 +21,4 @@ kaleido
 orjson==3.8.12
 polars[timezone]
 pyarrow
-narwhals>=1.11.0
+narwhals>=1.12.0
```

```diff
@@ -1,3 +1,3 @@
 requests==2.25.1
 pytest==7.4.4
-narwhals>=1.11.0
+narwhals>=1.12.0
```

```diff
@@ -20,4 +20,4 @@ kaleido
 orjson==3.9.10
 polars[timezone]
 pyarrow
-narwhals>=1.11.0
+narwhals>=1.12.0
```

```diff
@@ -21,4 +21,4 @@ kaleido
 orjson==3.9.10
 polars[timezone]
 pyarrow
-narwhals>=1.11.0
+narwhals>=1.12.0
```

```diff
@@ -1,3 +1,3 @@
 requests==2.25.1
 pytest==8.1.1
-narwhals>=1.11.0
+narwhals>=1.12.0
```

```diff
@@ -21,4 +21,4 @@ psutil==5.7.0
 kaleido
 polars[timezone]
 pyarrow
-narwhals>=1.11.0
+narwhals>=1.12.0
```

```diff
@@ -1,3 +1,3 @@
 requests==2.25.1
 pytest==6.2.3
-narwhals>=1.11.0
+narwhals>=1.12.0
```

```diff
@@ -22,4 +22,4 @@ kaleido
 orjson==3.8.12
 polars[timezone]
 pyarrow
-narwhals>=1.11.0
+narwhals>=1.12.0
```