Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Latest commit

 

History

History
History
43 lines (37 loc) · 1.43 KB

File metadata and controls

43 lines (37 loc) · 1.43 KB
Copy raw file
Download raw file
Open symbols panel
Edit and raw actions
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""
Example: Polars -> hyrax -> Polars round-trip via Arrow C Data Interface,
plus a sklearn-style DecisionTreeClassifier on titanic.
Run from repo root:
    python3 -m venv .venv
    source ./.venv/bin/activate
    pip install polars pyarrow
    cabal build dataframe-arrow
    python3 python/example.py
"""
import polars as pl
import hyrax as hx
# --- 1. Load with Polars --------------------------------------------------
# Read the Titanic CSV and discard every row whose Age is null.
titanic_csv = "data/titanic.csv"
raw = pl.read_csv(titanic_csv)
raw = raw.drop_nulls(subset=["Age"])
print("Polars input shape:", raw.shape)

# --- 2. Polars -> hyrax ----------------------------------------------------
# Export the Polars frame as an Arrow table and hand it to hyrax.
arrow_table = raw.to_arrow()
df = hx.from_arrow(arrow_table)

# --- 3. Group by Sex and aggregate -----------------------------------------
# Output columns: "n" (count), "median_age" and "mean_fare".
by_sex = df.groupBy(["Sex"])
summary_spec = {
    "n": hx.count(hx.col("Sex")),
    "median_age": hx.median(hx.col("Age")),
    "mean_fare": hx.mean(hx.col("Fare")),
}
agg = by_sex.aggregate(summary_spec)
print("\nGroupBy result:")
# The aggregate result is passed to Polars directly (no collect() call here).
print(pl.from_arrow(agg))

# --- 4. Filter / derive / select pipeline ----------------------------------
# Keep passengers aged 18 or more, add Fare / Pclass as "price_per_class",
# then narrow the frame to the columns of interest.
is_adult = hx.col("Age") >= 18.0
adult_rows = df.filter(is_adult)
fare_per_class = hx.col("Fare") / hx.col("Pclass")
adult_rows = adult_rows.derive("price_per_class", fare_per_class)
adults = adult_rows.select(["Sex", "Pclass", "Age", "Fare", "price_per_class"])
print("\nAdults (head 3):")
first_adults = adults.limit(3).collect()
print(pl.from_arrow(first_adults))

# schema round-trip test: read the CSV with hyrax itself, print it via Polars
hyrax_csv = hx.read_csv(titanic_csv).collect()
print(pl.from_arrow(hyrax_csv))

# --- 5. Decision tree on the training data ---------------------------------
# Fit on a five-column projection of the frame, using "Survived" as target.
features = df.select(["Survived", "Pclass", "Sex", "Age", "Fare"])
tree = hx.DecisionTreeClassifier(max_depth=3, min_samples_split=20)
clf = tree.fit(features, target="Survived", target_type="int")

# Predict on the same rows used for fitting and compare against the Polars
# "Survived" column, position by position.
preds = clf.predict_array(features).to_pylist()
truth = raw["Survived"].to_list()
hits = 0
for actual, predicted in zip(truth, preds):
    if actual == predicted:
        hits += 1
acc = hits / len(truth)
print(f"\nDecision tree accuracy on train set: {acc:.3f}")
Morty Proxy This is a proxified and sanitized view of the page, visit original site.