Closed
Description
I was trying to load quantile regression from the latest sklearn version '0.24.1'. but for some reason it does not exist any longer.
so I tried to fetch the code from https://github.com/scikit-learn/scikit-learn/blob/df20e8156/sklearn/linear_model/_quantile.py
and make my own function
but does not get converged.
here is the code
import pandas as pd
import numpy as np
from numpy.linalg import pinv
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
X, y = make_regression(n_samples=10000,
n_features=1,
n_informative=1,
n_targets=1,
random_state=42)
Xs = pd.DataFrame(X, columns = ['distance'])
ys = pd.DataFrame(y, columns = ['time_to_buyer'])
Xs['distance'] = Xs['distance'].apply(lambda x: 10 + 2 * (x + np.random.normal(loc=1)) )
ys['time_to_buyer'] = ys['time_to_buyer'].apply(lambda x: 60 + 0.3* (x + np.random.normal(loc=1)) )
df = Xs.merge(ys, left_index=True, right_index=True)
train_df, test_df = train_test_split(df, test_size=0.10, shuffle=False)
X_train, y_train = train_df[['distance']], train_df[['time_to_buyer']]
X_test, y_test = test_df[['distance']], test_df[['time_to_buyer']]
X = X_train
y = y_train
n_features = X_train.shape[1]
n_samples = X_train.shape[0]
n_params = n_features
n_params += 1
q = 0.5
alpha = 1
sample_weight = np.ones(n_samples, dtype=np.float64)
alpha = np.sum(sample_weight) * alpha
mask = sample_weight != 0
n_mask = int(np.sum(mask)) # use n_mask instead of n_samples
c = np.concatenate(
[
np.full(2 * n_params, fill_value=alpha),
sample_weight[mask] * q,
sample_weight[mask] * (1 - q),
]
)
# do not penalize the intercept
c[0] = 0
c[n_params] = 0
A_eq = np.concatenate(
[
np.ones((n_mask, 1)),
X[mask],
-np.ones((n_mask, 1)),
-X[mask],
np.eye(n_mask),
-np.eye(n_mask),
],
axis=1,
)
b_eq = y[mask]
from scipy.optimize import linprog
result = linprog(
c=c,
A_eq=A_eq,
b_eq=b_eq,
method="interior-point",
options={"lstsq": True},
)
basically the convergance keeps going and I have to kill the instance.
However the same data using statsmodels
implementation works just fine
from statsmodels.regression.quantile_regression import QuantReg
mod = QuantReg(y, X)
res = mod.fit(q=.5)
print(res.summary())
Metadata
Metadata
Assignees
Labels
No labels