Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

refactor!: move model optional args to kwargs #381

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions 11 bigframes/ml/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ def register(self: _T, vertex_ai_model_id: Optional[str] = None) -> _T:
self._bqml_model.register(vertex_ai_model_id)
return self

@abc.abstractmethod
def to_gbq(self, model_name, replace):
pass


class TrainablePredictor(Predictor):
"""A BigQuery DataFrames ML Model base class that can be used to fit and predict outputs.
Expand All @@ -141,11 +145,6 @@ def _fit(self, X, y, transforms=None):
def score(self, X, y):
pass

# TODO(b/291812029): move to Predictor after implement in LLM and imported models
@abc.abstractmethod
def to_gbq(self, model_name, replace):
pass


class SupervisedTrainablePredictor(TrainablePredictor):
"""A BigQuery DataFrames ML Supervised Model base class that can be used to fit and predict outputs.
Expand All @@ -165,7 +164,7 @@ def fit(
class UnsupervisedTrainablePredictor(TrainablePredictor):
"""A BigQuery DataFrames ML Unsupervised Model base class that can be used to fit and predict outputs.

Only need to provide both X (y is optional and ignored) in unsupervised tasks."""
Only need to provide X (y is optional and ignored) in unsupervised tasks."""

_T = TypeVar("_T", bound="UnsupervisedTrainablePredictor")

Expand Down
4 changes: 4 additions & 0 deletions 4 bigframes/ml/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class XGBRegressor(
def __init__(
self,
num_parallel_tree: int = 1,
*,
booster: Literal["gbtree", "dart"] = "gbtree",
dart_normalized_type: Literal["tree", "forest"] = "tree",
tree_method: Literal["auto", "exact", "approx", "hist"] = "auto",
Expand Down Expand Up @@ -215,6 +216,7 @@ class XGBClassifier(
def __init__(
self,
num_parallel_tree: int = 1,
*,
booster: Literal["gbtree", "dart"] = "gbtree",
dart_normalized_type: Literal["tree", "forest"] = "tree",
tree_method: Literal["auto", "exact", "approx", "hist"] = "auto",
Expand Down Expand Up @@ -372,6 +374,7 @@ class RandomForestRegressor(
def __init__(
self,
num_parallel_tree: int = 100,
*,
tree_method: Literal["auto", "exact", "approx", "hist"] = "auto",
min_tree_child_weight: int = 1,
colsample_bytree=1.0,
Expand Down Expand Up @@ -538,6 +541,7 @@ class RandomForestClassifier(
def __init__(
self,
num_parallel_tree: int = 100,
*,
tree_method: Literal["auto", "exact", "approx", "hist"] = "auto",
min_tree_child_weight: int = 1,
colsample_bytree: float = 1.0,
Expand Down
2 changes: 1 addition & 1 deletion 2 bigframes/ml/forecasting.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def _fit(
)

def predict(
self, X=None, horizon: int = 3, confidence_level: float = 0.95
self, X=None, *, horizon: int = 3, confidence_level: float = 0.95
) -> bpd.DataFrame:
"""Predict the closest cluster for each sample in X.

Expand Down
36 changes: 21 additions & 15 deletions 36 bigframes/ml/imported.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,17 @@ class TensorFlowModel(base.Predictor):
"""Imported TensorFlow model.

Args:
model_path (str):
GCS path that holds the model files.
session (BigQuery Session):
BQ session to create the model
model_path (str):
GCS path that holds the model files."""
"""

def __init__(
shobsi marked this conversation as resolved.
Show resolved Hide resolved
self,
model_path: str,
*,
session: Optional[bigframes.Session] = None,
model_path: Optional[str] = None,
):
self.session = session or bpd.get_global_session()
self.model_path = model_path
Expand All @@ -59,7 +61,7 @@ def _from_bq(
) -> TensorFlowModel:
assert model.model_type == "TENSORFLOW"

tf_model = cls(session=session, model_path=None)
tf_model = cls(session=session, model_path="")
tf_model._bqml_model = core.BqmlModel(session, model)
return tf_model

Expand Down Expand Up @@ -109,15 +111,17 @@ class ONNXModel(base.Predictor):
"""Imported Open Neural Network Exchange (ONNX) model.

Args:
model_path (str):
Cloud Storage path that holds the model files.
session (BigQuery Session):
BQ session to create the model
model_path (str):
Cloud Storage path that holds the model files."""
"""

def __init__(
self,
model_path: str,
*,
session: Optional[bigframes.Session] = None,
model_path: Optional[str] = None,
):
self.session = session or bpd.get_global_session()
self.model_path = model_path
Expand All @@ -134,7 +138,7 @@ def _create_bqml_model(self):
def _from_bq(cls, session: bigframes.Session, model: bigquery.Model) -> ONNXModel:
assert model.model_type == "ONNX"

onnx_model = cls(session=session, model_path=None)
onnx_model = cls(session=session, model_path="")
shobsi marked this conversation as resolved.
Show resolved Hide resolved
onnx_model._bqml_model = core.BqmlModel(session, model)
return onnx_model

Expand Down Expand Up @@ -189,8 +193,8 @@ class XGBoostModel(base.Predictor):
https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-create-xgboost#limitations

Args:
session (BigQuery Session):
BQ session to create the model
model_path (str):
Cloud Storage path that holds the model files.
input (Dict, default None):
Specify the model input schema information when you
create the XGBoost model. The input should be the format of
Expand All @@ -203,15 +207,17 @@ class XGBoostModel(base.Predictor):
{field_name: field_type}. Output is optional only if feature_names
and feature_types are both specified in the model file. Supported types
are "bool", "string", "int64", "float64", "array<bool>", "array<string>", "array<int64>", "array<float64>".
model_path (str):
Cloud Storage path that holds the model files."""
session (BigQuery Session):
BQ session to create the model
"""

def __init__(
self,
session: Optional[bigframes.Session] = None,
model_path: str,
*,
input: Mapping[str, str] = {},
output: Mapping[str, str] = {},
model_path: Optional[str] = None,
session: Optional[bigframes.Session] = None,
):
self.session = session or bpd.get_global_session()
self.model_path = model_path
Expand Down Expand Up @@ -248,7 +254,7 @@ def _from_bq(
) -> XGBoostModel:
assert model.model_type == "XGBOOST"

xgboost_model = cls(session=session, model_path=None)
xgboost_model = cls(session=session, model_path="")
xgboost_model._bqml_model = core.BqmlModel(session, model)
return xgboost_model

Expand Down
2 changes: 2 additions & 0 deletions 2 bigframes/ml/linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class LinearRegression(

def __init__(
self,
*,
optimize_strategy: Literal[
"auto_strategy", "batch_gradient_descent", "normal_equation"
] = "normal_equation",
Expand Down Expand Up @@ -192,6 +193,7 @@ class LogisticRegression(
# TODO(ashleyxu) support class_weights in the constructor.
def __init__(
self,
*,
fit_intercept: bool = True,
class_weights: Optional[Union[Literal["balanced"], Dict[str, float]]] = None,
):
Expand Down
5 changes: 5 additions & 0 deletions 5 bigframes/ml/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class PaLM2TextGenerator(base.Predictor):

def __init__(
self,
*,
model_name: Literal["text-bison", "text-bison-32k"] = "text-bison",
session: Optional[bigframes.Session] = None,
connection_name: Optional[str] = None,
Expand Down Expand Up @@ -140,6 +141,7 @@ def _from_bq(
def predict(
self,
X: Union[bpd.DataFrame, bpd.Series],
*,
temperature: float = 0.0,
max_output_tokens: int = 128,
top_k: int = 40,
Expand Down Expand Up @@ -273,6 +275,7 @@ class PaLM2TextEmbeddingGenerator(base.Predictor):

def __init__(
self,
*,
model_name: Literal[
"textembedding-gecko", "textembedding-gecko-multilingual"
] = "textembedding-gecko",
Expand Down Expand Up @@ -415,6 +418,7 @@ class GeminiTextGenerator(base.Predictor):

def __init__(
self,
*,
session: Optional[bigframes.Session] = None,
connection_name: Optional[str] = None,
):
Expand Down Expand Up @@ -475,6 +479,7 @@ def _from_bq(
def predict(
self,
X: Union[bpd.DataFrame, bpd.Series],
*,
temperature: float = 0.9,
max_output_tokens: int = 8192,
top_k: int = 40,
Expand Down
6 changes: 6 additions & 0 deletions 6 bigframes/ml/metrics/_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
def r2_score(
y_true: Union[bpd.DataFrame, bpd.Series],
y_pred: Union[bpd.DataFrame, bpd.Series],
*,
force_finite=True,
) -> float:
y_true_series, y_pred_series = utils.convert_to_series(y_true, y_pred)
Expand Down Expand Up @@ -61,6 +62,7 @@ def r2_score(
def accuracy_score(
y_true: Union[bpd.DataFrame, bpd.Series],
y_pred: Union[bpd.DataFrame, bpd.Series],
*,
normalize=True,
) -> float:
# TODO(ashleyxu): support sample_weight as the parameter
Expand All @@ -83,6 +85,7 @@ def accuracy_score(
def roc_curve(
y_true: Union[bpd.DataFrame, bpd.Series],
y_score: Union[bpd.DataFrame, bpd.Series],
*,
drop_intermediate: bool = True,
) -> Tuple[bpd.Series, bpd.Series, bpd.Series]:
# TODO(bmil): Add multi-class support
Expand Down Expand Up @@ -227,6 +230,7 @@ def confusion_matrix(
def recall_score(
y_true: Union[bpd.DataFrame, bpd.Series],
y_pred: Union[bpd.DataFrame, bpd.Series],
*,
average: str = "binary",
) -> pd.Series:
# TODO(ashleyxu): support more average type, default to "binary"
Expand Down Expand Up @@ -263,6 +267,7 @@ def recall_score(
def precision_score(
y_true: Union[bpd.DataFrame, bpd.Series],
y_pred: Union[bpd.DataFrame, bpd.Series],
*,
average: str = "binary",
) -> pd.Series:
# TODO(ashleyxu): support more average type, default to "binary"
Expand Down Expand Up @@ -301,6 +306,7 @@ def precision_score(
def f1_score(
y_true: Union[bpd.DataFrame, bpd.Series],
y_pred: Union[bpd.DataFrame, bpd.Series],
*,
average: str = "binary",
) -> pd.Series:
# TODO(ashleyxu): support more average type, default to "binary"
Expand Down
1 change: 1 addition & 0 deletions 1 bigframes/ml/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def __init__(
endpoint: str,
input: Mapping[str, str],
output: Mapping[str, str],
*,
session: Optional[bigframes.Session] = None,
connection_name: Optional[str] = None,
):
Expand Down
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.