googleapis · gcf-merge-on-green · Feb 21, 2024 · Feb 14, 2024 · Feb 16, 2024 · Feb 21, 2024
@@ -127,6 +127,10 @@ def register(self: _T, vertex_ai_model_id: Optional[str] = None) -> _T:
        self._bqml_model.register(vertex_ai_model_id)
        return self

+    @abc.abstractmethod
+    def to_gbq(self, model_name, replace):
+        pass
+

 class TrainablePredictor(Predictor):
    """A BigQuery DataFrames ML Model base class that can be used to fit and predict outputs.
@@ -141,11 +145,6 @@ def _fit(self, X, y, transforms=None):
    def score(self, X, y):
        pass

-    # TODO(b/291812029): move to Predictor after implement in LLM and imported models
-    @abc.abstractmethod
-    def to_gbq(self, model_name, replace):
-        pass
-

 class SupervisedTrainablePredictor(TrainablePredictor):
    """A BigQuery DataFrames ML Supervised Model base class that can be used to fit and predict outputs.
@@ -165,7 +164,7 @@ def fit(
 class UnsupervisedTrainablePredictor(TrainablePredictor):
    """A BigQuery DataFrames ML Unsupervised Model base class that can be used to fit and predict outputs.

-    Only need to provide both X (y is optional and ignored) in unsupervised tasks."""
+    Only X needs to be provided (y is optional and ignored) in unsupervised tasks."""

    _T = TypeVar("_T", bound="UnsupervisedTrainablePredictor")


@@ -58,6 +58,7 @@ class XGBRegressor(
    def __init__(
        self,
        num_parallel_tree: int = 1,
+        *,
        booster: Literal["gbtree", "dart"] = "gbtree",
        dart_normalized_type: Literal["tree", "forest"] = "tree",
        tree_method: Literal["auto", "exact", "approx", "hist"] = "auto",
@@ -215,6 +216,7 @@ class XGBClassifier(
    def __init__(
        self,
        num_parallel_tree: int = 1,
+        *,
        booster: Literal["gbtree", "dart"] = "gbtree",
        dart_normalized_type: Literal["tree", "forest"] = "tree",
        tree_method: Literal["auto", "exact", "approx", "hist"] = "auto",
@@ -372,6 +374,7 @@ class RandomForestRegressor(
    def __init__(
        self,
        num_parallel_tree: int = 100,
+        *,
        tree_method: Literal["auto", "exact", "approx", "hist"] = "auto",
        min_tree_child_weight: int = 1,
        colsample_bytree=1.0,
@@ -538,6 +541,7 @@ class RandomForestClassifier(
    def __init__(
        self,
        num_parallel_tree: int = 100,
+        *,
        tree_method: Literal["auto", "exact", "approx", "hist"] = "auto",
        min_tree_child_weight: int = 1,
        colsample_bytree: float = 1.0,

@@ -87,7 +87,7 @@ def _fit(
        )

    def predict(
-        self, X=None, horizon: int = 3, confidence_level: float = 0.95
+        self, X=None, *, horizon: int = 3, confidence_level: float = 0.95
    ) -> bpd.DataFrame:
        """Predict the closest cluster for each sample in X.


@@ -32,15 +32,17 @@ class TensorFlowModel(base.Predictor):
    """Imported TensorFlow model.

    Args:
+        model_path (str):
+            Cloud Storage path that holds the model files.
        session (BigQuery Session):
            BQ session to create the model
-        model_path (str):
-            GCS path that holds the model files."""
+    """

    def __init__(
        self,
+        model_path: str,
+        *,
        session: Optional[bigframes.Session] = None,
-        model_path: Optional[str] = None,
    ):
        self.session = session or bpd.get_global_session()
        self.model_path = model_path
@@ -59,7 +61,7 @@ def _from_bq(
    ) -> TensorFlowModel:
        assert model.model_type == "TENSORFLOW"

-        tf_model = cls(session=session, model_path=None)
+        tf_model = cls(session=session, model_path="")
        tf_model._bqml_model = core.BqmlModel(session, model)
        return tf_model

@@ -109,15 +111,17 @@ class ONNXModel(base.Predictor):
    """Imported Open Neural Network Exchange (ONNX) model.

    Args:
+        model_path (str):
+            Cloud Storage path that holds the model files.
        session (BigQuery Session):
            BQ session to create the model
-        model_path (str):
-            Cloud Storage path that holds the model files."""
+    """

    def __init__(
        self,
+        model_path: str,
+        *,
        session: Optional[bigframes.Session] = None,
-        model_path: Optional[str] = None,
    ):
        self.session = session or bpd.get_global_session()
        self.model_path = model_path
@@ -134,7 +138,7 @@ def _create_bqml_model(self):
    def _from_bq(cls, session: bigframes.Session, model: bigquery.Model) -> ONNXModel:
        assert model.model_type == "ONNX"

-        onnx_model = cls(session=session, model_path=None)
+        onnx_model = cls(session=session, model_path="")
        onnx_model._bqml_model = core.BqmlModel(session, model)
        return onnx_model

@@ -189,8 +193,8 @@ class XGBoostModel(base.Predictor):
        https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-create-xgboost#limitations

    Args:
-        session (BigQuery Session):
-            BQ session to create the model
+        model_path (str):
+            Cloud Storage path that holds the model files.
        input (Dict, default None):
            Specify the model input schema information when you
            create the XGBoost model. The input should be the format of
@@ -203,15 +207,17 @@ class XGBoostModel(base.Predictor):
            {field_name: field_type}. Output is optional only if feature_names
            and feature_types are both specified in the model file. Supported types
            are "bool", "string", "int64", "float64", "array<bool>", "array<string>", "array<int64>", "array<float64>".
-        model_path (str):
-            Cloud Storage path that holds the model files."""
+        session (BigQuery Session):
+            BQ session to create the model
+    """

    def __init__(
        self,
-        session: Optional[bigframes.Session] = None,
+        model_path: str,
+        *,
        input: Mapping[str, str] = {},
        output: Mapping[str, str] = {},
-        model_path: Optional[str] = None,
+        session: Optional[bigframes.Session] = None,
    ):
        self.session = session or bpd.get_global_session()
        self.model_path = model_path
@@ -248,7 +254,7 @@ def _from_bq(
    ) -> XGBoostModel:
        assert model.model_type == "XGBOOST"

-        xgboost_model = cls(session=session, model_path=None)
+        xgboost_model = cls(session=session, model_path="")
        xgboost_model._bqml_model = core.BqmlModel(session, model)
        return xgboost_model


@@ -58,6 +58,7 @@ class LinearRegression(

    def __init__(
        self,
+        *,
        optimize_strategy: Literal[
            "auto_strategy", "batch_gradient_descent", "normal_equation"
        ] = "normal_equation",
@@ -192,6 +193,7 @@ class LogisticRegression(
    # TODO(ashleyxu) support class_weights in the constructor.
    def __init__(
        self,
+        *,
        fit_intercept: bool = True,
        class_weights: Optional[Union[Literal["balanced"], Dict[str, float]]] = None,
    ):

@@ -66,6 +66,7 @@ class PaLM2TextGenerator(base.Predictor):

    def __init__(
        self,
+        *,
        model_name: Literal["text-bison", "text-bison-32k"] = "text-bison",
        session: Optional[bigframes.Session] = None,
        connection_name: Optional[str] = None,
@@ -140,6 +141,7 @@ def _from_bq(
    def predict(
        self,
        X: Union[bpd.DataFrame, bpd.Series],
+        *,
        temperature: float = 0.0,
        max_output_tokens: int = 128,
        top_k: int = 40,
@@ -273,6 +275,7 @@ class PaLM2TextEmbeddingGenerator(base.Predictor):

    def __init__(
        self,
+        *,
        model_name: Literal[
            "textembedding-gecko", "textembedding-gecko-multilingual"
        ] = "textembedding-gecko",
@@ -415,6 +418,7 @@ class GeminiTextGenerator(base.Predictor):

    def __init__(
        self,
+        *,
        session: Optional[bigframes.Session] = None,
        connection_name: Optional[str] = None,
    ):
@@ -475,6 +479,7 @@ def _from_bq(
    def predict(
        self,
        X: Union[bpd.DataFrame, bpd.Series],
+        *,
        temperature: float = 0.9,
        max_output_tokens: int = 8192,
        top_k: int = 40,

@@ -34,6 +34,7 @@
 def r2_score(
    y_true: Union[bpd.DataFrame, bpd.Series],
    y_pred: Union[bpd.DataFrame, bpd.Series],
+    *,
    force_finite=True,
 ) -> float:
    y_true_series, y_pred_series = utils.convert_to_series(y_true, y_pred)
@@ -61,6 +62,7 @@ def r2_score(
 def accuracy_score(
    y_true: Union[bpd.DataFrame, bpd.Series],
    y_pred: Union[bpd.DataFrame, bpd.Series],
+    *,
    normalize=True,
 ) -> float:
    # TODO(ashleyxu): support sample_weight as the parameter
@@ -83,6 +85,7 @@ def accuracy_score(
 def roc_curve(
    y_true: Union[bpd.DataFrame, bpd.Series],
    y_score: Union[bpd.DataFrame, bpd.Series],
+    *,
    drop_intermediate: bool = True,
 ) -> Tuple[bpd.Series, bpd.Series, bpd.Series]:
    # TODO(bmil): Add multi-class support
@@ -227,6 +230,7 @@ def confusion_matrix(
 def recall_score(
    y_true: Union[bpd.DataFrame, bpd.Series],
    y_pred: Union[bpd.DataFrame, bpd.Series],
+    *,
    average: str = "binary",
 ) -> pd.Series:
    # TODO(ashleyxu): support more average type, default to "binary"
@@ -263,6 +267,7 @@ def recall_score(
 def precision_score(
    y_true: Union[bpd.DataFrame, bpd.Series],
    y_pred: Union[bpd.DataFrame, bpd.Series],
+    *,
    average: str = "binary",
 ) -> pd.Series:
    # TODO(ashleyxu): support more average type, default to "binary"
@@ -301,6 +306,7 @@ def precision_score(
 def f1_score(
    y_true: Union[bpd.DataFrame, bpd.Series],
    y_pred: Union[bpd.DataFrame, bpd.Series],
+    *,
    average: str = "binary",
 ) -> pd.Series:
    # TODO(ashleyxu): support more average type, default to "binary"

@@ -54,6 +54,7 @@ def __init__(
        endpoint: str,
        input: Mapping[str, str],
        output: Mapping[str, str],
+        *,
        session: Optional[bigframes.Session] = None,
        connection_name: Optional[str] = None,
    ):