From e191a9c6ce4d2c15484e5d95300388fb24a298e9 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Tue, 2 Apr 2024 16:37:21 +0000 Subject: [PATCH] docs: address more comments from technical writers to meet legal purposes --- bigframes/ml/base.py | 10 +++++----- bigframes/ml/ensemble.py | 18 +++++++++--------- bigframes/ml/forecasting.py | 6 +++--- bigframes/ml/metrics/_metrics.py | 2 +- bigframes/ml/model_selection.py | 4 ++-- bigframes/ml/pipeline.py | 2 +- bigframes/ml/preprocessing.py | 2 +- .../ml_fundamentals_bq_dataframes.ipynb | 2 +- .../regression/sklearn_linear_regression.ipynb | 2 +- .../pandas/core/config_init.py | 10 +++++----- .../bigframes_vendored/xgboost/sklearn.py | 2 +- 11 files changed, 30 insertions(+), 30 deletions(-) diff --git a/bigframes/ml/base.py b/bigframes/ml/base.py index 5e7aada8de..c57cb78791 100644 --- a/bigframes/ml/base.py +++ b/bigframes/ml/base.py @@ -77,7 +77,7 @@ def fit_transform(self, x_train: Union[DataFrame, Series], y_train: Union[DataFr """ def __repr__(self): - """Print the estimator's constructor with all non-default parameter values""" + """Print the estimator's constructor with all non-default parameter values.""" # Estimator pretty printer adapted from Sklearn's, which is in turn an adaption of # the inbuilt pretty-printer in CPython @@ -106,13 +106,13 @@ def predict(self, X): def register(self: _T, vertex_ai_model_id: Optional[str] = None) -> _T: """Register the model to Vertex AI. - After register, go to Google Cloud Console (https://console.cloud.google.com/vertex-ai/models) + After register, go to the Google Cloud console (https://console.cloud.google.com/vertex-ai/models) to manage the model registries. Refer to https://cloud.google.com/vertex-ai/docs/model-registry/introduction for more options. Args: vertex_ai_model_id (Optional[str], default None): - optional string id as model id in Vertex. If not set, will by default to 'bigframes_{bq_model_id}'. + Optional string id as model id in Vertex. 
If not set, will default to 'bigframes_{bq_model_id}'. Vertex Ai model id will be truncated to 63 characters due to its limitation. Returns: @@ -191,9 +191,9 @@ def to_gbq(self: _T, model_name: str, replace: bool = False) -> _T: Args: model_name (str): - the name of the model. + The name of the model. replace (bool, default False): - whether to replace if the model already exists. Default to False. + Whether to replace if the model already exists. Default to False. Returns: Saved transformer.""" diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py index 72ea600c58..a8f0329145 100644 --- a/bigframes/ml/ensemble.py +++ b/bigframes/ml/ensemble.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Ensemble models. This module is styled after Scikit-Learn's ensemble module: +"""Ensemble models. This module is styled after scikit-learn's ensemble module: https://scikit-learn.org/stable/modules/ensemble.html""" from __future__ import annotations @@ -190,9 +190,9 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBRegressor: Args: model_name (str): - the name of the model. + The name of the model. replace (bool, default False): - whether to replace if the model already exists. Default to False. + Whether to replace if the model already exists. Default to False. Returns: saved model.""" if not self._bqml_model: @@ -343,9 +343,9 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBClassifier: Args: model_name (str): - the name of the model. + The name of the model. replace (bool, default False): - whether to replace if the model already exists. Default to False. + Whether to replace if the model already exists. Default to False. Returns: XGBClassifier: saved model.""" @@ -506,9 +506,9 @@ def to_gbq(self, model_name: str, replace: bool = False) -> RandomForestRegresso Args: model_name (str): - the name of the model. + The name of the model. 
replace (bool, default False): - whether to replace if the model already exists. Default to False. + Whether to replace if the model already exists. Default to False. Returns: RandomForestRegressor: saved model.""" @@ -669,9 +669,9 @@ def to_gbq(self, model_name: str, replace: bool = False) -> RandomForestClassifi Args: model_name (str): - the name of the model. + The name of the model. replace (bool, default False): - whether to replace if the model already exists. Default to False. + Whether to replace if the model already exists. Default to False. Returns: RandomForestClassifier: saved model.""" diff --git a/bigframes/ml/forecasting.py b/bigframes/ml/forecasting.py index 7993327200..e50a8ed35b 100644 --- a/bigframes/ml/forecasting.py +++ b/bigframes/ml/forecasting.py @@ -248,12 +248,12 @@ def predict( an int value that specifies the number of time points to forecast. The default value is 3, and the maximum value is 1000. confidence_level (float, default 0.95): - a float value that specifies percentage of the future values that fall in the prediction interval. + A float value that specifies percentage of the future values that fall in the prediction interval. The valid input range is [0.0, 1.0). Returns: bigframes.dataframe.DataFrame: The predicted DataFrames. Which - contains 2 columns "forecast_timestamp" and "forecast_value". + contains 2 columns: "forecast_timestamp" and "forecast_value". """ if horizon < 1 or horizon > 1000: raise ValueError(f"horizon must be [1, 1000], but is {horizon}.") @@ -284,7 +284,7 @@ def detect_anomalies( Identifies the custom threshold to use for anomaly detection. The value must be in the range [0, 1), with a default value of 0.95. Returns: - bigframes.dataframe.DataFrame: detected DataFrame.""" + bigframes.dataframe.DataFrame: Detected DataFrame.""" if anomaly_prob_threshold < 0.0 or anomaly_prob_threshold >= 1.0: raise ValueError( f"anomaly_prob_threshold must be [0.0, 1.0), but is {anomaly_prob_threshold}." 
diff --git a/bigframes/ml/metrics/_metrics.py b/bigframes/ml/metrics/_metrics.py index ee86798b33..0687d177d2 100644 --- a/bigframes/ml/metrics/_metrics.py +++ b/bigframes/ml/metrics/_metrics.py @@ -13,7 +13,7 @@ # limitations under the License. """Metrics functions for evaluating models. This module is styled after -Scikit-Learn's metrics module: https://scikit-learn.org/stable/modules/metrics.html.""" +scikit-learn's metrics module: https://scikit-learn.org/stable/modules/metrics.html.""" import inspect import typing diff --git a/bigframes/ml/model_selection.py b/bigframes/ml/model_selection.py index 443b9e7be6..42c13fdb40 100644 --- a/bigframes/ml/model_selection.py +++ b/bigframes/ml/model_selection.py @@ -13,7 +13,7 @@ # limitations under the License. """Functions for test/train split and model tuning. This module is styled after -Scikit-Learn's model_selection module: +scikit-learn's model_selection module: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection.""" @@ -51,7 +51,7 @@ def train_test_split( List[Union[bigframes.dataframe.DataFrame, bigframes.series.Series]]: A list of BigQuery DataFrames or Series. """ - # TODO(garrettwu): Scikit-Learn throws an error when the dataframes don't have the same + # TODO(garrettwu): scikit-learn throws an error when the dataframes don't have the same # number of rows. We probably want to do something similar. Now the implementation is based # on index. We'll move to based on ordering first. diff --git a/bigframes/ml/pipeline.py b/bigframes/ml/pipeline.py index 92a3bae77d..5df2378575 100644 --- a/bigframes/ml/pipeline.py +++ b/bigframes/ml/pipeline.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""For composing estimators together. This module is styled after Scikit-Learn's +"""For composing estimators together. 
This module is styled after scikit-learn's pipeline module: https://scikit-learn.org/stable/modules/pipeline.html.""" diff --git a/bigframes/ml/preprocessing.py b/bigframes/ml/preprocessing.py index fd7d44f731..673ee27db0 100644 --- a/bigframes/ml/preprocessing.py +++ b/bigframes/ml/preprocessing.py @@ -13,7 +13,7 @@ # limitations under the License. """Transformers that prepare data for other estimators. This module is styled after -Scikit-Learn's preprocessing module: https://scikit-learn.org/stable/modules/preprocessing.html.""" +scikit-learn's preprocessing module: https://scikit-learn.org/stable/modules/preprocessing.html.""" from __future__ import annotations diff --git a/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb b/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb index b3c965aded..e7b69f017b 100644 --- a/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb +++ b/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb @@ -1051,7 +1051,7 @@ "source": [ "## Estimators\n", "\n", - "Following Scikit-Learn, all learning components are \"estimators\"; objects that can learn from training data and then apply themselves to new data. Estimators share the following patterns:\n", + "Following scikit-learn, all learning components are \"estimators\"; objects that can learn from training data and then apply themselves to new data. Estimators share the following patterns:\n", "\n", "- a constructor that takes a list of parameters\n", "- a standard string representation that shows the class name and all non-default parameters, e.g. 
`LinearRegression(fit_intercept=False)`\n", diff --git a/notebooks/regression/sklearn_linear_regression.ipynb b/notebooks/regression/sklearn_linear_regression.ipynb index ec14d15cdf..2873527449 100644 --- a/notebooks/regression/sklearn_linear_regression.ipynb +++ b/notebooks/regression/sklearn_linear_regression.ipynb @@ -7,7 +7,7 @@ "source": [ "# Using ML - SKLearn linear regression\n", "\n", - "This demo shows how we can implement a linear regression in BigQuery DataFrames ML, with API that is exactly compatible with Scikit-Learn." + "This demo shows how we can implement a linear regression in BigQuery DataFrames ML, with API that is exactly compatible with scikit-learn." ] }, { diff --git a/third_party/bigframes_vendored/pandas/core/config_init.py b/third_party/bigframes_vendored/pandas/core/config_init.py index ecc103d7c8..a3178e2761 100644 --- a/third_party/bigframes_vendored/pandas/core/config_init.py +++ b/third_party/bigframes_vendored/pandas/core/config_init.py @@ -59,18 +59,18 @@ repr_mode (Literal[`head`, `deferred`]): `head`: Execute, download, and display results (limited to head) from - dataframe and series objects during repr. + DataFrame and Series objects during repr. `deferred`: - Prevent executions from repr statements in dataframe and series objects. - Instead estimated bytes processed will be shown. Dataframe and Series + Prevent executions from repr statements in DataFrame and Series objects. + Instead, estimated bytes processed will be shown. DataFrame and Series objects can still be computed with methods that explicitly execute and download results. max_info_columns (int): max_info_columns is used in DataFrame.info method to decide if - per column information will be printed. + information in each column will be printed. max_info_rows (int or None): df.info() will usually show null-counts for each column. - For large frames this can be quite slow. max_info_rows and max_info_cols + For large frames, this can be quite slow. 
max_info_rows and max_info_cols limit this null check only to frames with smaller dimensions than specified. memory_usage (bool): diff --git a/third_party/bigframes_vendored/xgboost/sklearn.py b/third_party/bigframes_vendored/xgboost/sklearn.py index 250e34dc2c..424b17a371 100644 --- a/third_party/bigframes_vendored/xgboost/sklearn.py +++ b/third_party/bigframes_vendored/xgboost/sklearn.py @@ -1,4 +1,4 @@ -"""Scikit-Learn Wrapper interface for XGBoost.""" +"""scikit-learn Wrapper interface for XGBoost.""" from typing import Any