From e191a9c6ce4d2c15484e5d95300388fb24a298e9 Mon Sep 17 00:00:00 2001
From: Ashley Xu <ashleyxu@google.com>
Date: Tue, 2 Apr 2024 16:37:21 +0000
Subject: [PATCH] docs: address more comments from technical writers to meet
 legal purposes

---
 bigframes/ml/base.py                           | 10 +++++-----
 bigframes/ml/ensemble.py                       | 18 +++++++++---------
 bigframes/ml/forecasting.py                    |  6 +++---
 bigframes/ml/metrics/_metrics.py               |  2 +-
 bigframes/ml/model_selection.py                |  4 ++--
 bigframes/ml/pipeline.py                       |  2 +-
 bigframes/ml/preprocessing.py                  |  2 +-
 .../ml_fundamentals_bq_dataframes.ipynb        |  2 +-
 .../regression/sklearn_linear_regression.ipynb |  2 +-
 .../pandas/core/config_init.py                 | 10 +++++-----
 .../bigframes_vendored/xgboost/sklearn.py      |  2 +-
 11 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/bigframes/ml/base.py b/bigframes/ml/base.py
index 5e7aada8de..c57cb78791 100644
--- a/bigframes/ml/base.py
+++ b/bigframes/ml/base.py
@@ -77,7 +77,7 @@ def fit_transform(self, x_train: Union[DataFrame, Series], y_train: Union[DataFr
     """
 
     def __repr__(self):
-        """Print the estimator's constructor with all non-default parameter values"""
+        """Print the estimator's constructor with all non-default parameter values."""
 
         # Estimator pretty printer adapted from Sklearn's, which is in turn an adaption of
         # the inbuilt pretty-printer in CPython
@@ -106,13 +106,13 @@ def predict(self, X):
     def register(self: _T, vertex_ai_model_id: Optional[str] = None) -> _T:
         """Register the model to Vertex AI.
 
-        After register, go to Google Cloud Console (https://console.cloud.google.com/vertex-ai/models)
+        After registering the model, go to the Google Cloud console (https://console.cloud.google.com/vertex-ai/models)
         to manage the model registries.
         Refer to https://cloud.google.com/vertex-ai/docs/model-registry/introduction for more options.
 
         Args:
             vertex_ai_model_id (Optional[str], default None):
-                optional string id as model id in Vertex. If not set, will by default to 'bigframes_{bq_model_id}'.
+                Optional string id as model id in Vertex. If not set, will default to 'bigframes_{bq_model_id}'.
                 Vertex Ai model id will be truncated to 63 characters due to its limitation.
 
         Returns:
@@ -191,9 +191,9 @@ def to_gbq(self: _T, model_name: str, replace: bool = False) -> _T:
 
         Args:
             model_name (str):
-                the name of the model.
+                The name of the model.
             replace (bool, default False):
-                whether to replace if the model already exists. Default to False.
+                Whether to replace if the model already exists. Default to False.
 
         Returns:
             Saved transformer."""
diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py
index 72ea600c58..a8f0329145 100644
--- a/bigframes/ml/ensemble.py
+++ b/bigframes/ml/ensemble.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Ensemble models. This module is styled after Scikit-Learn's ensemble module:
+"""Ensemble models. This module is styled after scikit-learn's ensemble module:
 https://scikit-learn.org/stable/modules/ensemble.html"""
 
 from __future__ import annotations
@@ -190,9 +190,9 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBRegressor:
 
         Args:
             model_name (str):
-                the name of the model.
+                The name of the model.
             replace (bool, default False):
-                whether to replace if the model already exists. Default to False.
+                Whether to replace if the model already exists. Default to False.
 
         Returns: saved model."""
         if not self._bqml_model:
@@ -343,9 +343,9 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBClassifier:
 
         Args:
             model_name (str):
-                the name of the model.
+                The name of the model.
             replace (bool, default False):
-                whether to replace if the model already exists. Default to False.
+                Whether to replace if the model already exists. Default to False.
 
         Returns:
             XGBClassifier: saved model."""
@@ -506,9 +506,9 @@ def to_gbq(self, model_name: str, replace: bool = False) -> RandomForestRegresso
 
         Args:
             model_name (str):
-                the name of the model.
+                The name of the model.
             replace (bool, default False):
-                whether to replace if the model already exists. Default to False.
+                Whether to replace if the model already exists. Default to False.
 
         Returns:
             RandomForestRegressor: saved model."""
@@ -669,9 +669,9 @@ def to_gbq(self, model_name: str, replace: bool = False) -> RandomForestClassifi
 
         Args:
             model_name (str):
-                the name of the model.
+                The name of the model.
             replace (bool, default False):
-                whether to replace if the model already exists. Default to False.
+                Whether to replace if the model already exists. Default to False.
 
         Returns:
             RandomForestClassifier: saved model."""
diff --git a/bigframes/ml/forecasting.py b/bigframes/ml/forecasting.py
index 7993327200..e50a8ed35b 100644
--- a/bigframes/ml/forecasting.py
+++ b/bigframes/ml/forecasting.py
@@ -248,12 +248,12 @@ def predict(
                 an int value that specifies the number of time points to forecast.
                 The default value is 3, and the maximum value is 1000.
             confidence_level (float, default 0.95):
-                a float value that specifies percentage of the future values that fall in the prediction interval.
+                A float value that specifies the percentage of the future values that fall in the prediction interval.
                 The valid input range is [0.0, 1.0).
 
         Returns:
             bigframes.dataframe.DataFrame: The predicted DataFrames. Which
-                contains 2 columns "forecast_timestamp" and "forecast_value".
+                contains 2 columns: "forecast_timestamp" and "forecast_value".
         """
         if horizon < 1 or horizon > 1000:
             raise ValueError(f"horizon must be [1, 1000], but is {horizon}.")
@@ -284,7 +284,7 @@ def detect_anomalies(
                 Identifies the custom threshold to use for anomaly detection. The value must be in the range [0, 1), with a default value of 0.95.
 
         Returns:
-            bigframes.dataframe.DataFrame: detected DataFrame."""
+            bigframes.dataframe.DataFrame: Detected DataFrame."""
         if anomaly_prob_threshold < 0.0 or anomaly_prob_threshold >= 1.0:
             raise ValueError(
                 f"anomaly_prob_threshold must be [0.0, 1.0), but is {anomaly_prob_threshold}."
diff --git a/bigframes/ml/metrics/_metrics.py b/bigframes/ml/metrics/_metrics.py
index ee86798b33..0687d177d2 100644
--- a/bigframes/ml/metrics/_metrics.py
+++ b/bigframes/ml/metrics/_metrics.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 """Metrics functions for evaluating models. This module is styled after
-Scikit-Learn's metrics module: https://scikit-learn.org/stable/modules/metrics.html."""
+scikit-learn's metrics module: https://scikit-learn.org/stable/modules/metrics.html."""
 
 import inspect
 import typing
diff --git a/bigframes/ml/model_selection.py b/bigframes/ml/model_selection.py
index 443b9e7be6..42c13fdb40 100644
--- a/bigframes/ml/model_selection.py
+++ b/bigframes/ml/model_selection.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 """Functions for test/train split and model tuning. This module is styled after
-Scikit-Learn's model_selection module:
+scikit-learn's model_selection module:
 https://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection."""
 
 
@@ -51,7 +51,7 @@ def train_test_split(
         List[Union[bigframes.dataframe.DataFrame, bigframes.series.Series]]: A list of BigQuery DataFrames or Series.
     """
 
-    # TODO(garrettwu): Scikit-Learn throws an error when the dataframes don't have the same
+    # TODO(garrettwu): scikit-learn throws an error when the dataframes don't have the same
     # number of rows. We probably want to do something similar. Now the implementation is based
     # on index. We'll move to based on ordering first.
 
diff --git a/bigframes/ml/pipeline.py b/bigframes/ml/pipeline.py
index 92a3bae77d..5df2378575 100644
--- a/bigframes/ml/pipeline.py
+++ b/bigframes/ml/pipeline.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""For composing estimators together. This module is styled after Scikit-Learn's
+"""For composing estimators together. This module is styled after scikit-learn's
 pipeline module: https://scikit-learn.org/stable/modules/pipeline.html."""
 
 
diff --git a/bigframes/ml/preprocessing.py b/bigframes/ml/preprocessing.py
index fd7d44f731..673ee27db0 100644
--- a/bigframes/ml/preprocessing.py
+++ b/bigframes/ml/preprocessing.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 """Transformers that prepare data for other estimators. This module is styled after
-Scikit-Learn's preprocessing module: https://scikit-learn.org/stable/modules/preprocessing.html."""
+scikit-learn's preprocessing module: https://scikit-learn.org/stable/modules/preprocessing.html."""
 
 from __future__ import annotations
 
diff --git a/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb b/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb
index b3c965aded..e7b69f017b 100644
--- a/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb
+++ b/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb
@@ -1051,7 +1051,7 @@
       "source": [
         "## Estimators\n",
         "\n",
-        "Following Scikit-Learn, all learning components are \"estimators\"; objects that can learn from training data and then apply themselves to new data. Estimators share the following patterns:\n",
+        "Following scikit-learn, all learning components are \"estimators\"; objects that can learn from training data and then apply themselves to new data. Estimators share the following patterns:\n",
         "\n",
         "- a constructor that takes a list of parameters\n",
         "- a standard string representation that shows the class name and all non-default parameters, e.g. `LinearRegression(fit_intercept=False)`\n",
diff --git a/notebooks/regression/sklearn_linear_regression.ipynb b/notebooks/regression/sklearn_linear_regression.ipynb
index ec14d15cdf..2873527449 100644
--- a/notebooks/regression/sklearn_linear_regression.ipynb
+++ b/notebooks/regression/sklearn_linear_regression.ipynb
@@ -7,7 +7,7 @@
    "source": [
     "# Using ML - SKLearn linear regression\n",
     "\n",
-    "This demo shows how we can implement a linear regression in BigQuery DataFrames ML, with API that is exactly compatible with Scikit-Learn."
+    "This demo shows how we can implement a linear regression in BigQuery DataFrames ML, with an API that is exactly compatible with scikit-learn."
    ]
   },
   {
diff --git a/third_party/bigframes_vendored/pandas/core/config_init.py b/third_party/bigframes_vendored/pandas/core/config_init.py
index ecc103d7c8..a3178e2761 100644
--- a/third_party/bigframes_vendored/pandas/core/config_init.py
+++ b/third_party/bigframes_vendored/pandas/core/config_init.py
@@ -59,18 +59,18 @@
     repr_mode (Literal[`head`, `deferred`]):
         `head`:
             Execute, download, and display results (limited to head) from
-            dataframe and series objects during repr.
+            DataFrame and Series objects during repr.
         `deferred`:
-            Prevent executions from repr statements in dataframe and series objects.
-            Instead estimated bytes processed will be shown. Dataframe and Series
+            Prevent executions from repr statements in DataFrame and Series objects.
+            Instead, estimated bytes processed will be shown. DataFrame and Series
             objects can still be computed with methods that explicitly execute and
             download results.
     max_info_columns (int):
         max_info_columns is used in DataFrame.info method to decide if
-        per column information will be printed.
+        information in each column will be printed.
     max_info_rows (int or None):
         df.info() will usually show null-counts for each column.
-        For large frames this can be quite slow. max_info_rows and max_info_cols
+        For large frames, this can be quite slow. max_info_rows and max_info_cols
         limit this null check only to frames with smaller dimensions than
         specified.
     memory_usage (bool):
diff --git a/third_party/bigframes_vendored/xgboost/sklearn.py b/third_party/bigframes_vendored/xgboost/sklearn.py
index 250e34dc2c..424b17a371 100644
--- a/third_party/bigframes_vendored/xgboost/sklearn.py
+++ b/third_party/bigframes_vendored/xgboost/sklearn.py
@@ -1,4 +1,4 @@
-"""Scikit-Learn Wrapper interface for XGBoost."""
+"""scikit-learn Wrapper interface for XGBoost."""
 
 from typing import Any