From 78fa7597d295a7a497aff3ed7d33ca83c0a3dc04 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Mon, 9 Oct 2023 19:03:03 +0000 Subject: [PATCH 1/2] docs: add more preprocessing models into the docs menu. --- docs/templates/toc.yml | 12 ++++++-- .../sklearn/preprocessing/_data.py | 2 +- .../sklearn/preprocessing/_encoder.py | 30 +++++++++---------- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/docs/templates/toc.yml b/docs/templates/toc.yml index 891f15a51b..0758bb41d8 100644 --- a/docs/templates/toc.yml +++ b/docs/templates/toc.yml @@ -121,10 +121,18 @@ - items: - name: Overview uid: bigframes.ml.preprocessing - - name: OneHotEncoder - uid: bigframes.ml.preprocessing.OneHotEncoder + - name: KBinsDiscretizer + uid: bigframes.ml.preprocessing.KBinsDiscretizer + - name: LabelEncoder + uid: bigframes.ml.preprocessing.LabelEncoder + - name: MaxAbsScaler + uid: bigframes.ml.preprocessing.MaxAbsScaler + - name: MinMaxScaler + uid: bigframes.ml.preprocessing.MinMaxScaler - name: StandardScaler uid: bigframes.ml.preprocessing.StandardScaler + - name: OneHotEncoder + uid: bigframes.ml.preprocessing.OneHotEncoder name: preprocessing name: bigframes.ml name: BigQuery DataFrames diff --git a/third_party/bigframes_vendored/sklearn/preprocessing/_data.py b/third_party/bigframes_vendored/sklearn/preprocessing/_data.py index 58e16e135b..5ce102d573 100644 --- a/third_party/bigframes_vendored/sklearn/preprocessing/_data.py +++ b/third_party/bigframes_vendored/sklearn/preprocessing/_data.py @@ -29,7 +29,7 @@ class StandardScaler(BaseEstimator, TransformerMixin): individual features do not more or less look like standard normally distributed data (e.g. Gaussian with 0 mean and unit variance). - Examples: + **Examples:** .. code-block:: diff --git a/third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py b/third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py index cf660ece5d..8da9a98c53 100644 --- a/third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py +++ b/third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py @@ -17,6 +17,21 @@ class OneHotEncoder(BaseEstimator): Note that this method deviates from Scikit-Learn; instead of producing sparse binary columns, the encoding is a single column of `STRUCT`. + **Examples:** + + Given a dataset with two features, we let the encoder find the unique + values per feature and transform the data to a binary one-hot encoding. + + .. code-block:: + + from bigframes.ml.preprocessing import OneHotEncoder + import bigframes.pandas as bpd + + enc = OneHotEncoder() + X = bpd.DataFrame({"a": ["Male", "Female", "Female"], "b": ["1", "3", "2"]}) + enc.fit(X) + print(enc.transform(bpd.DataFrame({"a": ["Female", "Male"], "b": ["1", "4"]}))) + Args: drop (Optional[Literal["most_frequent"]], default None): Specifies a methodology to use to drop one of the categories per feature. @@ -37,21 +52,6 @@ class OneHotEncoder(BaseEstimator): when considering infrequent categories. If there are infrequent categories, max_categories includes the category representing the infrequent categories along with the frequent categories. Default None, set limit to 1,000,000. - - Examples: - - Given a dataset with two features, we let the encoder find the unique - values per feature and transform the data to a binary one-hot encoding. - - .. code-block:: - - from bigframes.ml.preprocessing import OneHotEncoder - import bigframes.pandas as bpd - - enc = OneHotEncoder() - X = bpd.DataFrame({"a": ["Male", "Female", "Female"], "b": ["1", "3", "2"]}) - enc.fit(X) - print(enc.transform(bpd.DataFrame({"a": ["Female", "Male"], "b": ["1", "4"]}))) """ def fit(self, X, y=None): From b0207902d58de16665a4aa16cc81a841b45793c9 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Mon, 9 Oct 2023 22:38:04 +0000 Subject: [PATCH 2/2] Trigger kokoro rebuild