From df8feed5a35095875e08e53948bc7de9a1d4d05a Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Tue, 14 Nov 2023 20:51:50 +0000 Subject: [PATCH 1/5] feat!: model.predict returns all the columns --- bigframes/ml/cluster.py | 4 +- bigframes/ml/decomposition.py | 9 +--- bigframes/ml/ensemble.py | 49 ++----------------- bigframes/ml/forecasting.py | 9 +--- bigframes/ml/imported.py | 22 +-------- bigframes/ml/linear_model.py | 24 ++------- bigframes/ml/llm.py | 12 +---- tests/system/large/ml/test_cluster.py | 4 +- tests/system/large/ml/test_ensemble.py | 2 +- tests/system/small/ml/test_cluster.py | 4 +- tests/system/small/ml/test_ensemble.py | 20 ++++++-- tests/system/small/ml/test_forecasting.py | 4 +- tests/system/small/ml/test_imported.py | 8 ++- tests/system/small/ml/test_linear_model.py | 8 ++- tests/system/small/ml/test_llm.py | 22 ++++----- .../sklearn/cluster/_kmeans.py | 16 +----- .../sklearn/linear_model/_base.py | 3 +- 17 files changed, 69 insertions(+), 151 deletions(-) diff --git a/bigframes/ml/cluster.py b/bigframes/ml/cluster.py index 772b90f666..c9f52ba0b6 100644 --- a/bigframes/ml/cluster.py +++ b/bigframes/ml/cluster.py @@ -17,7 +17,7 @@ from __future__ import annotations -from typing import cast, Dict, List, Optional, Union +from typing import Dict, List, Optional, Union from google.cloud import bigquery @@ -92,7 +92,7 @@ def predict( (X,) = utils.convert_to_dataframe(X) - return cast(bpd.DataFrame, self._bqml_model.predict(X)[["CENTROID_ID"]]) + return self._bqml_model.predict(X) def to_gbq(self, model_name: str, replace: bool = False) -> KMeans: """Save the model to BigQuery. diff --git a/bigframes/ml/decomposition.py b/bigframes/ml/decomposition.py index 8e6be6d28c..7cda7a6993 100644 --- a/bigframes/ml/decomposition.py +++ b/bigframes/ml/decomposition.py @@ -17,7 +17,7 @@ from __future__ import annotations -from typing import cast, List, Optional, Union +from typing import List, Optional, Union from google.cloud import bigquery @@ -106,12 +106,7 @@ def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: (X,) = utils.convert_to_dataframe(X) - return cast( - bpd.DataFrame, - self._bqml_model.predict(X)[ - ["principal_component_" + str(i + 1) for i in range(self.n_components)] - ], - ) + return self._bqml_model.predict(X) def to_gbq(self, model_name: str, replace: bool = False) -> PCA: """Save the model to BigQuery. diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py index 19ca8608ff..fcb3fe5343 100644 --- a/bigframes/ml/ensemble.py +++ b/bigframes/ml/ensemble.py @@ -17,7 +17,7 @@ from __future__ import annotations -from typing import cast, Dict, List, Literal, Optional, Union +from typing import Dict, List, Literal, Optional, Union from google.cloud import bigquery @@ -168,16 +168,7 @@ def predict( raise RuntimeError("A model must be fitted before predict") (X,) = utils.convert_to_dataframe(X) - df = self._bqml_model.predict(X) - return cast( - bpd.DataFrame, - df[ - [ - cast(str, field.name) - for field in self._bqml_model.model.label_columns - ] - ], - ) + return self._bqml_model.predict(X) def score( self, @@ -328,19 +319,9 @@ def _fit( def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: if not self._bqml_model: raise RuntimeError("A model must be fitted before predict") - (X,) = utils.convert_to_dataframe(X) - df = self._bqml_model.predict(X) - return cast( - bpd.DataFrame, - df[ - [ - cast(str, field.name) - for field in self._bqml_model.model.label_columns - ] - ], - ) + return self._bqml_model.predict(X) def score( self, @@ -486,19 +467,9 @@ def predict( ) -> bpd.DataFrame: if not self._bqml_model: raise RuntimeError("A model must be fitted before predict") - (X,) = utils.convert_to_dataframe(X) - df = self._bqml_model.predict(X) - return cast( - bpd.DataFrame, - df[ - [ - cast(str, field.name) - for field in self._bqml_model.model.label_columns - ] - ], - ) + return self._bqml_model.predict(X) def score( self, @@ -661,19 +632,9 @@ def predict( ) -> bpd.DataFrame: if not self._bqml_model: raise RuntimeError("A model must be fitted before predict") - (X,) = utils.convert_to_dataframe(X) - df = self._bqml_model.predict(X) - return cast( - bpd.DataFrame, - df[ - [ - cast(str, field.name) - for field in self._bqml_model.model.label_columns - ] - ], - ) + return self._bqml_model.predict(X) def score( self, diff --git a/bigframes/ml/forecasting.py b/bigframes/ml/forecasting.py index 8e309d5e73..cf23854fa0 100644 --- a/bigframes/ml/forecasting.py +++ b/bigframes/ml/forecasting.py @@ -16,7 +16,7 @@ from __future__ import annotations -from typing import cast, Dict, List, Optional, Union +from typing import Dict, List, Optional, Union from google.cloud import bigquery @@ -24,8 +24,6 @@ from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd -_PREDICT_OUTPUT_COLUMNS = ["forecast_timestamp", "forecast_value"] - class ARIMAPlus(base.SupervisedTrainablePredictor): """Time Series ARIMA Plus model.""" @@ -100,10 +98,7 @@ def predict(self, X=None) -> bpd.DataFrame: if not self._bqml_model: raise RuntimeError("A model must be fitted before predict") - return cast( - bpd.DataFrame, - self._bqml_model.forecast()[_PREDICT_OUTPUT_COLUMNS], - ) + return self._bqml_model.forecast() def score( self, diff --git a/bigframes/ml/imported.py b/bigframes/ml/imported.py index fb8aa98bef..f6afc9aa38 100644 --- a/bigframes/ml/imported.py +++ b/bigframes/ml/imported.py @@ -78,16 +78,7 @@ def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: (X,) = utils.convert_to_dataframe(X) - df = self._bqml_model.predict(X) - return cast( - bpd.DataFrame, - df[ - [ - cast(str, field.name) - for field in self._bqml_model.model.label_columns - ] - ], - ) + return self._bqml_model.predict(X) def to_gbq(self, model_name: str, replace: bool = False) -> TensorFlowModel: """Save the model to BigQuery. @@ -161,16 +152,7 @@ def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: (X,) = utils.convert_to_dataframe(X) - df = self._bqml_model.predict(X) - return cast( - bpd.DataFrame, - df[ - [ - cast(str, field.name) - for field in self._bqml_model.model.label_columns - ] - ], - ) + return self._bqml_model.predict(X) def to_gbq(self, model_name: str, replace: bool = False) -> ONNXModel: """Save the model to BigQuery. diff --git a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py index f11879500b..433d9fbc38 100644 --- a/bigframes/ml/linear_model.py +++ b/bigframes/ml/linear_model.py @@ -17,7 +17,7 @@ from __future__ import annotations -from typing import cast, Dict, List, Literal, Optional, Union +from typing import Dict, List, Literal, Optional, Union from google.cloud import bigquery @@ -145,16 +145,7 @@ def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: (X,) = utils.convert_to_dataframe(X) - df = self._bqml_model.predict(X) - return cast( - bpd.DataFrame, - df[ - [ - cast(str, field.name) - for field in self._bqml_model.model.label_columns - ] - ], - ) + return self._bqml_model.predict(X) def score( self, @@ -267,16 +258,7 @@ def predict( (X,) = utils.convert_to_dataframe(X) - df = self._bqml_model.predict(X) - return cast( - bpd.DataFrame, - df[ - [ - cast(str, field.name) - for field in self._bqml_model.model.label_columns - ] - ], - ) + return self._bqml_model.predict(X) def score( self, diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 3cfc28e61f..fde8ed0ee0 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -181,11 +181,7 @@ def predict( "top_p": top_p, "flatten_json_output": True, } - df = self._bqml_model.generate_text(X, options) - return cast( - bpd.DataFrame, - df[[_TEXT_GENERATE_RESULT_COLUMN]], - ) + return self._bqml_model.generate_text(X, options) class PaLM2TextEmbeddingGenerator(base.Predictor): @@ -287,8 +283,4 @@ def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: options = { "flatten_json_output": True, } - df = self._bqml_model.generate_text_embedding(X, options) - return cast( - bpd.DataFrame, - df[[_EMBED_TEXT_RESULT_COLUMN]], - ) + return self._bqml_model.generate_text_embedding(X, options) diff --git a/tests/system/large/ml/test_cluster.py b/tests/system/large/ml/test_cluster.py index f01116665f..9244c4b9f1 100644 --- a/tests/system/large/ml/test_cluster.py +++ b/tests/system/large/ml/test_cluster.py @@ -98,7 +98,9 @@ def test_cluster_configure_fit_score_predict( score_result, score_expected, check_exact=False, rtol=0.1 ) - result = model.predict(new_penguins).to_pandas() + predictions = model.predict(new_penguins).to_pandas() + assert predictions.shape == (4, 9) + result = predictions[["CENTROID_ID"]] expected = pd.DataFrame( {"CENTROID_ID": [2, 3, 1, 2]}, dtype="Int64", diff --git a/tests/system/large/ml/test_ensemble.py b/tests/system/large/ml/test_ensemble.py index a8613dfeb9..b98d7a757c 100644 --- a/tests/system/large/ml/test_ensemble.py +++ b/tests/system/large/ml/test_ensemble.py @@ -179,7 +179,7 @@ def test_xgbclassifier_default_params(penguins_df_default_index, dataset_id): ) -@pytest.mark.flaky(retries=2, delay=120) +# @pytest.mark.flaky(retries=2, delay=120) def test_xgbclassifier_dart_booster_multiple_params( penguins_df_default_index, dataset_id ): diff --git a/tests/system/small/ml/test_cluster.py b/tests/system/small/ml/test_cluster.py index 266a38e3ee..a9fec0bbce 100644 --- a/tests/system/small/ml/test_cluster.py +++ b/tests/system/small/ml/test_cluster.py @@ -62,7 +62,9 @@ def test_kmeans_predict(session, penguins_kmeans_model: cluster.KMeans): new_penguins = session.read_pandas(_PD_NEW_PENGUINS) - result = penguins_kmeans_model.predict(new_penguins).to_pandas() + predictions = penguins_kmeans_model.predict(new_penguins).to_pandas() + assert predictions.shape == (4, 9) + result = predictions[["CENTROID_ID"]] expected = pd.DataFrame( {"CENTROID_ID": [2, 3, 1, 2]}, dtype="Int64", diff --git a/tests/system/small/ml/test_ensemble.py b/tests/system/small/ml/test_ensemble.py index bba083d98d..55d9fef661 100644 --- a/tests/system/small/ml/test_ensemble.py +++ b/tests/system/small/ml/test_ensemble.py @@ -98,7 +98,9 @@ def test_xgbregressor_model_score_series( def test_xgbregressor_model_predict( penguins_xgbregressor_model: bigframes.ml.ensemble.XGBRegressor, new_penguins_df ): - result = penguins_xgbregressor_model.predict(new_penguins_df).to_pandas() + predictions = penguins_xgbregressor_model.predict(new_penguins_df).to_pandas() + assert predictions.shape == (3, 8) + result = predictions[["predicted_body_mass_g"]] expected = pandas.DataFrame( {"predicted_body_mass_g": ["4293.1538089", "3410.0271", "3357.944"]}, dtype="Float64", @@ -220,7 +222,9 @@ def test_xgbclassifier_model_score_series( def test_xgbclassifier_model_predict( penguins_xgbclassifier_model: bigframes.ml.ensemble.XGBClassifier, new_penguins_df ): - result = penguins_xgbclassifier_model.predict(new_penguins_df).to_pandas() + predictions = penguins_xgbclassifier_model.predict(new_penguins_df).to_pandas() + assert predictions.shape == (3, 9) + result = predictions[["predicted_sex"]] expected = pandas.DataFrame( {"predicted_sex": ["MALE", "MALE", "FEMALE"]}, dtype="string[pyarrow]", @@ -363,7 +367,11 @@ def test_randomforestregressor_model_predict( penguins_randomforest_regressor_model: bigframes.ml.ensemble.RandomForestRegressor, new_penguins_df, ): - result = penguins_randomforest_regressor_model.predict(new_penguins_df).to_pandas() + predictions = penguins_randomforest_regressor_model.predict( + new_penguins_df + ).to_pandas() + assert predictions.shape == (3, 8) + result = predictions[["predicted_body_mass_g"]] expected = pandas.DataFrame( {"predicted_body_mass_g": ["3897.341797", "3458.385742", "3458.385742"]}, dtype="Float64", @@ -490,7 +498,11 @@ def test_randomforestclassifier_model_predict( penguins_randomforest_classifier_model: bigframes.ml.ensemble.RandomForestClassifier, new_penguins_df, ): - result = penguins_randomforest_classifier_model.predict(new_penguins_df).to_pandas() + predictions = penguins_randomforest_classifier_model.predict( + new_penguins_df + ).to_pandas() + assert predictions.shape == (3, 9) + result = predictions[["predicted_sex"]] expected = pandas.DataFrame( {"predicted_sex": ["MALE", "MALE", "FEMALE"]}, dtype="string[pyarrow]", diff --git a/tests/system/small/ml/test_forecasting.py b/tests/system/small/ml/test_forecasting.py index 55079c94cf..948db59650 100644 --- a/tests/system/small/ml/test_forecasting.py +++ b/tests/system/small/ml/test_forecasting.py @@ -22,6 +22,8 @@ def test_model_predict(time_series_arima_plus_model): utc = pytz.utc predictions = time_series_arima_plus_model.predict().to_pandas() + assert predictions.shape == (3, 8) + result = predictions[["forecast_timestamp", "forecast_value"]] expected = pd.DataFrame( { "forecast_timestamp": [ @@ -38,7 +40,7 @@ def test_model_predict(time_series_arima_plus_model): ) pd.testing.assert_frame_equal( - predictions, + result, expected, rtol=0.1, check_index_type=False, diff --git a/tests/system/small/ml/test_imported.py b/tests/system/small/ml/test_imported.py index d305567066..9008e85a0b 100644 --- a/tests/system/small/ml/test_imported.py +++ b/tests/system/small/ml/test_imported.py @@ -32,7 +32,9 @@ def test_tensorflow_create_model_default_session(imported_tensorflow_model_path) def test_tensorflow_model_predict(imported_tensorflow_model, llm_text_df): df = llm_text_df.rename(columns={"prompt": "input"}) - result = imported_tensorflow_model.predict(df).to_pandas() + predictions = imported_tensorflow_model.predict(df).to_pandas() + assert predictions.shape == (3, 2) + result = predictions[["dense_1"]] # The values are non-human-readable. As they are a dense layer of Neural Network. # And since it is pretrained and imported, the model is a opaque-box. # We may want to switch to better test model and cases. @@ -72,7 +74,9 @@ def test_onnx_create_model_default_session(imported_onnx_model_path): def test_onnx_model_predict(imported_onnx_model, onnx_iris_df): - result = imported_onnx_model.predict(onnx_iris_df).to_pandas() + predictions = imported_onnx_model.predict(onnx_iris_df).to_pandas() + assert predictions.shape == (3, 7) + result = predictions[["label", "probabilities"]] value1 = np.array([0.9999993443489075, 0.0, 0.0]) value2 = np.array([0.0, 0.0, 0.9999993443489075]) expected = pd.DataFrame( diff --git a/tests/system/small/ml/test_linear_model.py b/tests/system/small/ml/test_linear_model.py index 3a8232ed9e..218c1074ab 100644 --- a/tests/system/small/ml/test_linear_model.py +++ b/tests/system/small/ml/test_linear_model.py @@ -91,13 +91,15 @@ def test_linear_reg_model_score_series( def test_linear_reg_model_predict(penguins_linear_model, new_penguins_df): predictions = penguins_linear_model.predict(new_penguins_df).to_pandas() + assert predictions.shape == (3, 8) + result = predictions[["predicted_body_mass_g"]] expected = pandas.DataFrame( {"predicted_body_mass_g": [4030.1, 3280.8, 3177.9]}, dtype="Float64", index=pandas.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), ) pandas.testing.assert_frame_equal( - predictions.sort_index(), + result.sort_index(), expected, check_exact=False, rtol=0.1, @@ -224,13 +226,15 @@ def test_logistic_model_score_series( def test_logsitic_model_predict(penguins_logistic_model, new_penguins_df): predictions = penguins_logistic_model.predict(new_penguins_df).to_pandas() + assert predictions.shape == (3, 9) + result = predictions[["predicted_sex"]] expected = pandas.DataFrame( {"predicted_sex": ["MALE", "MALE", "FEMALE"]}, dtype="string[pyarrow]", index=pandas.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), ) pandas.testing.assert_frame_equal( - predictions.sort_index(), + result.sort_index(), expected, check_exact=False, rtol=0.1, diff --git a/tests/system/small/ml/test_llm.py b/tests/system/small/ml/test_llm.py index 79d3c40317..306098548e 100644 --- a/tests/system/small/ml/test_llm.py +++ b/tests/system/small/ml/test_llm.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from unittest import TestCase - import numpy as np import pytest @@ -48,7 +46,7 @@ def test_create_text_generator_model_default_session(bq_connection, llm_text_pan llm_text_df = bpd.read_pandas(llm_text_pandas_df) df = model.predict(llm_text_df).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "ml_generate_text_llm_result" in df.columns series = df["ml_generate_text_llm_result"] assert all(series.str.len() > 20) @@ -72,7 +70,7 @@ def test_create_text_generator_32k_model_default_session( llm_text_df = bpd.read_pandas(llm_text_pandas_df) df = model.predict(llm_text_df).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "ml_generate_text_llm_result" in df.columns series = df["ml_generate_text_llm_result"] assert all(series.str.len() > 20) @@ -97,7 +95,7 @@ def test_create_text_generator_model_default_connection(llm_text_pandas_df): ) df = model.predict(llm_text_df).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "ml_generate_text_llm_result" in df.columns series = df["ml_generate_text_llm_result"] assert all(series.str.len() > 20) @@ -109,7 +107,7 @@ def test_text_generator_predict_default_params_success( palm2_text_generator_model, llm_text_df ): df = palm2_text_generator_model.predict(llm_text_df).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "ml_generate_text_llm_result" in df.columns series = df["ml_generate_text_llm_result"] assert all(series.str.len() > 20) @@ -120,7 +118,7 @@ def test_text_generator_predict_series_default_params_success( palm2_text_generator_model, llm_text_df ): df = palm2_text_generator_model.predict(llm_text_df["prompt"]).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "ml_generate_text_llm_result" in df.columns series = df["ml_generate_text_llm_result"] assert all(series.str.len() > 20) @@ -132,7 +130,7 @@ def test_text_generator_predict_arbitrary_col_label_success( ): llm_text_df = llm_text_df.rename(columns={"prompt": "arbitrary"}) df = palm2_text_generator_model.predict(llm_text_df).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "ml_generate_text_llm_result" in df.columns series = df["ml_generate_text_llm_result"] assert all(series.str.len() > 20) @@ -145,7 +143,7 @@ def test_text_generator_predict_with_params_success( df = palm2_text_generator_model.predict( llm_text_df, temperature=0.5, max_output_tokens=100, top_k=20, top_p=0.5 ).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "ml_generate_text_llm_result" in df.columns series = df["ml_generate_text_llm_result"] assert all(series.str.len() > 20) @@ -196,7 +194,7 @@ def test_embedding_generator_predict_success( palm2_embedding_generator_model, llm_text_df ): df = palm2_embedding_generator_model.predict(llm_text_df).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "text_embedding" in df.columns series = df["text_embedding"] value = series[0] @@ -209,7 +207,7 @@ def test_embedding_generator_multilingual_predict_success( palm2_embedding_generator_multilingual_model, llm_text_df ): df = palm2_embedding_generator_multilingual_model.predict(llm_text_df).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "text_embedding" in df.columns series = df["text_embedding"] value = series[0] @@ -222,7 +220,7 @@ def test_embedding_generator_predict_series_success( palm2_embedding_generator_model, llm_text_df ): df = palm2_embedding_generator_model.predict(llm_text_df["prompt"]).to_pandas() - TestCase().assertSequenceEqual(df.shape, (3, 1)) + assert df.shape == (3, 4) assert "text_embedding" in df.columns series = df["text_embedding"] value = series[0] diff --git a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py index 5369d3662d..be6c5e7c52 100644 --- a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py +++ b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py @@ -20,19 +20,7 @@ class _BaseKMeans(BaseEstimator, ABC): """Base class for KMeans and MiniBatchKMeans""" - def predict(self, X): - """Predict the closest cluster each sample in X belongs to. - - Args: - X (bigframes.dataframe.DataFrame or bigframes.series.Series): - Series or DataFrame of shape (n_samples, n_features). The data matrix for - which we want to get the predictions. - - Returns: - bigframes.dataframe.DataFrame: DataFrame of shape (n_samples,), containing the - class labels for each sample. - """ - raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + pass class KMeans(_BaseKMeans): @@ -73,7 +61,7 @@ def predict( DataFrame of shape (n_samples, n_features). New data to predict. Returns: - bigframes.dataframe.DataFrame: DataFrame of the cluster each sample belongs to. + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted labels. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) diff --git a/third_party/bigframes_vendored/sklearn/linear_model/_base.py b/third_party/bigframes_vendored/sklearn/linear_model/_base.py index 8dc3b6280a..8113298877 100644 --- a/third_party/bigframes_vendored/sklearn/linear_model/_base.py +++ b/third_party/bigframes_vendored/sklearn/linear_model/_base.py @@ -16,7 +16,6 @@ # Original location: https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/linear_model/_base.py from abc import ABCMeta -from typing import List, Optional from bigframes import constants from third_party.bigframes_vendored.sklearn.base import ( @@ -35,7 +34,7 @@ def predict(self, X): Series or DataFrame of shape (n_samples, n_features). Samples. Returns: - bigframes.dataframe.DataFrame: DataFrame of shape (n_samples,). Returns predicted values. + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From d798c68d78c3e21955d8379c2284e67c826e7fee Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Wed, 15 Nov 2023 19:15:01 +0000 Subject: [PATCH 2/5] fix tests --- tests/system/large/ml/test_pipeline.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/system/large/ml/test_pipeline.py b/tests/system/large/ml/test_pipeline.py index 3e56954058..2929baf3f7 100644 --- a/tests/system/large/ml/test_pipeline.py +++ b/tests/system/large/ml/test_pipeline.py @@ -545,7 +545,9 @@ def test_pipeline_standard_scaler_kmeans_fit_score_predict( score_result, score_expected, check_exact=False, rtol=0.1 ) - result = pl.predict(new_penguins).to_pandas().sort_index() + predictions = pl.predict(new_penguins).to_pandas().sort_index() + assert predictions.shape == (6, 9) + result = predictions[["CENTROID_ID"]] expected = pd.DataFrame( {"CENTROID_ID": [1, 2, 1, 2, 1, 2]}, dtype="Int64", From bd13023e6ece512ab22c7877c57d9885301c0d3a Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Wed, 15 Nov 2023 20:10:35 +0000 Subject: [PATCH 3/5] fix notebook --- .../sklearn_linear_regression.ipynb | 1192 +++++++++-------- 1 file changed, 617 insertions(+), 575 deletions(-) diff --git a/notebooks/regression/sklearn_linear_regression.ipynb b/notebooks/regression/sklearn_linear_regression.ipynb index beb77ef092..ec14d15cdf 100644 --- a/notebooks/regression/sklearn_linear_regression.ipynb +++ b/notebooks/regression/sklearn_linear_regression.ipynb @@ -20,46 +20,16 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 1, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "62b29650872b4c438d0eefb825fcae32", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job d47c23df-1830-4451-9016-7747c1420abd is RUNNING. " ] }, "metadata": {}, @@ -67,13 +37,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "0f25faa156584cc59dda9b0e60f72534", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 12e0f983-695e-4903-8ff1-2f353d7e8cba is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job d8eed0ca-7ce9-4ed8-a592-e16af9f9db8d is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -113,250 +81,250 @@ " \n", " 0\n", " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 36.6\n", - " 18.4\n", - " 184.0\n", - " 3475.0\n", - " FEMALE\n", + " Biscoe\n", + " 40.1\n", + " 18.9\n", + " 188.0\n", + " 4300.0\n", + " MALE\n", " \n", " \n", " 1\n", " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 39.8\n", - " 19.1\n", - " 184.0\n", - " 4650.0\n", + " Torgersen\n", + " 39.1\n", + " 18.7\n", + " 181.0\n", + " 3750.0\n", " MALE\n", " \n", " \n", " 2\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 40.9\n", - " 18.9\n", - " 184.0\n", - " 3900.0\n", - " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 47.4\n", + " 14.6\n", + " 212.0\n", + " 4725.0\n", + " FEMALE\n", " \n", " \n", " 3\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 46.5\n", - " 17.9\n", - " 192.0\n", - " 3500.0\n", + " 42.5\n", + " 16.7\n", + " 187.0\n", + " 3350.0\n", " FEMALE\n", " \n", " \n", " 4\n", " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 37.3\n", - " 16.8\n", - " 192.0\n", - " 3000.0\n", - " FEMALE\n", + " Biscoe\n", + " 43.2\n", + " 19.0\n", + " 197.0\n", + " 4775.0\n", + " MALE\n", " \n", " \n", " 5\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 43.2\n", - " 18.5\n", - " 192.0\n", - " 4100.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 46.7\n", + " 15.3\n", + " 219.0\n", + " 5200.0\n", " MALE\n", " \n", " \n", " 6\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 46.9\n", - " 16.6\n", - " 192.0\n", - " 2700.0\n", - " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Biscoe\n", + " 41.3\n", + " 21.1\n", + " 195.0\n", + " 4400.0\n", + " MALE\n", " \n", " \n", " 7\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 50.5\n", - " 18.4\n", - " 200.0\n", - " 3400.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 45.2\n", + " 13.8\n", + " 215.0\n", + " 4750.0\n", " FEMALE\n", " \n", " \n", " 8\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 49.5\n", - " 19.0\n", - " 200.0\n", - " 3800.0\n", - " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 46.5\n", + " 13.5\n", + " 210.0\n", + " 4550.0\n", + " FEMALE\n", " \n", " \n", " 9\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 40.2\n", - " 20.1\n", - " 200.0\n", - " 3975.0\n", - " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 50.5\n", + " 15.2\n", + " 216.0\n", + " 5000.0\n", + " FEMALE\n", " \n", " \n", " 10\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 40.8\n", - " 18.9\n", - " 208.0\n", - " 4300.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 48.2\n", + " 15.6\n", + " 221.0\n", + " 5100.0\n", " MALE\n", " \n", " \n", " 11\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 39.0\n", - " 18.7\n", - " 185.0\n", - " 3650.0\n", - " MALE\n", + " 38.1\n", + " 18.6\n", + " 190.0\n", + " 3700.0\n", + " FEMALE\n", " \n", " \n", " 12\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 37.0\n", - " 16.9\n", - " 185.0\n", - " 3000.0\n", - " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 50.7\n", + " 15.0\n", + " 223.0\n", + " 5550.0\n", + " MALE\n", " \n", " \n", " 13\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 47.0\n", - " 17.3\n", - " 185.0\n", - " 3700.0\n", - " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Biscoe\n", + " 37.8\n", + " 20.0\n", + " 190.0\n", + " 4250.0\n", + " MALE\n", " \n", " \n", " 14\n", " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 34.0\n", - " 17.1\n", - " 185.0\n", - " 3400.0\n", + " Biscoe\n", + " 35.0\n", + " 17.9\n", + " 190.0\n", + " 3450.0\n", " FEMALE\n", " \n", " \n", " 15\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 37.0\n", - " 16.5\n", - " 185.0\n", - " 3400.0\n", - " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 48.7\n", + " 15.7\n", + " 208.0\n", + " 5350.0\n", + " MALE\n", " \n", " \n", " 16\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 45.7\n", - " 17.3\n", - " 193.0\n", - " 3600.0\n", - " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Torgersen\n", + " 34.6\n", + " 21.1\n", + " 198.0\n", + " 4400.0\n", + " MALE\n", " \n", " \n", " 17\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 50.6\n", - " 19.4\n", - " 193.0\n", - " 3800.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 46.8\n", + " 15.4\n", + " 215.0\n", + " 5150.0\n", " MALE\n", " \n", " \n", " 18\n", - " Adelie Penguin (Pygoscelis adeliae)\n", + " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 39.7\n", - " 17.9\n", - " 193.0\n", - " 4250.0\n", + " 50.3\n", + " 20.0\n", + " 197.0\n", + " 3300.0\n", " MALE\n", " \n", " \n", " 19\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 37.8\n", + " 37.2\n", " 18.1\n", - " 193.0\n", - " 3750.0\n", + " 178.0\n", + " 3900.0\n", " MALE\n", " \n", " \n", " 20\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 46.6\n", - " 17.8\n", - " 193.0\n", - " 3800.0\n", - " FEMALE\n", + " 51.0\n", + " 18.8\n", + " 203.0\n", + " 4100.0\n", + " MALE\n", " \n", " \n", " 21\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 51.3\n", - " 19.2\n", - " 193.0\n", - " 3650.0\n", - " MALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Biscoe\n", + " 40.5\n", + " 17.9\n", + " 187.0\n", + " 3200.0\n", + " FEMALE\n", " \n", " \n", " 22\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 40.2\n", - " 17.1\n", - " 193.0\n", - " 3400.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 45.5\n", + " 13.9\n", + " 210.0\n", + " 4200.0\n", " FEMALE\n", " \n", " \n", " 23\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 36.8\n", + " 42.2\n", " 18.5\n", - " 193.0\n", - " 3500.0\n", + " 180.0\n", + " 3550.0\n", " FEMALE\n", " \n", " \n", " 24\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 49.6\n", - " 18.2\n", - " 193.0\n", + " 51.7\n", + " 20.3\n", + " 194.0\n", " 3775.0\n", " MALE\n", " \n", @@ -366,65 +334,65 @@ "[344 rows x 7 columns in total]" ], "text/plain": [ - " species island culmen_length_mm \\\n", - "0 Adelie Penguin (Pygoscelis adeliae) Dream 36.6 \n", - "1 Adelie Penguin (Pygoscelis adeliae) Dream 39.8 \n", - "2 Adelie Penguin (Pygoscelis adeliae) Dream 40.9 \n", - "3 Chinstrap penguin (Pygoscelis antarctica) Dream 46.5 \n", - "4 Adelie Penguin (Pygoscelis adeliae) Dream 37.3 \n", - "5 Adelie Penguin (Pygoscelis adeliae) Dream 43.2 \n", - "6 Chinstrap penguin (Pygoscelis antarctica) Dream 46.9 \n", - "7 Chinstrap penguin (Pygoscelis antarctica) Dream 50.5 \n", - "8 Chinstrap penguin (Pygoscelis antarctica) Dream 49.5 \n", - "9 Adelie Penguin (Pygoscelis adeliae) Dream 40.2 \n", - "10 Adelie Penguin (Pygoscelis adeliae) Dream 40.8 \n", - "11 Adelie Penguin (Pygoscelis adeliae) Dream 39.0 \n", - "12 Adelie Penguin (Pygoscelis adeliae) Dream 37.0 \n", - "13 Chinstrap penguin (Pygoscelis antarctica) Dream 47.0 \n", - "14 Adelie Penguin (Pygoscelis adeliae) Dream 34.0 \n", - "15 Adelie Penguin (Pygoscelis adeliae) Dream 37.0 \n", - "16 Chinstrap penguin (Pygoscelis antarctica) Dream 45.7 \n", - "17 Chinstrap penguin (Pygoscelis antarctica) Dream 50.6 \n", - "18 Adelie Penguin (Pygoscelis adeliae) Dream 39.7 \n", - "19 Adelie Penguin (Pygoscelis adeliae) Dream 37.8 \n", - "20 Chinstrap penguin (Pygoscelis antarctica) Dream 46.6 \n", - "21 Chinstrap penguin (Pygoscelis antarctica) Dream 51.3 \n", - "22 Adelie Penguin (Pygoscelis adeliae) Dream 40.2 \n", - "23 Adelie Penguin (Pygoscelis adeliae) Dream 36.8 \n", - "24 Chinstrap penguin (Pygoscelis antarctica) Dream 49.6 \n", + " species island culmen_length_mm \\\n", + "0 Adelie Penguin (Pygoscelis adeliae) Biscoe 40.1 \n", + "1 Adelie Penguin (Pygoscelis adeliae) Torgersen 39.1 \n", + "2 Gentoo penguin (Pygoscelis papua) Biscoe 47.4 \n", + "3 Chinstrap penguin (Pygoscelis antarctica) Dream 42.5 \n", + "4 Adelie Penguin (Pygoscelis adeliae) Biscoe 43.2 \n", + "5 Gentoo penguin (Pygoscelis papua) Biscoe 46.7 \n", + "6 Adelie Penguin (Pygoscelis adeliae) Biscoe 41.3 \n", + "7 Gentoo penguin (Pygoscelis papua) Biscoe 45.2 \n", + "8 Gentoo penguin (Pygoscelis papua) Biscoe 46.5 \n", + "9 Gentoo penguin (Pygoscelis papua) Biscoe 50.5 \n", + "10 Gentoo penguin (Pygoscelis papua) Biscoe 48.2 \n", + "11 Adelie Penguin (Pygoscelis adeliae) Dream 38.1 \n", + "12 Gentoo penguin (Pygoscelis papua) Biscoe 50.7 \n", + "13 Adelie Penguin (Pygoscelis adeliae) Biscoe 37.8 \n", + "14 Adelie Penguin (Pygoscelis adeliae) Biscoe 35.0 \n", + "15 Gentoo penguin (Pygoscelis papua) Biscoe 48.7 \n", + "16 Adelie Penguin (Pygoscelis adeliae) Torgersen 34.6 \n", + "17 Gentoo penguin (Pygoscelis papua) Biscoe 46.8 \n", + "18 Chinstrap penguin (Pygoscelis antarctica) Dream 50.3 \n", + "19 Adelie Penguin (Pygoscelis adeliae) Dream 37.2 \n", + "20 Chinstrap penguin (Pygoscelis antarctica) Dream 51.0 \n", + "21 Adelie Penguin (Pygoscelis adeliae) Biscoe 40.5 \n", + "22 Gentoo penguin (Pygoscelis papua) Biscoe 45.5 \n", + "23 Adelie Penguin (Pygoscelis adeliae) Dream 42.2 \n", + "24 Chinstrap penguin (Pygoscelis antarctica) Dream 51.7 \n", "\n", " culmen_depth_mm flipper_length_mm body_mass_g sex \n", - "0 18.4 184.0 3475.0 FEMALE \n", - "1 19.1 184.0 4650.0 MALE \n", - "2 18.9 184.0 3900.0 MALE \n", - "3 17.9 192.0 3500.0 FEMALE \n", - "4 16.8 192.0 3000.0 FEMALE \n", - "5 18.5 192.0 4100.0 MALE \n", - "6 16.6 192.0 2700.0 FEMALE \n", - "7 18.4 200.0 3400.0 FEMALE \n", - "8 19.0 200.0 3800.0 MALE \n", - "9 20.1 200.0 3975.0 MALE \n", - "10 18.9 208.0 4300.0 MALE \n", - "11 18.7 185.0 3650.0 MALE \n", - "12 16.9 185.0 3000.0 FEMALE \n", - "13 17.3 185.0 3700.0 FEMALE \n", - "14 17.1 185.0 3400.0 FEMALE \n", - "15 16.5 185.0 3400.0 FEMALE \n", - "16 17.3 193.0 3600.0 FEMALE \n", - "17 19.4 193.0 3800.0 MALE \n", - "18 17.9 193.0 4250.0 MALE \n", - "19 18.1 193.0 3750.0 MALE \n", - "20 17.8 193.0 3800.0 FEMALE \n", - "21 19.2 193.0 3650.0 MALE \n", - "22 17.1 193.0 3400.0 FEMALE \n", - "23 18.5 193.0 3500.0 FEMALE \n", - "24 18.2 193.0 3775.0 MALE \n", + "0 18.9 188.0 4300.0 MALE \n", + "1 18.7 181.0 3750.0 MALE \n", + "2 14.6 212.0 4725.0 FEMALE \n", + "3 16.7 187.0 3350.0 FEMALE \n", + "4 19.0 197.0 4775.0 MALE \n", + "5 15.3 219.0 5200.0 MALE \n", + "6 21.1 195.0 4400.0 MALE \n", + "7 13.8 215.0 4750.0 FEMALE \n", + "8 13.5 210.0 4550.0 FEMALE \n", + "9 15.2 216.0 5000.0 FEMALE \n", + "10 15.6 221.0 5100.0 MALE \n", + "11 18.6 190.0 3700.0 FEMALE \n", + "12 15.0 223.0 5550.0 MALE \n", + "13 20.0 190.0 4250.0 MALE \n", + "14 17.9 190.0 3450.0 FEMALE \n", + "15 15.7 208.0 5350.0 MALE \n", + "16 21.1 198.0 4400.0 MALE \n", + "17 15.4 215.0 5150.0 MALE \n", + "18 20.0 197.0 3300.0 MALE \n", + "19 18.1 178.0 3900.0 MALE \n", + "20 18.8 203.0 4100.0 MALE \n", + "21 17.9 187.0 3200.0 FEMALE \n", + "22 13.9 210.0 4200.0 FEMALE \n", + "23 18.5 180.0 3550.0 FEMALE \n", + "24 20.3 194.0 3775.0 MALE \n", "...\n", "\n", "[344 rows x 7 columns]" ] }, - "execution_count": 12, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -450,32 +418,16 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 2, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "a9ad907fa6e64a61a9dce420bc7d2beb", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job 3537a10a-641a-4d40-ae47-449c641b1bc5 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 34101409-7c65-4045-ad52-c6ba24dc9cbb is DONE. 31.7 kB processed. " ] }, "metadata": {}, @@ -483,13 +435,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "514e68d5b0b4452a9ccdff947848541a", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 2af0b0d6-c11b-499e-8d25-a2c628b2853b is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 74190ac2-21a2-47b0-bc21-ef5373565f17 is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -527,294 +477,294 @@ " \n", " \n", " 0\n", - " Dream\n", - " 36.6\n", - " 18.4\n", - " 184.0\n", - " 3475.0\n", - " FEMALE\n", - " \n", - " \n", - " 1\n", - " Dream\n", - " 39.8\n", - " 19.1\n", - " 184.0\n", - " 4650.0\n", + " Biscoe\n", + " 40.1\n", + " 18.9\n", + " 188.0\n", + " 4300.0\n", " MALE\n", " \n", " \n", - " 2\n", - " Dream\n", - " 40.9\n", - " 18.9\n", - " 184.0\n", - " 3900.0\n", + " 1\n", + " Torgersen\n", + " 39.1\n", + " 18.7\n", + " 181.0\n", + " 3750.0\n", " MALE\n", " \n", " \n", " 4\n", - " Dream\n", - " 37.3\n", - " 16.8\n", - " 192.0\n", - " 3000.0\n", - " FEMALE\n", - " \n", - " \n", - " 5\n", - " Dream\n", + " Biscoe\n", " 43.2\n", - " 18.5\n", - " 192.0\n", - " 4100.0\n", - " MALE\n", - " \n", - " \n", - " 9\n", - " Dream\n", - " 40.2\n", - " 20.1\n", - " 200.0\n", - " 3975.0\n", + " 19.0\n", + " 197.0\n", + " 4775.0\n", " MALE\n", " \n", " \n", - " 10\n", - " Dream\n", - " 40.8\n", - " 18.9\n", - " 208.0\n", - " 4300.0\n", + " 6\n", + " Biscoe\n", + " 41.3\n", + " 21.1\n", + " 195.0\n", + " 4400.0\n", " MALE\n", " \n", " \n", " 11\n", " Dream\n", - " 39.0\n", - " 18.7\n", - " 185.0\n", - " 3650.0\n", - " MALE\n", - " \n", - " \n", - " 12\n", - " Dream\n", - " 37.0\n", - " 16.9\n", - " 185.0\n", - " 3000.0\n", + " 38.1\n", + " 18.6\n", + " 190.0\n", + " 3700.0\n", " FEMALE\n", " \n", " \n", - " 14\n", - " Dream\n", - " 34.0\n", - " 17.1\n", - " 185.0\n", - " 3400.0\n", - " FEMALE\n", + " 13\n", + " Biscoe\n", + " 37.8\n", + " 20.0\n", + " 190.0\n", + " 4250.0\n", + " MALE\n", " \n", " \n", - " 15\n", - " Dream\n", - " 37.0\n", - " 16.5\n", - " 185.0\n", - " 3400.0\n", + " 14\n", + " Biscoe\n", + " 35.0\n", + " 17.9\n", + " 190.0\n", + " 3450.0\n", " FEMALE\n", " \n", " \n", - " 18\n", - " Dream\n", - " 39.7\n", - " 17.9\n", - " 193.0\n", - " 4250.0\n", + " 16\n", + " Torgersen\n", + " 34.6\n", + " 21.1\n", + " 198.0\n", + " 4400.0\n", " MALE\n", " \n", " \n", " 19\n", " Dream\n", - " 37.8\n", + " 37.2\n", " 18.1\n", - " 193.0\n", - " 3750.0\n", + " 178.0\n", + " 3900.0\n", " MALE\n", " \n", " \n", - " 22\n", - " Dream\n", - " 40.2\n", - " 17.1\n", - " 193.0\n", - " 3400.0\n", + " 21\n", + " Biscoe\n", + " 40.5\n", + " 17.9\n", + " 187.0\n", + " 3200.0\n", " FEMALE\n", " \n", " \n", " 23\n", " Dream\n", - " 36.8\n", + " 42.2\n", " 18.5\n", - " 193.0\n", - " 3500.0\n", + " 180.0\n", + " 3550.0\n", " FEMALE\n", " \n", " \n", - " 26\n", + " 30\n", " Dream\n", - " 41.5\n", - " 18.5\n", - " 201.0\n", - " 4000.0\n", + " 39.2\n", + " 21.1\n", + " 196.0\n", + " 4150.0\n", " MALE\n", " \n", " \n", - " 31\n", - " Dream\n", - " 33.1\n", - " 16.1\n", - " 178.0\n", - " 2900.0\n", - " FEMALE\n", + " 32\n", + " Torgersen\n", + " 42.9\n", + " 17.6\n", + " 196.0\n", + " 4700.0\n", + " MALE\n", " \n", " \n", - " 32\n", + " 38\n", " Dream\n", - " 37.2\n", - " 18.1\n", - " 178.0\n", + " 41.1\n", + " 17.5\n", + " 190.0\n", " 3900.0\n", " MALE\n", " \n", " \n", - " 33\n", - " Dream\n", - " 39.5\n", - " 16.7\n", - " 178.0\n", - " 3250.0\n", + " 40\n", + " Torgersen\n", + " 38.6\n", + " 21.2\n", + " 191.0\n", + " 3800.0\n", + " MALE\n", + " \n", + " \n", + " 42\n", + " Biscoe\n", + " 35.5\n", + " 16.2\n", + " 195.0\n", + " 3350.0\n", " FEMALE\n", " \n", " \n", - " 35\n", + " 44\n", " Dream\n", - " 36.0\n", - " 18.5\n", + " 39.2\n", + " 18.6\n", + " 190.0\n", + " 4250.0\n", + " MALE\n", + " \n", + " \n", + " 45\n", + " Torgersen\n", + " 35.2\n", + " 15.9\n", " 186.0\n", - " 3100.0\n", + " 3050.0\n", " FEMALE\n", " \n", " \n", - " 36\n", + " 46\n", " Dream\n", + " 43.2\n", + " 18.5\n", + " 192.0\n", + " 4100.0\n", + " MALE\n", + " \n", + " \n", + " 49\n", + " Biscoe\n", " 39.6\n", - " 18.1\n", + " 17.7\n", " 186.0\n", - " 4450.0\n", - " MALE\n", + " 3500.0\n", + " FEMALE\n", " \n", " \n", - " 38\n", - " Dream\n", - " 41.3\n", + " 53\n", + " Biscoe\n", + " 45.6\n", " 20.3\n", - " 194.0\n", - " 3550.0\n", + " 191.0\n", + " 4600.0\n", " MALE\n", " \n", " \n", - " 41\n", - " Dream\n", - " 35.7\n", - " 18.0\n", - " 202.0\n", - " 3550.0\n", + " 58\n", + " Torgersen\n", + " 40.9\n", + " 16.8\n", + " 191.0\n", + " 3700.0\n", " FEMALE\n", " \n", " \n", - " 51\n", - " Dream\n", - " 38.1\n", - " 17.6\n", - " 187.0\n", - " 3425.0\n", + " 60\n", + " Torgersen\n", + " 40.3\n", + " 18.0\n", + " 195.0\n", + " 3250.0\n", " FEMALE\n", " \n", " \n", - " 53\n", + " 62\n", " Dream\n", " 36.0\n", - " 17.1\n", - " 187.0\n", - " 3700.0\n", + " 18.5\n", + " 186.0\n", + " 3100.0\n", " FEMALE\n", " \n", + " \n", + " 63\n", + " Torgersen\n", + " 39.3\n", + " 20.6\n", + " 190.0\n", + " 3650.0\n", + " MALE\n", + " \n", " \n", "\n", "

25 rows × 6 columns

\n", "[146 rows x 6 columns in total]" ], "text/plain": [ - " island culmen_length_mm culmen_depth_mm flipper_length_mm body_mass_g \\\n", - "0 Dream 36.6 18.4 184.0 3475.0 \n", - "1 Dream 39.8 19.1 184.0 4650.0 \n", - "2 Dream 40.9 18.9 184.0 3900.0 \n", - "4 Dream 37.3 16.8 192.0 3000.0 \n", - "5 Dream 43.2 18.5 192.0 4100.0 \n", - "9 Dream 40.2 20.1 200.0 3975.0 \n", - "10 Dream 40.8 18.9 208.0 4300.0 \n", - "11 Dream 39.0 18.7 185.0 3650.0 \n", - "12 Dream 37.0 16.9 185.0 3000.0 \n", - "14 Dream 34.0 17.1 185.0 3400.0 \n", - "15 Dream 37.0 16.5 185.0 3400.0 \n", - "18 Dream 39.7 17.9 193.0 4250.0 \n", - "19 Dream 37.8 18.1 193.0 3750.0 \n", - "22 Dream 40.2 17.1 193.0 3400.0 \n", - "23 Dream 36.8 18.5 193.0 3500.0 \n", - "26 Dream 41.5 18.5 201.0 4000.0 \n", - "31 Dream 33.1 16.1 178.0 2900.0 \n", - "32 Dream 37.2 18.1 178.0 3900.0 \n", - "33 Dream 39.5 16.7 178.0 3250.0 \n", - "35 Dream 36.0 18.5 186.0 3100.0 \n", - "36 Dream 39.6 18.1 186.0 4450.0 \n", - "38 Dream 41.3 20.3 194.0 3550.0 \n", - "41 Dream 35.7 18.0 202.0 3550.0 \n", - "51 Dream 38.1 17.6 187.0 3425.0 \n", - "53 Dream 36.0 17.1 187.0 3700.0 \n", + " island culmen_length_mm culmen_depth_mm flipper_length_mm \\\n", + "0 Biscoe 40.1 18.9 188.0 \n", + "1 Torgersen 39.1 18.7 181.0 \n", + "4 Biscoe 43.2 19.0 197.0 \n", + "6 Biscoe 41.3 21.1 195.0 \n", + "11 Dream 38.1 18.6 190.0 \n", + "13 Biscoe 37.8 20.0 190.0 \n", + "14 Biscoe 35.0 17.9 190.0 \n", + "16 Torgersen 34.6 21.1 198.0 \n", + "19 Dream 37.2 18.1 178.0 \n", + "21 Biscoe 40.5 17.9 187.0 \n", + "23 Dream 42.2 18.5 180.0 \n", + "30 Dream 39.2 21.1 196.0 \n", + "32 Torgersen 42.9 17.6 196.0 \n", + "38 Dream 41.1 17.5 190.0 \n", + "40 Torgersen 38.6 21.2 191.0 \n", + "42 Biscoe 35.5 16.2 195.0 \n", + "44 Dream 39.2 18.6 190.0 \n", + "45 Torgersen 35.2 15.9 186.0 \n", + "46 Dream 43.2 18.5 192.0 \n", + "49 Biscoe 39.6 17.7 186.0 \n", + "53 Biscoe 45.6 20.3 191.0 \n", + "58 Torgersen 40.9 16.8 191.0 \n", + "60 Torgersen 40.3 18.0 195.0 \n", + "62 Dream 36.0 18.5 186.0 \n", + "63 Torgersen 39.3 20.6 190.0 \n", "\n", - " sex \n", - "0 FEMALE \n", - "1 MALE \n", - "2 MALE \n", - "4 FEMALE \n", - "5 MALE \n", - "9 MALE \n", - "10 MALE \n", - "11 MALE \n", - "12 FEMALE \n", - "14 FEMALE \n", - "15 FEMALE \n", - "18 MALE \n", - "19 MALE \n", - "22 FEMALE \n", - "23 FEMALE \n", - "26 MALE \n", - "31 FEMALE \n", - "32 MALE \n", - "33 FEMALE \n", - "35 FEMALE \n", - "36 MALE \n", - "38 MALE \n", - "41 FEMALE \n", - "51 FEMALE \n", - "53 FEMALE \n", + " body_mass_g sex \n", + "0 4300.0 MALE \n", + "1 3750.0 MALE \n", + "4 4775.0 MALE \n", + "6 4400.0 MALE \n", + "11 3700.0 FEMALE \n", + "13 4250.0 MALE \n", + "14 3450.0 FEMALE \n", + "16 4400.0 MALE \n", + "19 3900.0 MALE \n", + "21 3200.0 FEMALE \n", + "23 3550.0 FEMALE \n", + "30 4150.0 MALE \n", + "32 4700.0 MALE \n", + "38 3900.0 MALE \n", + "40 3800.0 MALE \n", + "42 3350.0 FEMALE \n", + "44 4250.0 MALE \n", + "45 3050.0 FEMALE \n", + "46 4100.0 MALE \n", + "49 3500.0 FEMALE \n", + "53 4600.0 MALE \n", + "58 3700.0 FEMALE \n", + "60 3250.0 FEMALE \n", + "62 3100.0 FEMALE \n", + "63 3650.0 MALE \n", "...\n", "\n", "[146 rows x 6 columns]" ] }, - "execution_count": 13, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -843,18 +793,16 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "81f9aa34c7234bd88b6b7a4bc77d4b4e", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 0808457b-a0df-4a37-b7a5-8885f4a4588c is DONE. 28.9 kB processed.
Open Job" + ], "text/plain": [ - "HTML(value='Query job 288f0daa-a51e-45b4-86bf-d054467c4a99 is DONE. 28.9 kB processed. " ] }, "metadata": {}, @@ -881,7 +829,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -897,7 +845,7 @@ " ('linreg', LinearRegression(fit_intercept=False))])" ] }, - "execution_count": 15, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -936,9 +884,63 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "Query job e9bfa6a5-a53f-4d8b-ae8c-cc8cd55d0947 is DONE. 28.9 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job d8d553cf-3d36-49aa-b18b-9a05576a1fb0 is DONE. 28.9 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 75ef0083-9a4f-4ffb-a6c6-d82974a1659f is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "Pipeline(steps=[('preproc',\n", + " ColumnTransformer(transformers=[('onehot', OneHotEncoder(),\n", + " ['island', 'species', 'sex']),\n", + " ('scaler', StandardScaler(),\n", + " ['culmen_depth_mm',\n", + " 'culmen_length_mm',\n", + " 'flipper_length_mm'])])),\n", + " ('linreg', LinearRegression(fit_intercept=False))])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pipeline.fit(X_train, y_train)" ] @@ -953,18 +955,16 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "fcf406d36c0d4915b318cd30c0f3df25", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 55c5a9ce-8159-4a1a-99a4-af3a906640ba is DONE. 29.3 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 81196f97-304b-4d77-bb0f-8fc8adb8fe75 is RUNNING. " ] }, "metadata": {}, @@ -972,13 +972,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "41399a6b1d4f45328bacc6c868cefdf6", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 3e41c470-de70-4f13-89d9-c5564d0b2836 is DONE. 232 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job b417f27a-387d-4eb2-8d6d-287327ef0471 is DONE. 232 Bytes processed. " ] }, "metadata": {}, @@ -986,13 +984,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e3c17676eab448c0942c0c32689ba4b5", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job ed2f9042-a737-4d13-bd21-8c3d29cd61a2 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job b7f89a61-d76a-47be-8b83-917d69f255a2 is DONE. 31.7 kB processed. " ] }, "metadata": {}, @@ -1000,13 +996,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "6c903861564b412aad9d9decad26560c", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 815d16b5-0a5d-42be-a766-1cff5b8f22f2 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 9619c393-90b3-4fea-a197-d09389e9486c is DONE. 31.7 kB processed. " ] }, "metadata": {}, @@ -1014,13 +1008,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2c2534cd90e64c81be45753b81b1be46", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 37a38dc6-5073-4544-a1e3-da145a843922 is DONE. 29.4 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job e5854451-ffb4-4a28-a25f-3bdd68e9edae is DONE. 32.2 kB processed. " ] }, "metadata": {}, @@ -1029,10 +1021,10 @@ { "data": { "text/plain": [ - "0.6757452736197735" + "0.2655729213572775" ] }, - "execution_count": 17, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -1040,9 +1032,9 @@ "source": [ "from bigframes.ml.metrics import r2_score\n", "\n", - "pred_y = pipeline.predict(X_test)\n", + "y_pred = pipeline.predict(X_test)[\"predicted_body_mass_g\"]\n", "\n", - "r2_score(y_test, pred_y)" + "r2_score(y_test, y_pred)" ] }, { @@ -1055,18 +1047,16 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9295d6a3ff834f7a91a43d3f4ef4a61c", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Load job 7b46750c-70b4-468d-87ba-9f84f579f2a6 is DONE. Open Job" + ], "text/plain": [ - "HTML(value='Load job d4c2f933-3514-4901-bcd7-888ee66eba82 is RUNNING. " ] }, "metadata": {}, @@ -1097,32 +1087,16 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b7eb82b3b5fc4a8e97468070a3e76300", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job e4ffd919-6f69-4382-a7e5-db37c7c1fefa is RUNNING. Open Job" + ], "text/plain": [ - "HTML(value='Query job 6b3e3285-79e9-4137-bf3b-7b7185ef76a5 is DONE. 24 Bytes processed. " ] }, "metadata": {}, @@ -1130,13 +1104,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "edc7bc6434bd4be4926626a235aab65a", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 207cb787-cf8a-43ea-8e73-644d3f58b11a is DONE. 24 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 173c4194-e194-43d2-8359-7bec83d3c861 is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -1144,13 +1116,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "55a8cbd9b1ab47eeab6e1c305847630f", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job c5dc5075-cac0-4947-9e9f-06aa9cc5bd2a is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 53ba2332-590c-488d-9505-23aebaaad9cb is DONE. 48 Bytes processed. " ] }, "metadata": {}, @@ -1158,13 +1128,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "463a5b072148474db629b9346fa3a6d1", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 2ca4a569-7186-48ed-b3e4-004dca704798 is DONE. 282 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 66e4a8e0-4cae-4e9d-86e0-17dc24f6cfbb is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -1192,41 +1160,83 @@ " \n", " \n", " predicted_body_mass_g\n", + " species\n", + " island\n", + " culmen_length_mm\n", + " culmen_depth_mm\n", + " flipper_length_mm\n", + " sex\n", " \n", " \n", " tag_number\n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " 1633\n", - " 3965.994361\n", + " 4017.203152\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Torgersen\n", + " 39.5\n", + " 18.8\n", + " 196.0\n", + " MALE\n", " \n", " \n", " 1672\n", - " 3246.312058\n", + " 3127.601519\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Torgersen\n", + " 38.5\n", + " 17.2\n", + " 181.0\n", + " FEMALE\n", " \n", " \n", " 1690\n", - " 3456.404062\n", + " 3386.101231\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Dream\n", + " 37.9\n", + " 18.1\n", + " 188.0\n", + " FEMALE\n", " \n", " \n", "\n", - "

3 rows × 1 columns

\n", - "[3 rows x 1 columns in total]" + "

3 rows × 7 columns

\n", + "[3 rows x 7 columns in total]" ], "text/plain": [ - " predicted_body_mass_g\n", - "tag_number \n", - "1633 3965.994361\n", - "1672 3246.312058\n", - "1690 3456.404062\n", + " predicted_body_mass_g species \\\n", + "tag_number \n", + "1633 4017.203152 Adelie Penguin (Pygoscelis adeliae) \n", + "1672 3127.601519 Adelie Penguin (Pygoscelis adeliae) \n", + "1690 3386.101231 Adelie Penguin (Pygoscelis adeliae) \n", + "\n", + " island culmen_length_mm culmen_depth_mm flipper_length_mm \\\n", + "tag_number \n", + "1633 Torgersen 39.5 18.8 196.0 \n", + "1672 Torgersen 38.5 17.2 181.0 \n", + "1690 Dream 37.9 18.1 188.0 \n", "\n", - "[3 rows x 1 columns]" + " sex \n", + "tag_number \n", + "1633 MALE \n", + "1672 FEMALE \n", + "1690 FEMALE \n", + "\n", + "[3 rows x 7 columns]" ] }, - "execution_count": 19, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -1240,28 +1250,53 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 4. Save in BigQuery" + "## 6. Save in BigQuery" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "Copy job d1def4a4-1da1-43a9-8ae5-4459444d993d is DONE.
Open Job" + ], "text/plain": [ - "Pipeline(steps=[('preproc',\n", - " ColumnTransformer(transformers=[('onehot', OneHotEncoder(),\n", - " ['island', 'species', 'sex']),\n", - " ('scaler', StandardScaler(),\n", - " ['culmen_depth_mm',\n", - " 'culmen_length_mm',\n", - " 'flipper_length_mm'])])),\n", - " ('linreg', LinearRegression(fit_intercept=False))])" + "" ] }, - "execution_count": 20, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "Pipeline(steps=[('transform',\n", + " ColumnTransformer(transformers=[('ont_hot_encoder',\n", + " OneHotEncoder(max_categories=1000001,\n", + " min_frequency=0),\n", + " 'island'),\n", + " ('standard_scaler',\n", + " StandardScaler(),\n", + " 'culmen_length_mm'),\n", + " ('standard_scaler',\n", + " StandardScaler(),\n", + " 'culmen_depth_mm'),\n", + " ('standard_scaler',\n", + " StandardScaler(),\n", + " 'flipper_length_mm'),\n", + " ('ont_hot_encoder',\n", + " OneHotEncoder(max_categories=1000001,\n", + " min_frequency=0),\n", + " 'sex')])),\n", + " ('estimator',\n", + " LinearRegression(fit_intercept=False,\n", + " optimize_strategy='NORMAL_EQUATION'))])" + ] + }, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -1269,6 +1304,13 @@ "source": [ "pipeline.to_gbq(\"bigframes-dev.bigframes_demo_us.penguin_model\", replace=True)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -1287,7 +1329,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.9" }, "orig_nbformat": 4, "vscode": { From 56c3c2b3373977fcac107d46b34c0c8a4baf634e Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Wed, 15 Nov 2023 23:15:15 +0000 Subject: [PATCH 4/5] fix notebook --- .../getting_started/ml_fundamentals.ipynb | 3586 ++++++++++------- 1 file changed, 2043 insertions(+), 1543 deletions(-) diff --git a/notebooks/getting_started/ml_fundamentals.ipynb b/notebooks/getting_started/ml_fundamentals.ipynb index 2f566dd704..165bd90f31 100644 --- a/notebooks/getting_started/ml_fundamentals.ipynb +++ b/notebooks/getting_started/ml_fundamentals.ipynb @@ -14,46 +14,16 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 1, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "0c8a8bc0b4d64448aef68d6a98fae666", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job 28e903c6-e874-4b99-8f53-0755e0b0c188 is RUNNING. " ] }, "metadata": {}, @@ -61,13 +31,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9680fd748e0546b4a010fda0155c5027", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e8aba858-7660-4274-8d90-8d2b0382f8f6 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 7950d6a7-3747-4454-bba2-9660e830647f is DONE. 31.7 kB processed. " ] }, "metadata": {}, @@ -117,250 +85,250 @@ " \n", " 0\n", " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 36.6\n", - " 18.4\n", - " 184.0\n", - " 3475.0\n", - " FEMALE\n", + " Biscoe\n", + " 40.1\n", + " 18.9\n", + " 188.0\n", + " 4300.0\n", + " MALE\n", " \n", " \n", " 1\n", " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 39.8\n", - " 19.1\n", - " 184.0\n", - " 4650.0\n", + " Torgersen\n", + " 39.1\n", + " 18.7\n", + " 181.0\n", + " 3750.0\n", " MALE\n", " \n", " \n", " 2\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 40.9\n", - " 18.9\n", - " 184.0\n", - " 3900.0\n", - " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 47.4\n", + " 14.6\n", + " 212.0\n", + " 4725.0\n", + " FEMALE\n", " \n", " \n", " 3\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 46.5\n", - " 17.9\n", - " 192.0\n", - " 3500.0\n", + " 42.5\n", + " 16.7\n", + " 187.0\n", + " 3350.0\n", " FEMALE\n", " \n", " \n", " 4\n", " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 37.3\n", - " 16.8\n", - " 192.0\n", - " 3000.0\n", - " FEMALE\n", + " Biscoe\n", + " 43.2\n", + " 19.0\n", + " 197.0\n", + " 4775.0\n", + " MALE\n", " \n", " \n", " 5\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 43.2\n", - " 18.5\n", - " 192.0\n", - " 4100.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 46.7\n", + " 15.3\n", + " 219.0\n", + " 5200.0\n", " MALE\n", " \n", " \n", " 6\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 46.9\n", - " 16.6\n", - " 192.0\n", - " 2700.0\n", - " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Biscoe\n", + " 41.3\n", + " 21.1\n", + " 195.0\n", + " 4400.0\n", + " MALE\n", " \n", " \n", " 7\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 50.5\n", - " 18.4\n", - " 200.0\n", - " 3400.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 45.2\n", + " 13.8\n", + " 215.0\n", + " 4750.0\n", " FEMALE\n", " \n", " \n", " 8\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 49.5\n", - " 19.0\n", - " 200.0\n", - " 3800.0\n", - " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 46.5\n", + " 13.5\n", + " 210.0\n", + " 4550.0\n", + " FEMALE\n", " \n", " \n", " 9\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 40.2\n", - " 20.1\n", - " 200.0\n", - " 3975.0\n", - " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 50.5\n", + " 15.2\n", + " 216.0\n", + " 5000.0\n", + " FEMALE\n", " \n", " \n", " 10\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 40.8\n", - " 18.9\n", - " 208.0\n", - " 4300.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 48.2\n", + " 15.6\n", + " 221.0\n", + " 5100.0\n", " MALE\n", " \n", " \n", " 11\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 39.0\n", - " 18.7\n", - " 185.0\n", - " 3650.0\n", - " MALE\n", + " 38.1\n", + " 18.6\n", + " 190.0\n", + " 3700.0\n", + " FEMALE\n", " \n", " \n", " 12\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 37.0\n", - " 16.9\n", - " 185.0\n", - " 3000.0\n", - " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 50.7\n", + " 15.0\n", + " 223.0\n", + " 5550.0\n", + " MALE\n", " \n", " \n", " 13\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 47.0\n", - " 17.3\n", - " 185.0\n", - " 3700.0\n", - " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Biscoe\n", + " 37.8\n", + " 20.0\n", + " 190.0\n", + " 4250.0\n", + " MALE\n", " \n", " \n", " 14\n", " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 34.0\n", - " 17.1\n", - " 185.0\n", - " 3400.0\n", + " Biscoe\n", + " 35.0\n", + " 17.9\n", + " 190.0\n", + " 3450.0\n", " FEMALE\n", " \n", " \n", " 15\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 37.0\n", - " 16.5\n", - " 185.0\n", - " 3400.0\n", - " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 48.7\n", + " 15.7\n", + " 208.0\n", + " 5350.0\n", + " MALE\n", " \n", " \n", " 16\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 45.7\n", - " 17.3\n", - " 193.0\n", - " 3600.0\n", - " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Torgersen\n", + " 34.6\n", + " 21.1\n", + " 198.0\n", + " 4400.0\n", + " MALE\n", " \n", " \n", " 17\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 50.6\n", - " 19.4\n", - " 193.0\n", - " 3800.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 46.8\n", + " 15.4\n", + " 215.0\n", + " 5150.0\n", " MALE\n", " \n", " \n", " 18\n", - " Adelie Penguin (Pygoscelis adeliae)\n", + " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 39.7\n", - " 17.9\n", - " 193.0\n", - " 4250.0\n", + " 50.3\n", + " 20.0\n", + " 197.0\n", + " 3300.0\n", " MALE\n", " \n", " \n", " 19\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 37.8\n", + " 37.2\n", " 18.1\n", - " 193.0\n", - " 3750.0\n", + " 178.0\n", + " 3900.0\n", " MALE\n", " \n", " \n", " 20\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 46.6\n", - " 17.8\n", - " 193.0\n", - " 3800.0\n", - " FEMALE\n", + " 51.0\n", + " 18.8\n", + " 203.0\n", + " 4100.0\n", + " MALE\n", " \n", " \n", " 21\n", - " Chinstrap penguin (Pygoscelis antarctica)\n", - " Dream\n", - " 51.3\n", - " 19.2\n", - " 193.0\n", - " 3650.0\n", - " MALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", + " Biscoe\n", + " 40.5\n", + " 17.9\n", + " 187.0\n", + " 3200.0\n", + " FEMALE\n", " \n", " \n", " 22\n", - " Adelie Penguin (Pygoscelis adeliae)\n", - " Dream\n", - " 40.2\n", - " 17.1\n", - " 193.0\n", - " 3400.0\n", + " Gentoo penguin (Pygoscelis papua)\n", + " Biscoe\n", + " 45.5\n", + " 13.9\n", + " 210.0\n", + " 4200.0\n", " FEMALE\n", " \n", " \n", " 23\n", " Adelie Penguin (Pygoscelis adeliae)\n", " Dream\n", - " 36.8\n", + " 42.2\n", " 18.5\n", - " 193.0\n", - " 3500.0\n", + " 180.0\n", + " 3550.0\n", " FEMALE\n", " \n", " \n", " 24\n", " Chinstrap penguin (Pygoscelis antarctica)\n", " Dream\n", - " 49.6\n", - " 18.2\n", - " 193.0\n", + " 51.7\n", + " 20.3\n", + " 194.0\n", " 3775.0\n", " MALE\n", " \n", @@ -370,86 +338,86 @@ "[334 rows x 7 columns in total]" ], "text/plain": [ - " species island \\\n", - "penguin_id \n", - "0 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "1 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "2 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "3 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "4 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "5 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "6 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "7 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "8 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "9 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "10 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "11 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "12 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "13 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "14 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "15 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "16 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "17 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "18 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "19 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "20 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "21 Chinstrap penguin (Pygoscelis antarctica) Dream \n", - "22 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "23 Adelie Penguin (Pygoscelis adeliae) Dream \n", - "24 Chinstrap penguin (Pygoscelis antarctica) Dream \n", + " species island \\\n", + "penguin_id \n", + "0 Adelie Penguin (Pygoscelis adeliae) Biscoe \n", + "1 Adelie Penguin (Pygoscelis adeliae) Torgersen \n", + "2 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "3 Chinstrap penguin (Pygoscelis antarctica) Dream \n", + "4 Adelie Penguin (Pygoscelis adeliae) Biscoe \n", + "5 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "6 Adelie Penguin (Pygoscelis adeliae) Biscoe \n", + "7 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "8 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "9 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "10 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "11 Adelie Penguin (Pygoscelis adeliae) Dream \n", + "12 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "13 Adelie Penguin (Pygoscelis adeliae) Biscoe \n", + "14 Adelie Penguin (Pygoscelis adeliae) Biscoe \n", + "15 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "16 Adelie Penguin (Pygoscelis adeliae) Torgersen \n", + "17 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "18 Chinstrap penguin (Pygoscelis antarctica) Dream \n", + "19 Adelie Penguin (Pygoscelis adeliae) Dream \n", + "20 Chinstrap penguin (Pygoscelis antarctica) Dream \n", + "21 Adelie Penguin (Pygoscelis adeliae) Biscoe \n", + "22 Gentoo penguin (Pygoscelis papua) Biscoe \n", + "23 Adelie Penguin (Pygoscelis adeliae) Dream \n", + "24 Chinstrap penguin (Pygoscelis antarctica) Dream \n", "\n", " culmen_length_mm culmen_depth_mm flipper_length_mm body_mass_g \\\n", "penguin_id \n", - "0 36.6 18.4 184.0 3475.0 \n", - "1 39.8 19.1 184.0 4650.0 \n", - "2 40.9 18.9 184.0 3900.0 \n", - "3 46.5 17.9 192.0 3500.0 \n", - "4 37.3 16.8 192.0 3000.0 \n", - "5 43.2 18.5 192.0 4100.0 \n", - "6 46.9 16.6 192.0 2700.0 \n", - "7 50.5 18.4 200.0 3400.0 \n", - "8 49.5 19.0 200.0 3800.0 \n", - "9 40.2 20.1 200.0 3975.0 \n", - "10 40.8 18.9 208.0 4300.0 \n", - "11 39.0 18.7 185.0 3650.0 \n", - "12 37.0 16.9 185.0 3000.0 \n", - "13 47.0 17.3 185.0 3700.0 \n", - "14 34.0 17.1 185.0 3400.0 \n", - "15 37.0 16.5 185.0 3400.0 \n", - "16 45.7 17.3 193.0 3600.0 \n", - "17 50.6 19.4 193.0 3800.0 \n", - "18 39.7 17.9 193.0 4250.0 \n", - "19 37.8 18.1 193.0 3750.0 \n", - "20 46.6 17.8 193.0 3800.0 \n", - "21 51.3 19.2 193.0 3650.0 \n", - "22 40.2 17.1 193.0 3400.0 \n", - "23 36.8 18.5 193.0 3500.0 \n", - "24 49.6 18.2 193.0 3775.0 \n", + "0 40.1 18.9 188.0 4300.0 \n", + "1 39.1 18.7 181.0 3750.0 \n", + "2 47.4 14.6 212.0 4725.0 \n", + "3 42.5 16.7 187.0 3350.0 \n", + "4 43.2 19.0 197.0 4775.0 \n", + "5 46.7 15.3 219.0 5200.0 \n", + "6 41.3 21.1 195.0 4400.0 \n", + "7 45.2 13.8 215.0 4750.0 \n", + "8 46.5 13.5 210.0 4550.0 \n", + "9 50.5 15.2 216.0 5000.0 \n", + "10 48.2 15.6 221.0 5100.0 \n", + "11 38.1 18.6 190.0 3700.0 \n", + "12 50.7 15.0 223.0 5550.0 \n", + "13 37.8 20.0 190.0 4250.0 \n", + "14 35.0 17.9 190.0 3450.0 \n", + "15 48.7 15.7 208.0 5350.0 \n", + "16 34.6 21.1 198.0 4400.0 \n", + "17 46.8 15.4 215.0 5150.0 \n", + "18 50.3 20.0 197.0 3300.0 \n", + "19 37.2 18.1 178.0 3900.0 \n", + "20 51.0 18.8 203.0 4100.0 \n", + "21 40.5 17.9 187.0 3200.0 \n", + "22 45.5 13.9 210.0 4200.0 \n", + "23 42.2 18.5 180.0 3550.0 \n", + "24 51.7 20.3 194.0 3775.0 \n", "\n", " sex \n", "penguin_id \n", - "0 FEMALE \n", + "0 MALE \n", "1 MALE \n", - "2 MALE \n", + "2 FEMALE \n", "3 FEMALE \n", - "4 FEMALE \n", + "4 MALE \n", "5 MALE \n", - "6 FEMALE \n", + "6 MALE \n", "7 FEMALE \n", - "8 MALE \n", - "9 MALE \n", + "8 FEMALE \n", + "9 FEMALE \n", "10 MALE \n", - "11 MALE \n", - "12 FEMALE \n", - "13 FEMALE \n", + "11 FEMALE \n", + "12 MALE \n", + "13 MALE \n", "14 FEMALE \n", - "15 FEMALE \n", - "16 FEMALE \n", + "15 MALE \n", + "16 MALE \n", "17 MALE \n", "18 MALE \n", "19 MALE \n", - "20 FEMALE \n", - "21 MALE \n", + "20 MALE \n", + "21 FEMALE \n", "22 FEMALE \n", "23 FEMALE \n", "24 MALE \n", @@ -458,7 +426,7 @@ "[334 rows x 7 columns]" ] }, - "execution_count": 18, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -490,18 +458,16 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 2, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "171160f246eb43d1832aeefb055c0851", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job deda90a8-6ec7-419c-8067-e85777bd916f is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 1408053d-cb80-4870-af28-e94b90a20a6d is DONE. 28.9 kB processed. " ] }, "metadata": {}, @@ -509,13 +475,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "eaffac40f94745728e6bd618bebd2c53", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job efe8fa0a-d450-475a-99d5-36beeb985247 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 262885fe-973c-4338-a853-227f9db4835a is DONE. 31.7 kB processed. " ] }, "metadata": {}, @@ -523,13 +487,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "68e7ecdc639f4d3ab482830bf6a9da04", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 5022c56d-e605-4cab-be1b-1ecf189588a1 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job fb1dc831-7f6f-42ce-96da-1292d73919b4 is DONE. 31.7 kB processed. " ] }, "metadata": {}, @@ -537,13 +499,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ebfe197fd88348129ebe2f7d288bf4b9", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 175bd293-d448-4510-b926-1d8cfb4eb5e7 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job e79add79-f1e4-4cf0-bb97-04d153222f19 is DONE. 31.7 kB processed. " ] }, "metadata": {}, @@ -551,13 +511,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2ae69ea7da5247e8a1f7cd0e049629cb", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job a3a2e68c-f5f3-4237-99ad-44974f29d090 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job cb5ee343-f86e-4795-b0ce-d58854e72e5c is RUNNING. " ] }, "metadata": {}, @@ -596,18 +554,16 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5ed4206cd3ad4cd485315605bf033df2", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job db3365fb-67ca-44cc-a117-88a80dc63cca is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job e65af31c-feda-468d-89c9-dec033574640 is DONE. 31.7 kB processed. " ] }, "metadata": {}, @@ -615,13 +571,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ac72db21945542558fdd62093d9dc0c3", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job ab78f7ab-a115-448b-92d0-19c091a831ca is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 0455f252-2b94-457e-bad5-672b91d9b51f is RUNNING. " ] }, "metadata": {}, @@ -667,47 +621,47 @@ " \n", " \n", " \n", - " 156\n", - " Biscoe\n", - " 46.2\n", - " 14.5\n", - " 209.0\n", - " FEMALE\n", - " Gentoo penguin (Pygoscelis papua)\n", + " 249\n", + " Torgersen\n", + " 41.1\n", + " 18.6\n", + " 189.0\n", + " MALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 189\n", + " 36\n", " Biscoe\n", - " 35.3\n", - " 18.9\n", - " 187.0\n", + " 43.4\n", + " 14.4\n", + " 218.0\n", " FEMALE\n", - " Adelie Penguin (Pygoscelis adeliae)\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 279\n", + " 74\n", " Biscoe\n", - " 45.1\n", - " 14.5\n", - " 215.0\n", + " 42.8\n", + " 14.2\n", + " 209.0\n", " FEMALE\n", " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 245\n", - " Biscoe\n", - " 49.5\n", - " 16.2\n", - " 229.0\n", - " MALE\n", - " Gentoo penguin (Pygoscelis papua)\n", + " 235\n", + " Dream\n", + " 34.0\n", + " 17.1\n", + " 185.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 343\n", - " Torgersen\n", - " 37.3\n", - " 20.5\n", - " 199.0\n", + " 117\n", + " Dream\n", + " 37.8\n", + " 18.1\n", + " 193.0\n", " MALE\n", " Adelie Penguin (Pygoscelis adeliae)\n", " \n", @@ -719,24 +673,24 @@ "text/plain": [ " island culmen_length_mm culmen_depth_mm flipper_length_mm \\\n", "penguin_id \n", - "156 Biscoe 46.2 14.5 209.0 \n", - "189 Biscoe 35.3 18.9 187.0 \n", - "279 Biscoe 45.1 14.5 215.0 \n", - "245 Biscoe 49.5 16.2 229.0 \n", - "343 Torgersen 37.3 20.5 199.0 \n", + "249 Torgersen 41.1 18.6 189.0 \n", + "36 Biscoe 43.4 14.4 218.0 \n", + "74 Biscoe 42.8 14.2 209.0 \n", + "235 Dream 34.0 17.1 185.0 \n", + "117 Dream 37.8 18.1 193.0 \n", "\n", " sex species \n", "penguin_id \n", - "156 FEMALE Gentoo penguin (Pygoscelis papua) \n", - "189 FEMALE Adelie Penguin (Pygoscelis adeliae) \n", - "279 FEMALE Gentoo penguin (Pygoscelis papua) \n", - "245 MALE Gentoo penguin (Pygoscelis papua) \n", - "343 MALE Adelie Penguin (Pygoscelis adeliae) \n", + "249 MALE Adelie Penguin (Pygoscelis adeliae) \n", + "36 FEMALE Gentoo penguin (Pygoscelis papua) \n", + "74 FEMALE Gentoo penguin (Pygoscelis papua) \n", + "235 FEMALE Adelie Penguin (Pygoscelis adeliae) \n", + "117 MALE Adelie Penguin (Pygoscelis adeliae) \n", "\n", "[5 rows x 6 columns]" ] }, - "execution_count": 20, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -749,18 +703,16 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 4, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d6dd794f89724099950dcc927d63d0f5", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 22a72cad-11a6-4f8e-b16d-f92853b8112e is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job d5a173bd-a7dc-42fa-8468-b088d47ccfe0 is RUNNING. " ] }, "metadata": {}, @@ -768,13 +720,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "a8ab7ca12e0d43a6803483480e837c6e", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job bc952727-8806-4fe2-abf2-c3a8a2bd9b6d is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job c6b6518b-2689-4dc1-a5b0-2a9ab75301eb is RUNNING. " ] }, "metadata": {}, @@ -810,24 +760,24 @@ " \n", " \n", " \n", - " 156\n", - " 4800.0\n", + " 249\n", + " 3325.0\n", " \n", " \n", - " 189\n", - " 3800.0\n", + " 36\n", + " 4600.0\n", " \n", " \n", - " 279\n", - " 5000.0\n", + " 74\n", + " 4700.0\n", " \n", " \n", - " 245\n", - " 5800.0\n", + " 235\n", + " 3400.0\n", " \n", " \n", - " 343\n", - " 3775.0\n", + " 117\n", + " 3750.0\n", " \n", " \n", "\n", @@ -837,16 +787,16 @@ "text/plain": [ " body_mass_g\n", "penguin_id \n", - "156 4800.0\n", - "189 3800.0\n", - "279 5000.0\n", - "245 5800.0\n", - "343 3775.0\n", + "249 3325.0\n", + "36 4600.0\n", + "74 4700.0\n", + "235 3400.0\n", + "117 3750.0\n", "\n", "[5 rows x 1 columns]" ] }, - "execution_count": 21, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -880,18 +830,16 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "380c57dc3fe54fbd8ad2fb23f1e66e37", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job f239341e-785f-43e1-bfe0-683132d6f15f is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 03a0eb1c-747e-4c2a-b7b5-d3e4e5a78134 is RUNNING. " ] }, "metadata": {}, @@ -899,13 +847,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3db47aadba854beca71960d846838dc4", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 2d5bbbb9-efc4-4f4e-a8dc-2c7b66b0e5e0 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 70608c84-dac8-4e77-8a9e-00d823b24f37 is RUNNING. " ] }, "metadata": {}, @@ -913,13 +859,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "1de81f2944a44cbda3f16fa8a1fae813", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 66120e1c-2471-4a0c-8b82-aeb189c8866a is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job d18fdc32-2152-45d3-8c62-bf9b1556ec47 is RUNNING. " ] }, "metadata": {}, @@ -927,13 +871,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b06cae61a4534388a4e9ed26ce442cc2", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 62825fc4-5b77-43e5-a3e4-525ebfd1285b is DONE. 2.1 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 2a022682-535f-4dc0-80ba-1640306ad9ef is RUNNING. " ] }, "metadata": {}, @@ -941,13 +883,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "977c8eae2c9848e98c5478c41af82633", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 656d1d69-b4ff-4db6-9f2d-28dcf91e2fd7 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job c145b39d-7d02-4394-80f0-fc605b2ba256 is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -955,13 +895,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "aefc3085fee04c438d0327d400b4b72a", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 466507c8-1474-4725-93e5-baf8ee292e39 is DONE. 8.5 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job fc156a2b-db95-44a3-9ad1-d95b9d290080 is RUNNING. " ] }, "metadata": {}, @@ -1002,153 +940,153 @@ " \n", " \n", " 0\n", - " -1.344188\n", - " 0.642519\n", - " -1.193942\n", - " \n", - " \n", - " 1\n", - " -0.750047\n", - " 1.005876\n", - " -1.193942\n", + " -0.750505\n", + " 0.84903\n", + " -0.937262\n", " \n", " \n", " 2\n", - " -0.545811\n", - " 0.90206\n", - " -1.193942\n", + " 0.622496\n", + " -1.322402\n", + " 0.804051\n", " \n", " \n", - " 4\n", - " -1.214219\n", - " -0.188011\n", - " -0.619171\n", + " 3\n", + " -0.299107\n", + " -0.261935\n", + " -1.009817\n", " \n", " \n", " 5\n", - " -0.118772\n", - " 0.694427\n", - " -0.619171\n", + " 0.490839\n", + " -0.968913\n", + " 1.311935\n", " \n", " \n", " 6\n", - " 0.568203\n", - " -0.291828\n", - " -0.619171\n", + " -0.524806\n", + " 1.959995\n", + " -0.429379\n", " \n", " \n", " 7\n", - " 1.236611\n", - " 0.642519\n", - " -0.044401\n", + " 0.208715\n", + " -1.726389\n", + " 1.021716\n", " \n", " \n", " 9\n", - " -0.675779\n", - " 1.524957\n", - " -0.044401\n", + " 1.205551\n", + " -1.019412\n", + " 1.09427\n", " \n", " \n", " 10\n", - " -0.564378\n", - " 0.90206\n", - " 0.530369\n", - " \n", - " \n", - " 11\n", - " -0.898582\n", - " 0.798243\n", - " -1.122096\n", + " 0.772962\n", + " -0.817418\n", + " 1.457044\n", " \n", " \n", " 12\n", - " -1.26992\n", - " -0.136103\n", - " -1.122096\n", - " \n", - " \n", - " 13\n", - " 0.58677\n", - " 0.071529\n", - " -1.122096\n", + " 1.243168\n", + " -1.120408\n", + " 1.602153\n", " \n", " \n", " 14\n", - " -1.826927\n", - " -0.032287\n", - " -1.122096\n", + " -1.709725\n", + " 0.344046\n", + " -0.792152\n", " \n", " \n", - " 15\n", - " -1.26992\n", - " -0.343736\n", - " -1.122096\n", - " \n", - " \n", - " 16\n", - " 0.3454\n", - " 0.071529\n", - " -0.547325\n", + " 17\n", + " 0.509647\n", + " -0.918415\n", + " 1.021716\n", " \n", " \n", " 18\n", - " -0.768614\n", - " 0.382978\n", - " -0.547325\n", + " 1.167935\n", + " 1.404513\n", + " -0.284269\n", " \n", " \n", " 19\n", - " -1.121385\n", - " 0.486795\n", - " -0.547325\n", + " -1.295944\n", + " 0.445043\n", + " -1.662809\n", " \n", " \n", " 20\n", - " 0.512502\n", - " 0.33107\n", - " -0.547325\n", + " 1.299593\n", + " 0.798532\n", + " 0.151059\n", " \n", " \n", " 21\n", - " 1.385146\n", - " 1.057784\n", - " -0.547325\n", + " -0.675272\n", + " 0.344046\n", + " -1.009817\n", " \n", " \n", " 22\n", - " -0.675779\n", - " -0.032287\n", - " -0.547325\n", + " 0.26514\n", + " -1.675891\n", + " 0.658942\n", " \n", " \n", " 24\n", - " 1.069509\n", - " 0.538703\n", - " -0.547325\n", + " 1.43125\n", + " 1.556008\n", + " -0.501934\n", + " \n", + " \n", + " 25\n", + " 0.302756\n", + " 0.041055\n", + " -0.574488\n", " \n", " \n", " 26\n", - " -0.43441\n", - " 0.694427\n", - " 0.027445\n", + " 0.302756\n", + " -1.675891\n", + " 0.949161\n", + " \n", + " \n", + " 27\n", + " 0.227523\n", + " -1.776888\n", + " 0.658942\n", " \n", " \n", " 28\n", - " 1.923586\n", - " 1.888314\n", - " 0.027445\n", + " 1.318401\n", + " -0.362932\n", + " 1.747263\n", + " \n", + " \n", + " 29\n", + " 2.202388\n", + " 1.303516\n", + " 0.441278\n", " \n", " \n", " 30\n", - " 1.292312\n", - " 0.694427\n", - " 0.027445\n", + " -0.919779\n", + " 1.959995\n", + " -0.356824\n", " \n", " \n", " 31\n", - " -1.994029\n", - " -0.551368\n", - " -1.62502\n", + " 1.036277\n", + " -0.615424\n", + " 1.747263\n", + " \n", + " \n", + " 32\n", + " -0.223874\n", + " 0.19255\n", + " -0.356824\n", " \n", " \n", "\n", @@ -1158,65 +1096,65 @@ "text/plain": [ " standard_scaled_culmen_length_mm standard_scaled_culmen_depth_mm \\\n", "penguin_id \n", - "0 -1.344188 0.642519 \n", - "1 -0.750047 1.005876 \n", - "2 -0.545811 0.90206 \n", - "4 -1.214219 -0.188011 \n", - "5 -0.118772 0.694427 \n", - "6 0.568203 -0.291828 \n", - "7 1.236611 0.642519 \n", - "9 -0.675779 1.524957 \n", - "10 -0.564378 0.90206 \n", - "11 -0.898582 0.798243 \n", - "12 -1.26992 -0.136103 \n", - "13 0.58677 0.071529 \n", - "14 -1.826927 -0.032287 \n", - "15 -1.26992 -0.343736 \n", - "16 0.3454 0.071529 \n", - "18 -0.768614 0.382978 \n", - "19 -1.121385 0.486795 \n", - "20 0.512502 0.33107 \n", - "21 1.385146 1.057784 \n", - "22 -0.675779 -0.032287 \n", - "24 1.069509 0.538703 \n", - "26 -0.43441 0.694427 \n", - "28 1.923586 1.888314 \n", - "30 1.292312 0.694427 \n", - "31 -1.994029 -0.551368 \n", + "0 -0.750505 0.84903 \n", + "2 0.622496 -1.322402 \n", + "3 -0.299107 -0.261935 \n", + "5 0.490839 -0.968913 \n", + "6 -0.524806 1.959995 \n", + "7 0.208715 -1.726389 \n", + "9 1.205551 -1.019412 \n", + "10 0.772962 -0.817418 \n", + "12 1.243168 -1.120408 \n", + "14 -1.709725 0.344046 \n", + "17 0.509647 -0.918415 \n", + "18 1.167935 1.404513 \n", + "19 -1.295944 0.445043 \n", + "20 1.299593 0.798532 \n", + "21 -0.675272 0.344046 \n", + "22 0.26514 -1.675891 \n", + "24 1.43125 1.556008 \n", + "25 0.302756 0.041055 \n", + "26 0.302756 -1.675891 \n", + "27 0.227523 -1.776888 \n", + "28 1.318401 -0.362932 \n", + "29 2.202388 1.303516 \n", + "30 -0.919779 1.959995 \n", + "31 1.036277 -0.615424 \n", + "32 -0.223874 0.19255 \n", "\n", " standard_scaled_flipper_length_mm \n", "penguin_id \n", - "0 -1.193942 \n", - "1 -1.193942 \n", - "2 -1.193942 \n", - "4 -0.619171 \n", - "5 -0.619171 \n", - "6 -0.619171 \n", - "7 -0.044401 \n", - "9 -0.044401 \n", - "10 0.530369 \n", - "11 -1.122096 \n", - "12 -1.122096 \n", - "13 -1.122096 \n", - "14 -1.122096 \n", - "15 -1.122096 \n", - "16 -0.547325 \n", - "18 -0.547325 \n", - "19 -0.547325 \n", - "20 -0.547325 \n", - "21 -0.547325 \n", - "22 -0.547325 \n", - "24 -0.547325 \n", - "26 0.027445 \n", - "28 0.027445 \n", - "30 0.027445 \n", - "31 -1.62502 \n", + "0 -0.937262 \n", + "2 0.804051 \n", + "3 -1.009817 \n", + "5 1.311935 \n", + "6 -0.429379 \n", + "7 1.021716 \n", + "9 1.09427 \n", + "10 1.457044 \n", + "12 1.602153 \n", + "14 -0.792152 \n", + "17 1.021716 \n", + "18 -0.284269 \n", + "19 -1.662809 \n", + "20 0.151059 \n", + "21 -1.009817 \n", + "22 0.658942 \n", + "24 -0.501934 \n", + "25 -0.574488 \n", + "26 0.949161 \n", + "27 0.658942 \n", + "28 1.747263 \n", + "29 0.441278 \n", + "30 -0.356824 \n", + "31 1.747263 \n", + "32 -0.356824 \n", "...\n", "\n", "[267 rows x 3 columns]" ] }, - "execution_count": 22, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -1237,32 +1175,16 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "74f3c24c0a434e12bf6a56dc4809b501", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job c6268b07-0d3d-4fe0-971d-cc99fd98cd7e is RUNNING. Open Job" + ], "text/plain": [ - "HTML(value='Query job 31550d88-fc7b-4fcb-9975-9ed24bf2e009 is RUNNING. " ] }, "metadata": {}, @@ -1270,13 +1192,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5a04e46a7d0248b1ae523f2ca6903ee8", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 1e17f5f7-2956-4bdd-baa9-c07591481341 is DONE. 536 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 5ec7c8b1-037c-466c-a51e-963f8274e76b is RUNNING. " ] }, "metadata": {}, @@ -1284,13 +1204,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "62563820bfb245be85bbc1bf3dfb993c", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e2fde7a6-67b4-45a4-91d4-1cb9eff66ae5 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 4e860716-bc41-4ef6-83ff-310d085ed7cc is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -1298,13 +1216,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "98aff3bfded44868bf120451c89df9f5", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e0683619-23c5-44fd-8930-9d3c9d02729a is DONE. 2.1 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 6b96a757-42fe-4b65-92fd-a3ae339fe769 is RUNNING. " ] }, "metadata": {}, @@ -1344,154 +1260,154 @@ " \n", " \n", " \n", - " 3\n", - " 0.493935\n", - " 0.382978\n", - " -0.619171\n", - " \n", - " \n", - " 8\n", - " 1.050942\n", - " 0.953968\n", - " -0.044401\n", + " 1\n", + " -0.938587\n", + " 0.748033\n", + " -1.445145\n", " \n", " \n", - " 17\n", - " 1.255178\n", - " 1.1616\n", - " -0.547325\n", + " 4\n", + " -0.16745\n", + " 0.899528\n", + " -0.284269\n", " \n", " \n", - " 23\n", - " -1.307054\n", - " 0.694427\n", - " -0.547325\n", + " 8\n", + " 0.453222\n", + " -1.877885\n", + " 0.658942\n", " \n", " \n", - " 25\n", - " 1.515114\n", - " 0.486795\n", - " 0.027445\n", + " 11\n", + " -1.12667\n", + " 0.697535\n", + " -0.792152\n", " \n", " \n", - " 27\n", - " 1.236611\n", - " 1.265417\n", - " 0.027445\n", + " 13\n", + " -1.183094\n", + " 1.404513\n", + " -0.792152\n", " \n", " \n", - " 29\n", - " 1.403713\n", - " 0.953968\n", - " 0.027445\n", + " 15\n", + " 0.867003\n", + " -0.766919\n", + " 0.513833\n", " \n", " \n", - " 34\n", - " 0.419668\n", - " 0.538703\n", - " -1.62502\n", + " 16\n", + " -1.784958\n", + " 1.959995\n", + " -0.211715\n", " \n", " \n", - " 35\n", - " -1.455589\n", - " 0.694427\n", - " -1.050249\n", + " 23\n", + " -0.355532\n", + " 0.647036\n", + " -1.5177\n", " \n", " \n", - " 39\n", - " 0.326833\n", - " 1.1616\n", - " -0.475479\n", + " 34\n", + " -0.600039\n", + " -1.776888\n", + " 0.949161\n", " \n", " \n", - " 51\n", - " -1.065684\n", - " 0.227254\n", - " -0.978403\n", + " 36\n", + " -0.129833\n", + " -1.423399\n", + " 1.23938\n", " \n", " \n", - " 52\n", - " -0.248741\n", - " 0.071529\n", - " -0.978403\n", + " 42\n", + " -1.615684\n", + " -0.514427\n", + " -0.429379\n", " \n", " \n", - " 60\n", - " 0.531069\n", - " 0.382978\n", - " -0.403633\n", + " 48\n", + " 0.415606\n", + " -0.716421\n", + " 1.021716\n", " \n", " \n", " 61\n", - " 0.401101\n", - " 0.90206\n", - " -0.403633\n", + " 0.396797\n", + " -1.170907\n", + " 1.457044\n", " \n", " \n", " 64\n", - " -1.455589\n", - " 0.33107\n", - " -0.403633\n", + " 0.434414\n", + " -1.120408\n", + " 1.09427\n", " \n", " \n", " 65\n", - " -0.564378\n", - " 0.642519\n", - " -0.403633\n", + " -1.220711\n", + " 1.051024\n", + " -1.445145\n", " \n", " \n", - " 67\n", - " 1.273745\n", - " 1.317325\n", - " 0.171138\n", + " 68\n", + " -1.484026\n", + " -0.009443\n", + " -1.009817\n", " \n", " \n", - " 83\n", - " 2.629128\n", - " 0.33107\n", - " -1.409481\n", + " 70\n", + " 1.638141\n", + " 1.404513\n", + " 0.296168\n", " \n", " \n", - " 85\n", - " -1.288487\n", - " 0.746335\n", - " -0.83471\n", + " 72\n", + " 0.829387\n", + " 0.142052\n", + " -0.719598\n", " \n", " \n", - " 93\n", - " -0.508677\n", - " 0.486795\n", - " 0.314831\n", + " 74\n", + " -0.242683\n", + " -1.524396\n", + " 0.586387\n", " \n", " \n", - " 104\n", - " 0.382534\n", - " -0.032287\n", - " -0.762864\n", + " 77\n", + " -1.277136\n", + " -0.211437\n", + " -0.647043\n", " \n", " \n", - " 105\n", - " -1.065684\n", - " 0.746335\n", - " -0.762864\n", + " 81\n", + " 0.208715\n", + " -1.221405\n", + " 0.804051\n", + " \n", + " \n", + " 91\n", + " 1.261976\n", + " 0.647036\n", + " 0.005949\n", " \n", " \n", - " 108\n", - " 1.162343\n", - " 0.382978\n", - " -0.762864\n", + " 96\n", + " 0.246331\n", + " -1.322402\n", + " 0.731497\n", " \n", " \n", - " 113\n", - " 1.496547\n", - " 1.213509\n", - " 0.386677\n", + " 105\n", + " -1.803766\n", + " 0.445043\n", + " -1.009817\n", " \n", " \n", - " 130\n", - " -0.341575\n", - " 1.213509\n", - " -0.044401\n", + " 111\n", + " -1.164286\n", + " 0.697535\n", + " -2.098138\n", " \n", " \n", "\n", @@ -1501,65 +1417,65 @@ "text/plain": [ " standard_scaled_culmen_length_mm standard_scaled_culmen_depth_mm \\\n", "penguin_id \n", - "3 0.493935 0.382978 \n", - "8 1.050942 0.953968 \n", - "17 1.255178 1.1616 \n", - "23 -1.307054 0.694427 \n", - "25 1.515114 0.486795 \n", - "27 1.236611 1.265417 \n", - "29 1.403713 0.953968 \n", - "34 0.419668 0.538703 \n", - "35 -1.455589 0.694427 \n", - "39 0.326833 1.1616 \n", - "51 -1.065684 0.227254 \n", - "52 -0.248741 0.071529 \n", - "60 0.531069 0.382978 \n", - "61 0.401101 0.90206 \n", - "64 -1.455589 0.33107 \n", - "65 -0.564378 0.642519 \n", - "67 1.273745 1.317325 \n", - "83 2.629128 0.33107 \n", - "85 -1.288487 0.746335 \n", - "93 -0.508677 0.486795 \n", - "104 0.382534 -0.032287 \n", - "105 -1.065684 0.746335 \n", - "108 1.162343 0.382978 \n", - "113 1.496547 1.213509 \n", - "130 -0.341575 1.213509 \n", + "1 -0.938587 0.748033 \n", + "4 -0.16745 0.899528 \n", + "8 0.453222 -1.877885 \n", + "11 -1.12667 0.697535 \n", + "13 -1.183094 1.404513 \n", + "15 0.867003 -0.766919 \n", + "16 -1.784958 1.959995 \n", + "23 -0.355532 0.647036 \n", + "34 -0.600039 -1.776888 \n", + "36 -0.129833 -1.423399 \n", + "42 -1.615684 -0.514427 \n", + "48 0.415606 -0.716421 \n", + "61 0.396797 -1.170907 \n", + "64 0.434414 -1.120408 \n", + "65 -1.220711 1.051024 \n", + "68 -1.484026 -0.009443 \n", + "70 1.638141 1.404513 \n", + "72 0.829387 0.142052 \n", + "74 -0.242683 -1.524396 \n", + "77 -1.277136 -0.211437 \n", + "81 0.208715 -1.221405 \n", + "91 1.261976 0.647036 \n", + "96 0.246331 -1.322402 \n", + "105 -1.803766 0.445043 \n", + "111 -1.164286 0.697535 \n", "\n", " standard_scaled_flipper_length_mm \n", "penguin_id \n", - "3 -0.619171 \n", - "8 -0.044401 \n", - "17 -0.547325 \n", - "23 -0.547325 \n", - "25 0.027445 \n", - "27 0.027445 \n", - "29 0.027445 \n", - "34 -1.62502 \n", - "35 -1.050249 \n", - "39 -0.475479 \n", - "51 -0.978403 \n", - "52 -0.978403 \n", - "60 -0.403633 \n", - "61 -0.403633 \n", - "64 -0.403633 \n", - "65 -0.403633 \n", - "67 0.171138 \n", - "83 -1.409481 \n", - "85 -0.83471 \n", - "93 0.314831 \n", - "104 -0.762864 \n", - "105 -0.762864 \n", - "108 -0.762864 \n", - "113 0.386677 \n", - "130 -0.044401 \n", + "1 -1.445145 \n", + "4 -0.284269 \n", + "8 0.658942 \n", + "11 -0.792152 \n", + "13 -0.792152 \n", + "15 0.513833 \n", + "16 -0.211715 \n", + "23 -1.5177 \n", + "34 0.949161 \n", + "36 1.23938 \n", + "42 -0.429379 \n", + "48 1.021716 \n", + "61 1.457044 \n", + "64 1.09427 \n", + "65 -1.445145 \n", + "68 -1.009817 \n", + "70 0.296168 \n", + "72 -0.719598 \n", + "74 0.586387 \n", + "77 -0.647043 \n", + "81 0.804051 \n", + "91 0.005949 \n", + "96 0.731497 \n", + "105 -1.009817 \n", + "111 -2.098138 \n", "...\n", "\n", "[67 rows x 3 columns]" ] }, - "execution_count": 23, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -1581,32 +1497,16 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d642a617d27f4e2493c80dbdd1686193", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job a8d8afa4-d91e-487e-8709-8727a73ab453 is RUNNING. Open Job" + ], "text/plain": [ - "HTML(value='Query job b9afd624-4345-4160-8809-05786563ce35 is RUNNING. " ] }, "metadata": {}, @@ -1614,13 +1514,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "09217776c2294e8b929a56e7a73fbfa8", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 41962e2e-4d14-4053-9297-3ce61699551a is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job c918fc7c-a956-4259-b5c5-09c2eac615cd is RUNNING. " ] }, "metadata": {}, @@ -1628,13 +1526,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9c1581fc9fcb49739d1d81b73506b894", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 5d3c22c9-c972-4213-8557-726c9e0aca37 is DONE. 22.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 1d855341-282f-4d10-9ba9-3ce6683b729a is RUNNING. " ] }, "metadata": {}, @@ -1642,13 +1538,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d7749eb7cf554697a60c90f3718ad582", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 9cb7b33f-ea05-4cf4-9f92-bb3aa4ea8d10 is DONE. 2.1 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job c257ff78-3e15-4296-82f5-ba6c2eb6a6ff is RUNNING. " ] }, "metadata": {}, @@ -1656,13 +1550,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e900465918224249bccc781d992aadbb", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job fe1f35d6-d82c-4aab-a284-637b72554f5b is DONE. 29.2 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job a17eec0c-10d0-4943-95be-60fced57d5cb is RUNNING. " ] }, "metadata": {}, @@ -1670,13 +1562,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b0272ee35c5745a491b7c5883b3fbb1b", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 37bc90ff-59cb-4b0c-8f9d-73bcda43524a is DONE. 536 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 1db53c8a-cf45-4c69-a443-6b7a49fc3a07 is DONE. 536 Bytes processed. " ] }, "metadata": {}, @@ -1684,13 +1574,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "00f9d4b55bb94997aaebdae298cefab3", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e23f4724-fdd8-45a9-8c87-defd8d471035 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job ae870ee3-e633-4556-94e6-6669fa0bfde2 is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -1698,13 +1586,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9cd8e791be5844669cba10dc53f862ae", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 257378db-0569-42d7-965a-7757154c710b is DONE. 21.4 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job baa7c70c-eabc-49e1-bae9-fdd4891cdb6e is RUNNING. " ] }, "metadata": {}, @@ -1751,226 +1637,226 @@ " \n", " \n", " 0\n", - " [{'index': 2, 'value': 1.0}]\n", - " -1.344188\n", - " 0.642519\n", - " -1.193942\n", - " [{'index': 2, 'value': 1.0}]\n", " [{'index': 1, 'value': 1.0}]\n", - " \n", - " \n", - " 1\n", + " -0.750505\n", + " 0.84903\n", + " -0.937262\n", " [{'index': 2, 'value': 1.0}]\n", - " -0.750047\n", - " 1.005876\n", - " -1.193942\n", - " [{'index': 3, 'value': 1.0}]\n", " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", " 2\n", - " [{'index': 2, 'value': 1.0}]\n", - " -0.545811\n", - " 0.90206\n", - " -1.193942\n", - " [{'index': 3, 'value': 1.0}]\n", " [{'index': 1, 'value': 1.0}]\n", + " 0.622496\n", + " -1.322402\n", + " 0.804051\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 4\n", - " [{'index': 2, 'value': 1.0}]\n", - " -1.214219\n", - " -0.188011\n", - " -0.619171\n", + " 3\n", " [{'index': 2, 'value': 1.0}]\n", + " -0.299107\n", + " -0.261935\n", + " -1.009817\n", " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", " 5\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.490839\n", + " -0.968913\n", + " 1.311935\n", " [{'index': 2, 'value': 1.0}]\n", - " -0.118772\n", - " 0.694427\n", - " -0.619171\n", " [{'index': 3, 'value': 1.0}]\n", - " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", " 6\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.524806\n", + " 1.959995\n", + " -0.429379\n", " [{'index': 2, 'value': 1.0}]\n", - " 0.568203\n", - " -0.291828\n", - " -0.619171\n", - " [{'index': 2, 'value': 1.0}]\n", - " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", " 7\n", - " [{'index': 2, 'value': 1.0}]\n", - " 1.236611\n", - " 0.642519\n", - " -0.044401\n", - " [{'index': 2, 'value': 1.0}]\n", - " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.208715\n", + " -1.726389\n", + " 1.021716\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", " 9\n", - " [{'index': 2, 'value': 1.0}]\n", - " -0.675779\n", - " 1.524957\n", - " -0.044401\n", - " [{'index': 3, 'value': 1.0}]\n", " [{'index': 1, 'value': 1.0}]\n", + " 1.205551\n", + " -1.019412\n", + " 1.09427\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", " 10\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.772962\n", + " -0.817418\n", + " 1.457044\n", " [{'index': 2, 'value': 1.0}]\n", - " -0.564378\n", - " 0.90206\n", - " 0.530369\n", " [{'index': 3, 'value': 1.0}]\n", - " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 11\n", + " 12\n", + " [{'index': 1, 'value': 1.0}]\n", + " 1.243168\n", + " -1.120408\n", + " 1.602153\n", " [{'index': 2, 'value': 1.0}]\n", - " -0.898582\n", - " 0.798243\n", - " -1.122096\n", " [{'index': 3, 'value': 1.0}]\n", - " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 12\n", - " [{'index': 2, 'value': 1.0}]\n", - " -1.26992\n", - " -0.136103\n", - " -1.122096\n", - " [{'index': 2, 'value': 1.0}]\n", + " 14\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.709725\n", + " 0.344046\n", + " -0.792152\n", + " [{'index': 1, 'value': 1.0}]\n", " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 13\n", - " [{'index': 2, 'value': 1.0}]\n", - " 0.58677\n", - " 0.071529\n", - " -1.122096\n", - " [{'index': 2, 'value': 1.0}]\n", + " 17\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.509647\n", + " -0.918415\n", + " 1.021716\n", " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 14\n", + " 18\n", + " [{'index': 2, 'value': 1.0}]\n", + " 1.167935\n", + " 1.404513\n", + " -0.284269\n", " [{'index': 2, 'value': 1.0}]\n", - " -1.826927\n", - " -0.032287\n", - " -1.122096\n", " [{'index': 2, 'value': 1.0}]\n", - " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 15\n", + " 19\n", " [{'index': 2, 'value': 1.0}]\n", - " -1.26992\n", - " -0.343736\n", - " -1.122096\n", + " -1.295944\n", + " 0.445043\n", + " -1.662809\n", " [{'index': 2, 'value': 1.0}]\n", " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 16\n", + " 20\n", " [{'index': 2, 'value': 1.0}]\n", - " 0.3454\n", - " 0.071529\n", - " -0.547325\n", + " 1.299593\n", + " 0.798532\n", + " 0.151059\n", " [{'index': 2, 'value': 1.0}]\n", " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 18\n", - " [{'index': 2, 'value': 1.0}]\n", - " -0.768614\n", - " 0.382978\n", - " -0.547325\n", - " [{'index': 3, 'value': 1.0}]\n", + " 21\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.675272\n", + " 0.344046\n", + " -1.009817\n", + " [{'index': 1, 'value': 1.0}]\n", " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 19\n", - " [{'index': 2, 'value': 1.0}]\n", - " -1.121385\n", - " 0.486795\n", - " -0.547325\n", - " [{'index': 3, 'value': 1.0}]\n", + " 22\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.26514\n", + " -1.675891\n", + " 0.658942\n", " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 20\n", + " 24\n", " [{'index': 2, 'value': 1.0}]\n", - " 0.512502\n", - " 0.33107\n", - " -0.547325\n", + " 1.43125\n", + " 1.556008\n", + " -0.501934\n", " [{'index': 2, 'value': 1.0}]\n", " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 21\n", + " 25\n", " [{'index': 2, 'value': 1.0}]\n", - " 1.385146\n", - " 1.057784\n", - " -0.547325\n", - " [{'index': 3, 'value': 1.0}]\n", + " 0.302756\n", + " 0.041055\n", + " -0.574488\n", + " [{'index': 1, 'value': 1.0}]\n", " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 22\n", - " [{'index': 2, 'value': 1.0}]\n", - " -0.675779\n", - " -0.032287\n", - " -0.547325\n", - " [{'index': 2, 'value': 1.0}]\n", + " 26\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.302756\n", + " -1.675891\n", + " 0.949161\n", " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 24\n", - " [{'index': 2, 'value': 1.0}]\n", - " 1.069509\n", - " 0.538703\n", - " -0.547325\n", + " 27\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.227523\n", + " -1.776888\n", + " 0.658942\n", + " [{'index': 1, 'value': 1.0}]\n", " [{'index': 3, 'value': 1.0}]\n", - " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 26\n", + " 28\n", + " [{'index': 1, 'value': 1.0}]\n", + " 1.318401\n", + " -0.362932\n", + " 1.747263\n", " [{'index': 2, 'value': 1.0}]\n", - " -0.43441\n", - " 0.694427\n", - " 0.027445\n", " [{'index': 3, 'value': 1.0}]\n", - " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 28\n", + " 29\n", + " [{'index': 2, 'value': 1.0}]\n", + " 2.202388\n", + " 1.303516\n", + " 0.441278\n", " [{'index': 2, 'value': 1.0}]\n", - " 1.923586\n", - " 1.888314\n", - " 0.027445\n", - " [{'index': 3, 'value': 1.0}]\n", " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", " 30\n", " [{'index': 2, 'value': 1.0}]\n", - " 1.292312\n", - " 0.694427\n", - " 0.027445\n", - " [{'index': 3, 'value': 1.0}]\n", + " -0.919779\n", + " 1.959995\n", + " -0.356824\n", " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", " 31\n", + " [{'index': 1, 'value': 1.0}]\n", + " 1.036277\n", + " -0.615424\n", + " 1.747263\n", " [{'index': 2, 'value': 1.0}]\n", - " -1.994029\n", - " -0.551368\n", - " -1.62502\n", + " [{'index': 3, 'value': 1.0}]\n", + " \n", + " \n", + " 32\n", + " [{'index': 3, 'value': 1.0}]\n", + " -0.223874\n", + " 0.19255\n", + " -0.356824\n", " [{'index': 2, 'value': 1.0}]\n", " [{'index': 1, 'value': 1.0}]\n", " \n", @@ -1982,121 +1868,121 @@ "text/plain": [ " onehotencoded_island standard_scaled_culmen_length_mm \\\n", "penguin_id \n", - "0 [{'index': 2, 'value': 1.0}] -1.344188 \n", - "1 [{'index': 2, 'value': 1.0}] -0.750047 \n", - "2 [{'index': 2, 'value': 1.0}] -0.545811 \n", - "4 [{'index': 2, 'value': 1.0}] -1.214219 \n", - "5 [{'index': 2, 'value': 1.0}] -0.118772 \n", - "6 [{'index': 2, 'value': 1.0}] 0.568203 \n", - "7 [{'index': 2, 'value': 1.0}] 1.236611 \n", - "9 [{'index': 2, 'value': 1.0}] -0.675779 \n", - "10 [{'index': 2, 'value': 1.0}] -0.564378 \n", - "11 [{'index': 2, 'value': 1.0}] -0.898582 \n", - "12 [{'index': 2, 'value': 1.0}] -1.26992 \n", - "13 [{'index': 2, 'value': 1.0}] 0.58677 \n", - "14 [{'index': 2, 'value': 1.0}] -1.826927 \n", - "15 [{'index': 2, 'value': 1.0}] -1.26992 \n", - "16 [{'index': 2, 'value': 1.0}] 0.3454 \n", - "18 [{'index': 2, 'value': 1.0}] -0.768614 \n", - "19 [{'index': 2, 'value': 1.0}] -1.121385 \n", - "20 [{'index': 2, 'value': 1.0}] 0.512502 \n", - "21 [{'index': 2, 'value': 1.0}] 1.385146 \n", - "22 [{'index': 2, 'value': 1.0}] -0.675779 \n", - "24 [{'index': 2, 'value': 1.0}] 1.069509 \n", - "26 [{'index': 2, 'value': 1.0}] -0.43441 \n", - "28 [{'index': 2, 'value': 1.0}] 1.923586 \n", - "30 [{'index': 2, 'value': 1.0}] 1.292312 \n", - "31 [{'index': 2, 'value': 1.0}] -1.994029 \n", + "0 [{'index': 1, 'value': 1.0}] -0.750505 \n", + "2 [{'index': 1, 'value': 1.0}] 0.622496 \n", + "3 [{'index': 2, 'value': 1.0}] -0.299107 \n", + "5 [{'index': 1, 'value': 1.0}] 0.490839 \n", + "6 [{'index': 1, 'value': 1.0}] -0.524806 \n", + "7 [{'index': 1, 'value': 1.0}] 0.208715 \n", + "9 [{'index': 1, 'value': 1.0}] 1.205551 \n", + "10 [{'index': 1, 'value': 1.0}] 0.772962 \n", + "12 [{'index': 1, 'value': 1.0}] 1.243168 \n", + "14 [{'index': 1, 'value': 1.0}] -1.709725 \n", + "17 [{'index': 1, 'value': 1.0}] 0.509647 \n", + "18 [{'index': 2, 'value': 1.0}] 1.167935 \n", + "19 [{'index': 2, 'value': 1.0}] -1.295944 \n", + "20 [{'index': 2, 'value': 1.0}] 1.299593 \n", + "21 [{'index': 1, 'value': 1.0}] -0.675272 \n", + "22 [{'index': 1, 'value': 1.0}] 0.26514 \n", + "24 [{'index': 2, 'value': 1.0}] 1.43125 \n", + "25 [{'index': 2, 'value': 1.0}] 0.302756 \n", + "26 [{'index': 1, 'value': 1.0}] 0.302756 \n", + "27 [{'index': 1, 'value': 1.0}] 0.227523 \n", + "28 [{'index': 1, 'value': 1.0}] 1.318401 \n", + "29 [{'index': 2, 'value': 1.0}] 2.202388 \n", + "30 [{'index': 2, 'value': 1.0}] -0.919779 \n", + "31 [{'index': 1, 'value': 1.0}] 1.036277 \n", + "32 [{'index': 3, 'value': 1.0}] -0.223874 \n", "\n", " standard_scaled_culmen_depth_mm \\\n", "penguin_id \n", - "0 0.642519 \n", - "1 1.005876 \n", - "2 0.90206 \n", - "4 -0.188011 \n", - "5 0.694427 \n", - "6 -0.291828 \n", - "7 0.642519 \n", - "9 1.524957 \n", - "10 0.90206 \n", - "11 0.798243 \n", - "12 -0.136103 \n", - "13 0.071529 \n", - "14 -0.032287 \n", - "15 -0.343736 \n", - "16 0.071529 \n", - "18 0.382978 \n", - "19 0.486795 \n", - "20 0.33107 \n", - "21 1.057784 \n", - "22 -0.032287 \n", - "24 0.538703 \n", - "26 0.694427 \n", - "28 1.888314 \n", - "30 0.694427 \n", - "31 -0.551368 \n", + "0 0.84903 \n", + "2 -1.322402 \n", + "3 -0.261935 \n", + "5 -0.968913 \n", + "6 1.959995 \n", + "7 -1.726389 \n", + "9 -1.019412 \n", + "10 -0.817418 \n", + "12 -1.120408 \n", + "14 0.344046 \n", + "17 -0.918415 \n", + "18 1.404513 \n", + "19 0.445043 \n", + "20 0.798532 \n", + "21 0.344046 \n", + "22 -1.675891 \n", + "24 1.556008 \n", + "25 0.041055 \n", + "26 -1.675891 \n", + "27 -1.776888 \n", + "28 -0.362932 \n", + "29 1.303516 \n", + "30 1.959995 \n", + "31 -0.615424 \n", + "32 0.19255 \n", "\n", " standard_scaled_flipper_length_mm onehotencoded_sex \\\n", "penguin_id \n", - "0 -1.193942 [{'index': 2, 'value': 1.0}] \n", - "1 -1.193942 [{'index': 3, 'value': 1.0}] \n", - "2 -1.193942 [{'index': 3, 'value': 1.0}] \n", - "4 -0.619171 [{'index': 2, 'value': 1.0}] \n", - "5 -0.619171 [{'index': 3, 'value': 1.0}] \n", - "6 -0.619171 [{'index': 2, 'value': 1.0}] \n", - "7 -0.044401 [{'index': 2, 'value': 1.0}] \n", - "9 -0.044401 [{'index': 3, 'value': 1.0}] \n", - "10 0.530369 [{'index': 3, 'value': 1.0}] \n", - "11 -1.122096 [{'index': 3, 'value': 1.0}] \n", - "12 -1.122096 [{'index': 2, 'value': 1.0}] \n", - "13 -1.122096 [{'index': 2, 'value': 1.0}] \n", - "14 -1.122096 [{'index': 2, 'value': 1.0}] \n", - "15 -1.122096 [{'index': 2, 'value': 1.0}] \n", - "16 -0.547325 [{'index': 2, 'value': 1.0}] \n", - "18 -0.547325 [{'index': 3, 'value': 1.0}] \n", - "19 -0.547325 [{'index': 3, 'value': 1.0}] \n", - "20 -0.547325 [{'index': 2, 'value': 1.0}] \n", - "21 -0.547325 [{'index': 3, 'value': 1.0}] \n", - "22 -0.547325 [{'index': 2, 'value': 1.0}] \n", - "24 -0.547325 [{'index': 3, 'value': 1.0}] \n", - "26 0.027445 [{'index': 3, 'value': 1.0}] \n", - "28 0.027445 [{'index': 3, 'value': 1.0}] \n", - "30 0.027445 [{'index': 3, 'value': 1.0}] \n", - "31 -1.62502 [{'index': 2, 'value': 1.0}] \n", + "0 -0.937262 [{'index': 2, 'value': 1.0}] \n", + "2 0.804051 [{'index': 1, 'value': 1.0}] \n", + "3 -1.009817 [{'index': 1, 'value': 1.0}] \n", + "5 1.311935 [{'index': 2, 'value': 1.0}] \n", + "6 -0.429379 [{'index': 2, 'value': 1.0}] \n", + "7 1.021716 [{'index': 1, 'value': 1.0}] \n", + "9 1.09427 [{'index': 1, 'value': 1.0}] \n", + "10 1.457044 [{'index': 2, 'value': 1.0}] \n", + "12 1.602153 [{'index': 2, 'value': 1.0}] \n", + "14 -0.792152 [{'index': 1, 'value': 1.0}] \n", + "17 1.021716 [{'index': 2, 'value': 1.0}] \n", + "18 -0.284269 [{'index': 2, 'value': 1.0}] \n", + "19 -1.662809 [{'index': 2, 'value': 1.0}] \n", + "20 0.151059 [{'index': 2, 'value': 1.0}] \n", + "21 -1.009817 [{'index': 1, 'value': 1.0}] \n", + "22 0.658942 [{'index': 1, 'value': 1.0}] \n", + "24 -0.501934 [{'index': 2, 'value': 1.0}] \n", + "25 -0.574488 [{'index': 1, 'value': 1.0}] \n", + "26 0.949161 [{'index': 1, 'value': 1.0}] \n", + "27 0.658942 [{'index': 1, 'value': 1.0}] \n", + "28 1.747263 [{'index': 2, 'value': 1.0}] \n", + "29 0.441278 [{'index': 2, 'value': 1.0}] \n", + "30 -0.356824 [{'index': 2, 'value': 1.0}] \n", + "31 1.747263 [{'index': 2, 'value': 1.0}] \n", + "32 -0.356824 [{'index': 2, 'value': 1.0}] \n", "\n", " onehotencoded_species \n", "penguin_id \n", "0 [{'index': 1, 'value': 1.0}] \n", - "1 [{'index': 1, 'value': 1.0}] \n", - "2 [{'index': 1, 'value': 1.0}] \n", - "4 [{'index': 1, 'value': 1.0}] \n", - "5 [{'index': 1, 'value': 1.0}] \n", - "6 [{'index': 2, 'value': 1.0}] \n", - "7 [{'index': 2, 'value': 1.0}] \n", - "9 [{'index': 1, 'value': 1.0}] \n", - "10 [{'index': 1, 'value': 1.0}] \n", - "11 [{'index': 1, 'value': 1.0}] \n", - "12 [{'index': 1, 'value': 1.0}] \n", - "13 [{'index': 2, 'value': 1.0}] \n", + "2 [{'index': 3, 'value': 1.0}] \n", + "3 [{'index': 2, 'value': 1.0}] \n", + "5 [{'index': 3, 'value': 1.0}] \n", + "6 [{'index': 1, 'value': 1.0}] \n", + "7 [{'index': 3, 'value': 1.0}] \n", + "9 [{'index': 3, 'value': 1.0}] \n", + "10 [{'index': 3, 'value': 1.0}] \n", + "12 [{'index': 3, 'value': 1.0}] \n", "14 [{'index': 1, 'value': 1.0}] \n", - "15 [{'index': 1, 'value': 1.0}] \n", - "16 [{'index': 2, 'value': 1.0}] \n", - "18 [{'index': 1, 'value': 1.0}] \n", + "17 [{'index': 3, 'value': 1.0}] \n", + "18 [{'index': 2, 'value': 1.0}] \n", "19 [{'index': 1, 'value': 1.0}] \n", "20 [{'index': 2, 'value': 1.0}] \n", - "21 [{'index': 2, 'value': 1.0}] \n", - "22 [{'index': 1, 'value': 1.0}] \n", + "21 [{'index': 1, 'value': 1.0}] \n", + "22 [{'index': 3, 'value': 1.0}] \n", "24 [{'index': 2, 'value': 1.0}] \n", - "26 [{'index': 1, 'value': 1.0}] \n", - "28 [{'index': 2, 'value': 1.0}] \n", - "30 [{'index': 2, 'value': 1.0}] \n", - "31 [{'index': 1, 'value': 1.0}] \n", + "25 [{'index': 2, 'value': 1.0}] \n", + "26 [{'index': 3, 'value': 1.0}] \n", + "27 [{'index': 3, 'value': 1.0}] \n", + "28 [{'index': 3, 'value': 1.0}] \n", + "29 [{'index': 2, 'value': 1.0}] \n", + "30 [{'index': 1, 'value': 1.0}] \n", + "31 [{'index': 3, 'value': 1.0}] \n", + "32 [{'index': 1, 'value': 1.0}] \n", "...\n", "\n", "[267 rows x 6 columns]" ] }, - "execution_count": 24, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -2138,18 +2024,28 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5db4c5c80ba4417db151aa561dab5ee7", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 7d9c9f8b-6b4c-451f-ae3d-06fb7090d148 is DONE. 21.4 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job be87ccfa-72ab-4858-9d4a-b2f5f8b2a5e6 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job ceced0cc-13a7-4b14-b42c-4d5f69e7e49a is RUNNING. " ] }, "metadata": {}, @@ -2157,13 +2053,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e6b05d83de0e496d9e47392762046fc5", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 2d651fac-11bf-42da-8c18-bd33207379ca is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job a708b8df-6040-49b1-a6da-d2c0d162f247 is RUNNING. " ] }, "metadata": {}, @@ -2171,13 +2065,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "24d616c24a844abfbfd77ebd9f28486a", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 58836ccc-242b-4574-bc48-4c269e74dbf1 is DONE. 5.7 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job e9b9cbb5-f6a4-4d85-ba78-1edae77dce94 is RUNNING. " ] }, "metadata": {}, @@ -2185,13 +2077,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ce49b66c6fa0460aa3ee28746765b6ac", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 1bf531f0-0fde-489b-ab36-6040a2a12377 is DONE. 536 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 6c0a41a7-a732-413a-b074-ba82f175eab8 is RUNNING. " ] }, "metadata": {}, @@ -2199,13 +2089,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4a6010d73ca04ea9a133de99aa90da3c", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 4245f4e6-4d5b-404f-81d7-50f0553e2456 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 2d08b79d-9c36-4db7-824a-332fdd02e9fc is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -2213,13 +2101,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ce9cfdca964a4062a52ebaae9d13ae59", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job ed951699-c005-450e-a8b6-0916ec234e7f is DONE. 5.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 7fa0bf53-1022-45ee-b3ac-78fa5c155585 is RUNNING. " ] }, "metadata": {}, @@ -2247,152 +2133,397 @@ " \n", " \n", " predicted_body_mass_g\n", + " onehotencoded_island\n", + " standard_scaled_culmen_length_mm\n", + " standard_scaled_culmen_depth_mm\n", + " standard_scaled_flipper_length_mm\n", + " onehotencoded_sex\n", + " onehotencoded_species\n", " \n", " \n", " penguin_id\n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " 3\n", - " 3394.118128\n", + " 1\n", + " 3781.402407\n", + " [{'index': 3, 'value': 1.0}]\n", + " -0.938587\n", + " 0.748033\n", + " -1.445145\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 8\n", - " 4048.685642\n", + " 4\n", + " 4124.107944\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.16745\n", + " 0.899528\n", + " -0.284269\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 17\n", - " 3976.454093\n", + " 8\n", + " 4670.344196\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.453222\n", + " -1.877885\n", + " 0.658942\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 23\n", - " 3541.582194\n", + " 11\n", + " 3529.417214\n", + " [{'index': 2, 'value': 1.0}]\n", + " -1.12667\n", + " 0.697535\n", + " -0.792152\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 25\n", - " 4032.844186\n", + " 13\n", + " 4014.101714\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.183094\n", + " 1.404513\n", + " -0.792152\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 27\n", - " 4118.351772\n", + " 15\n", + " 5212.41288\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.867003\n", + " -0.766919\n", + " 0.513833\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 29\n", - " 4087.767826\n", + " 16\n", + " 4163.595615\n", + " [{'index': 3, 'value': 1.0}]\n", + " -1.784958\n", + " 1.959995\n", + " -0.211715\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 34\n", - " 3183.755249\n", + " 23\n", + " 3392.453069\n", + " [{'index': 2, 'value': 1.0}]\n", + " -0.355532\n", + " 0.647036\n", + " -1.5177\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 35\n", - " 3418.802274\n", - " \n", - " \n", - " 39\n", - " 3519.186468\n", + " 34\n", + " 4698.305397\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.600039\n", + " -1.776888\n", + " 0.949161\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 51\n", - " 3398.135365\n", + " 36\n", + " 4828.226949\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.129833\n", + " -1.423399\n", + " 1.23938\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 52\n", - " 3223.615957\n", + " 42\n", + " 3430.58866\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.615684\n", + " -0.514427\n", + " -0.429379\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 60\n", - " 3445.014718\n", + " 48\n", + " 5314.260221\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.415606\n", + " -0.716421\n", + " 1.021716\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", " 61\n", - " 3505.638864\n", + " 5363.205372\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.396797\n", + " -1.170907\n", + " 1.457044\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", " 64\n", - " 3515.905786\n", + " 4855.908314\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.434414\n", + " -1.120408\n", + " 1.09427\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", " 65\n", - " 4028.363185\n", + " 3413.100524\n", + " [{'index': 2, 'value': 1.0}]\n", + " -1.220711\n", + " 1.051024\n", + " -1.445145\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 67\n", - " 4159.993943\n", + " 68\n", + " 3340.219002\n", + " [{'index': 3, 'value': 1.0}]\n", + " -1.484026\n", + " -0.009443\n", + " -1.009817\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 83\n", - " 3348.16883\n", + " 70\n", + " 4228.73157\n", + " [{'index': 2, 'value': 1.0}]\n", + " 1.638141\n", + " 1.404513\n", + " 0.296168\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 85\n", - " 3485.050273\n", + " 72\n", + " 3811.538478\n", + " [{'index': 2, 'value': 1.0}]\n", + " 0.829387\n", + " 0.142052\n", + " -0.719598\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 93\n", - " 4172.874548\n", + " 74\n", + " 4659.770763\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.242683\n", + " -1.524396\n", + " 0.586387\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 104\n", - " 3299.302424\n", + " 77\n", + " 3453.388804\n", + " [{'index': 2, 'value': 1.0}]\n", + " -1.277136\n", + " -0.211437\n", + " -0.647043\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 105\n", - " 3515.687917\n", + " 81\n", + " 4766.245033\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.208715\n", + " -1.221405\n", + " 0.804051\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", + " \n", + " \n", + " 91\n", + " 4057.807281\n", + " [{'index': 2, 'value': 1.0}]\n", + " 1.261976\n", + " 0.647036\n", + " 0.005949\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 108\n", - " 3405.224618\n", + " 96\n", + " 4739.827445\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.246331\n", + " -1.322402\n", + " 0.731497\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 113\n", - " 4209.140425\n", + " 105\n", + " 3394.891976\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.803766\n", + " 0.445043\n", + " -1.009817\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 130\n", - " 4197.905737\n", + " 111\n", + " 3201.493683\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.164286\n", + " 0.697535\n", + " -2.098138\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", "\n", - "

25 rows × 1 columns

\n", - "[67 rows x 1 columns in total]" - ], - "text/plain": [ - " predicted_body_mass_g\n", - "penguin_id \n", - "3 3394.118128\n", - "8 4048.685642\n", - "17 3976.454093\n", - "23 3541.582194\n", - "25 4032.844186\n", - "27 4118.351772\n", - "29 4087.767826\n", - "34 3183.755249\n", - "35 3418.802274\n", - "39 3519.186468\n", - "51 3398.135365\n", - "52 3223.615957\n", - "60 3445.014718\n", - "61 3505.638864\n", - "64 3515.905786\n", - "65 4028.363185\n", - "67 4159.993943\n", - "83 3348.16883\n", - "85 3485.050273\n", - "93 4172.874548\n", - "104 3299.302424\n", - "105 3515.687917\n", - "108 3405.224618\n", - "113 4209.140425\n", - "130 4197.905737\n", - "...\n", + "

25 rows × 7 columns

\n", + "[67 rows x 7 columns in total]" + ], + "text/plain": [ + " predicted_body_mass_g onehotencoded_island \\\n", + "penguin_id \n", + "1 3781.402407 [{'index': 3, 'value': 1.0}] \n", + "4 4124.107944 [{'index': 1, 'value': 1.0}] \n", + "8 4670.344196 [{'index': 1, 'value': 1.0}] \n", + "11 3529.417214 [{'index': 2, 'value': 1.0}] \n", + "13 4014.101714 [{'index': 1, 'value': 1.0}] \n", + "15 5212.41288 [{'index': 1, 'value': 1.0}] \n", + "16 4163.595615 [{'index': 3, 'value': 1.0}] \n", + "23 3392.453069 [{'index': 2, 'value': 1.0}] \n", + "34 4698.305397 [{'index': 1, 'value': 1.0}] \n", + "36 4828.226949 [{'index': 1, 'value': 1.0}] \n", + "42 3430.58866 [{'index': 1, 'value': 1.0}] \n", + "48 5314.260221 [{'index': 1, 'value': 1.0}] \n", + "61 5363.205372 [{'index': 1, 'value': 1.0}] \n", + "64 4855.908314 [{'index': 1, 'value': 1.0}] \n", + "65 3413.100524 [{'index': 2, 'value': 1.0}] \n", + "68 3340.219002 [{'index': 3, 'value': 1.0}] \n", + "70 4228.73157 [{'index': 2, 'value': 1.0}] \n", + "72 3811.538478 [{'index': 2, 'value': 1.0}] \n", + "74 4659.770763 [{'index': 1, 'value': 1.0}] \n", + "77 3453.388804 [{'index': 2, 'value': 1.0}] \n", + "81 4766.245033 [{'index': 1, 'value': 1.0}] \n", + "91 4057.807281 [{'index': 2, 'value': 1.0}] \n", + "96 4739.827445 [{'index': 1, 'value': 1.0}] \n", + "105 3394.891976 [{'index': 1, 'value': 1.0}] \n", + "111 3201.493683 [{'index': 1, 'value': 1.0}] \n", "\n", - "[67 rows x 1 columns]" + " standard_scaled_culmen_length_mm standard_scaled_culmen_depth_mm \\\n", + "penguin_id \n", + "1 -0.938587 0.748033 \n", + "4 -0.16745 0.899528 \n", + "8 0.453222 -1.877885 \n", + "11 -1.12667 0.697535 \n", + "13 -1.183094 1.404513 \n", + "15 0.867003 -0.766919 \n", + "16 -1.784958 1.959995 \n", + "23 -0.355532 0.647036 \n", + "34 -0.600039 -1.776888 \n", + "36 -0.129833 -1.423399 \n", + "42 -1.615684 -0.514427 \n", + "48 0.415606 -0.716421 \n", + "61 0.396797 -1.170907 \n", + "64 0.434414 -1.120408 \n", + "65 -1.220711 1.051024 \n", + "68 -1.484026 -0.009443 \n", + "70 1.638141 1.404513 \n", + "72 0.829387 0.142052 \n", + "74 -0.242683 -1.524396 \n", + "77 -1.277136 -0.211437 \n", + "81 0.208715 -1.221405 \n", + "91 1.261976 0.647036 \n", + "96 0.246331 -1.322402 \n", + "105 -1.803766 0.445043 \n", + "111 -1.164286 0.697535 \n", + "\n", + " standard_scaled_flipper_length_mm onehotencoded_sex \\\n", + "penguin_id \n", + "1 -1.445145 [{'index': 2, 'value': 1.0}] \n", + "4 -0.284269 [{'index': 2, 'value': 1.0}] \n", + "8 0.658942 [{'index': 1, 'value': 1.0}] \n", + "11 -0.792152 [{'index': 1, 'value': 1.0}] \n", + "13 -0.792152 [{'index': 2, 'value': 1.0}] \n", + "15 0.513833 [{'index': 2, 'value': 1.0}] \n", + "16 -0.211715 [{'index': 2, 'value': 1.0}] \n", + "23 -1.5177 [{'index': 1, 'value': 1.0}] \n", + "34 0.949161 [{'index': 1, 'value': 1.0}] \n", + "36 1.23938 [{'index': 1, 'value': 1.0}] \n", + "42 -0.429379 [{'index': 1, 'value': 1.0}] \n", + "48 1.021716 [{'index': 2, 'value': 1.0}] \n", + "61 1.457044 [{'index': 2, 'value': 1.0}] \n", + "64 1.09427 [{'index': 1, 'value': 1.0}] \n", + "65 -1.445145 [{'index': 1, 'value': 1.0}] \n", + "68 -1.009817 [{'index': 1, 'value': 1.0}] \n", + "70 0.296168 [{'index': 2, 'value': 1.0}] \n", + "72 -0.719598 [{'index': 2, 'value': 1.0}] \n", + "74 0.586387 [{'index': 1, 'value': 1.0}] \n", + "77 -0.647043 [{'index': 1, 'value': 1.0}] \n", + "81 0.804051 [{'index': 1, 'value': 1.0}] \n", + "91 0.005949 [{'index': 2, 'value': 1.0}] \n", + "96 0.731497 [{'index': 1, 'value': 1.0}] \n", + "105 -1.009817 [{'index': 1, 'value': 1.0}] \n", + "111 -2.098138 [{'index': 1, 'value': 1.0}] \n", + "\n", + " onehotencoded_species \n", + "penguin_id \n", + "1 [{'index': 1, 'value': 1.0}] \n", + "4 [{'index': 1, 'value': 1.0}] \n", + "8 [{'index': 3, 'value': 1.0}] \n", + "11 [{'index': 1, 'value': 1.0}] \n", + "13 [{'index': 1, 'value': 1.0}] \n", + "15 [{'index': 3, 'value': 1.0}] \n", + "16 [{'index': 1, 'value': 1.0}] \n", + "23 [{'index': 1, 'value': 1.0}] \n", + "34 [{'index': 3, 'value': 1.0}] \n", + "36 [{'index': 3, 'value': 1.0}] \n", + "42 [{'index': 1, 'value': 1.0}] \n", + "48 [{'index': 3, 'value': 1.0}] \n", + "61 [{'index': 3, 'value': 1.0}] \n", + "64 [{'index': 3, 'value': 1.0}] \n", + "65 [{'index': 1, 'value': 1.0}] \n", + "68 [{'index': 1, 'value': 1.0}] \n", + "70 [{'index': 2, 'value': 1.0}] \n", + "72 [{'index': 2, 'value': 1.0}] \n", + "74 [{'index': 3, 'value': 1.0}] \n", + "77 [{'index': 1, 'value': 1.0}] \n", + "81 [{'index': 3, 'value': 1.0}] \n", + "91 [{'index': 2, 'value': 1.0}] \n", + "96 [{'index': 3, 'value': 1.0}] \n", + "105 [{'index': 1, 'value': 1.0}] \n", + "111 [{'index': 1, 'value': 1.0}] \n", + "\n", + "[67 rows x 7 columns]" ] }, - "execution_count": 25, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -2423,18 +2554,16 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d7a16e04253a42b7a5ce247d8f63b656", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 027042f1-9a18-43d8-a378-ab9410e395b1 is DONE. 23.5 kB processed.
Open Job" + ], "text/plain": [ - "HTML(value='Query job 6f19614c-82c0-4f8b-b74b-9d91a894efdd is RUNNING. " ] }, "metadata": {}, @@ -2442,13 +2571,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4a99ac15431e433595de1040872a4558", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 6c8484a0-a504-4e50-93d6-3d247c9ff558 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 51899e2d-f6ef-4e62-98b6-c11550f74f4b is RUNNING. " ] }, "metadata": {}, @@ -2456,13 +2583,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "90909b620e084f59b0f9da266257593f", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e81ca2de-df2e-41ec-af86-14f8dcec1b44 is DONE. 6.2 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 44d3fddc-74bc-4de0-a458-2c73b38f74fb is RUNNING. " ] }, "metadata": {}, @@ -2470,13 +2595,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2a9c2c05041a4fb691809bab5310bb05", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 3e6d413c-f8c4-4390-95eb-3a1f5bc59aed is DONE. 536 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 33584475-f02b-4c98-9a51-e29996f4f950 is RUNNING. " ] }, "metadata": {}, @@ -2484,13 +2607,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "6b0677c228d54b409c66e5dfa98d7e00", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e448220d-0c50-45b7-bcbe-d1159b3d18ce is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job df25ba49-280e-424d-a357-dde71a9b35dd is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -2498,13 +2619,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "379ae6497fb34f969d21b2cd664e8bfa", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e167a234-828d-4f05-8654-63cf97e50ba3 is DONE. 10.2 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 6f92a04e-af7e-41d6-9303-6366c1751294 is RUNNING. " ] }, "metadata": {}, @@ -2532,152 +2651,452 @@ " \n", " \n", " CENTROID_ID\n", + " NEAREST_CENTROIDS_DISTANCE\n", + " onehotencoded_island\n", + " standard_scaled_culmen_length_mm\n", + " standard_scaled_culmen_depth_mm\n", + " standard_scaled_flipper_length_mm\n", + " onehotencoded_sex\n", + " onehotencoded_species\n", " \n", " \n", " penguin_id\n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " 3\n", - " 3\n", - " \n", - " \n", - " 8\n", + " 1\n", " 3\n", + " [{'CENTROID_ID': 3, 'DISTANCE': 1.236380597035...\n", + " [{'index': 3, 'value': 1.0}]\n", + " -0.938587\n", + " 0.748033\n", + " -1.445145\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 17\n", + " 4\n", " 3\n", + " [{'CENTROID_ID': 3, 'DISTANCE': 1.039497631856...\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.16745\n", + " 0.899528\n", + " -0.284269\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 23\n", + " 8\n", " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 1.171040485975...\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.453222\n", + " -1.877885\n", + " 0.658942\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 25\n", - " 3\n", + " 11\n", + " 2\n", + " [{'CENTROID_ID': 2, 'DISTANCE': 0.969102754012...\n", + " [{'index': 2, 'value': 1.0}]\n", + " -1.12667\n", + " 0.697535\n", + " -0.792152\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 27\n", + " 13\n", " 3\n", + " [{'CENTROID_ID': 3, 'DISTANCE': 1.113138945949...\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.183094\n", + " 1.404513\n", + " -0.792152\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 29\n", - " 3\n", + " 15\n", + " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 1.070996026772...\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.867003\n", + " -0.766919\n", + " 0.513833\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 34\n", + " 16\n", " 3\n", + " [{'CENTROID_ID': 3, 'DISTANCE': 1.780136190720...\n", + " [{'index': 3, 'value': 1.0}]\n", + " -1.784958\n", + " 1.959995\n", + " -0.211715\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 35\n", - " 1\n", + " 23\n", + " 2\n", + " [{'CENTROID_ID': 2, 'DISTANCE': 1.382540667483...\n", + " [{'index': 2, 'value': 1.0}]\n", + " -0.355532\n", + " 0.647036\n", + " -1.5177\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 39\n", - " 3\n", + " 34\n", + " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 1.598627908302...\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.600039\n", + " -1.776888\n", + " 0.949161\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 51\n", + " 36\n", " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 1.095162305190...\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.129833\n", + " -1.423399\n", + " 1.23938\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 52\n", - " 3\n", + " 42\n", + " 2\n", + " [{'CENTROID_ID': 2, 'DISTANCE': 1.275841743930...\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.615684\n", + " -0.514427\n", + " -0.429379\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 60\n", - " 3\n", + " 48\n", + " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.882209023196...\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.415606\n", + " -0.716421\n", + " 1.021716\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", " 61\n", - " 3\n", + " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.816202832282...\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.396797\n", + " -1.170907\n", + " 1.457044\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", " 64\n", " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.735435721625...\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.434414\n", + " -1.120408\n", + " 1.09427\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", " 65\n", - " 1\n", + " 2\n", + " [{'CENTROID_ID': 2, 'DISTANCE': 1.292559869148...\n", + " [{'index': 2, 'value': 1.0}]\n", + " -1.220711\n", + " 1.051024\n", + " -1.445145\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 67\n", - " 3\n", + " 68\n", + " 2\n", + " [{'CENTROID_ID': 2, 'DISTANCE': 0.876430138449...\n", + " [{'index': 3, 'value': 1.0}]\n", + " -1.484026\n", + " -0.009443\n", + " -1.009817\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 83\n", - " 3\n", + " 70\n", + " 4\n", + " [{'CENTROID_ID': 4, 'DISTANCE': 1.314229913955...\n", + " [{'index': 2, 'value': 1.0}]\n", + " 1.638141\n", + " 1.404513\n", + " 0.296168\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 85\n", - " 1\n", + " 72\n", + " 4\n", + " [{'CENTROID_ID': 4, 'DISTANCE': 0.938569518009...\n", + " [{'index': 2, 'value': 1.0}]\n", + " 0.829387\n", + " 0.142052\n", + " -0.719598\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 93\n", + " 74\n", " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 1.350320088546...\n", + " [{'index': 1, 'value': 1.0}]\n", + " -0.242683\n", + " -1.524396\n", + " 0.586387\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 104\n", - " 3\n", + " 77\n", + " 2\n", + " [{'CENTROID_ID': 2, 'DISTANCE': 0.904806634663...\n", + " [{'index': 2, 'value': 1.0}]\n", + " -1.277136\n", + " -0.211437\n", + " -0.647043\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", - " 105\n", + " 81\n", " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.919082578073...\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.208715\n", + " -1.221405\n", + " 0.804051\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 108\n", - " 3\n", + " 91\n", + " 4\n", + " [{'CENTROID_ID': 4, 'DISTANCE': 0.760360038086...\n", + " [{'index': 2, 'value': 1.0}]\n", + " 1.261976\n", + " 0.647036\n", + " 0.005949\n", + " [{'index': 2, 'value': 1.0}]\n", + " [{'index': 2, 'value': 1.0}]\n", " \n", " \n", - " 113\n", - " 3\n", + " 96\n", + " 1\n", + " [{'CENTROID_ID': 1, 'DISTANCE': 0.950188657227...\n", + " [{'index': 1, 'value': 1.0}]\n", + " 0.246331\n", + " -1.322402\n", + " 0.731497\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 3, 'value': 1.0}]\n", " \n", " \n", - " 130\n", - " 1\n", + " 105\n", + " 2\n", + " [{'CENTROID_ID': 2, 'DISTANCE': 1.101316467029...\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.803766\n", + " 0.445043\n", + " -1.009817\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", + " \n", + " \n", + " 111\n", + " 2\n", + " [{'CENTROID_ID': 2, 'DISTANCE': 1.549061068385...\n", + " [{'index': 1, 'value': 1.0}]\n", + " -1.164286\n", + " 0.697535\n", + " -2.098138\n", + " [{'index': 1, 'value': 1.0}]\n", + " [{'index': 1, 'value': 1.0}]\n", " \n", " \n", "\n", - "

25 rows × 1 columns

\n", - "[67 rows x 1 columns in total]" + "

25 rows × 8 columns

\n", + "[67 rows x 8 columns in total]" ], "text/plain": [ - " CENTROID_ID\n", - "penguin_id \n", - "3 3\n", - "8 3\n", - "17 3\n", - "23 1\n", - "25 3\n", - "27 3\n", - "29 3\n", - "34 3\n", - "35 1\n", - "39 3\n", - "51 1\n", - "52 3\n", - "60 3\n", - "61 3\n", - "64 1\n", - "65 1\n", - "67 3\n", - "83 3\n", - "85 1\n", - "93 1\n", - "104 3\n", - "105 1\n", - "108 3\n", - "113 3\n", - "130 1\n", - "...\n", + " CENTROID_ID NEAREST_CENTROIDS_DISTANCE \\\n", + "penguin_id \n", + "1 3 [{'CENTROID_ID': 3, 'DISTANCE': 1.236380597035... \n", + "4 3 [{'CENTROID_ID': 3, 'DISTANCE': 1.039497631856... \n", + "8 1 [{'CENTROID_ID': 1, 'DISTANCE': 1.171040485975... \n", + "11 2 [{'CENTROID_ID': 2, 'DISTANCE': 0.969102754012... \n", + "13 3 [{'CENTROID_ID': 3, 'DISTANCE': 1.113138945949... \n", + "15 1 [{'CENTROID_ID': 1, 'DISTANCE': 1.070996026772... \n", + "16 3 [{'CENTROID_ID': 3, 'DISTANCE': 1.780136190720... \n", + "23 2 [{'CENTROID_ID': 2, 'DISTANCE': 1.382540667483... \n", + "34 1 [{'CENTROID_ID': 1, 'DISTANCE': 1.598627908302... \n", + "36 1 [{'CENTROID_ID': 1, 'DISTANCE': 1.095162305190... \n", + "42 2 [{'CENTROID_ID': 2, 'DISTANCE': 1.275841743930... \n", + "48 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.882209023196... \n", + "61 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.816202832282... \n", + "64 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.735435721625... \n", + "65 2 [{'CENTROID_ID': 2, 'DISTANCE': 1.292559869148... \n", + "68 2 [{'CENTROID_ID': 2, 'DISTANCE': 0.876430138449... \n", + "70 4 [{'CENTROID_ID': 4, 'DISTANCE': 1.314229913955... \n", + "72 4 [{'CENTROID_ID': 4, 'DISTANCE': 0.938569518009... \n", + "74 1 [{'CENTROID_ID': 1, 'DISTANCE': 1.350320088546... \n", + "77 2 [{'CENTROID_ID': 2, 'DISTANCE': 0.904806634663... \n", + "81 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.919082578073... \n", + "91 4 [{'CENTROID_ID': 4, 'DISTANCE': 0.760360038086... \n", + "96 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.950188657227... \n", + "105 2 [{'CENTROID_ID': 2, 'DISTANCE': 1.101316467029... \n", + "111 2 [{'CENTROID_ID': 2, 'DISTANCE': 1.549061068385... \n", "\n", - "[67 rows x 1 columns]" + " onehotencoded_island standard_scaled_culmen_length_mm \\\n", + "penguin_id \n", + "1 [{'index': 3, 'value': 1.0}] -0.938587 \n", + "4 [{'index': 1, 'value': 1.0}] -0.16745 \n", + "8 [{'index': 1, 'value': 1.0}] 0.453222 \n", + "11 [{'index': 2, 'value': 1.0}] -1.12667 \n", + "13 [{'index': 1, 'value': 1.0}] -1.183094 \n", + "15 [{'index': 1, 'value': 1.0}] 0.867003 \n", + "16 [{'index': 3, 'value': 1.0}] -1.784958 \n", + "23 [{'index': 2, 'value': 1.0}] -0.355532 \n", + "34 [{'index': 1, 'value': 1.0}] -0.600039 \n", + "36 [{'index': 1, 'value': 1.0}] -0.129833 \n", + "42 [{'index': 1, 'value': 1.0}] -1.615684 \n", + "48 [{'index': 1, 'value': 1.0}] 0.415606 \n", + "61 [{'index': 1, 'value': 1.0}] 0.396797 \n", + "64 [{'index': 1, 'value': 1.0}] 0.434414 \n", + "65 [{'index': 2, 'value': 1.0}] -1.220711 \n", + "68 [{'index': 3, 'value': 1.0}] -1.484026 \n", + "70 [{'index': 2, 'value': 1.0}] 1.638141 \n", + "72 [{'index': 2, 'value': 1.0}] 0.829387 \n", + "74 [{'index': 1, 'value': 1.0}] -0.242683 \n", + "77 [{'index': 2, 'value': 1.0}] -1.277136 \n", + "81 [{'index': 1, 'value': 1.0}] 0.208715 \n", + "91 [{'index': 2, 'value': 1.0}] 1.261976 \n", + "96 [{'index': 1, 'value': 1.0}] 0.246331 \n", + "105 [{'index': 1, 'value': 1.0}] -1.803766 \n", + "111 [{'index': 1, 'value': 1.0}] -1.164286 \n", + "\n", + " standard_scaled_culmen_depth_mm \\\n", + "penguin_id \n", + "1 0.748033 \n", + "4 0.899528 \n", + "8 -1.877885 \n", + "11 0.697535 \n", + "13 1.404513 \n", + "15 -0.766919 \n", + "16 1.959995 \n", + "23 0.647036 \n", + "34 -1.776888 \n", + "36 -1.423399 \n", + "42 -0.514427 \n", + "48 -0.716421 \n", + "61 -1.170907 \n", + "64 -1.120408 \n", + "65 1.051024 \n", + "68 -0.009443 \n", + "70 1.404513 \n", + "72 0.142052 \n", + "74 -1.524396 \n", + "77 -0.211437 \n", + "81 -1.221405 \n", + "91 0.647036 \n", + "96 -1.322402 \n", + "105 0.445043 \n", + "111 0.697535 \n", + "\n", + " standard_scaled_flipper_length_mm onehotencoded_sex \\\n", + "penguin_id \n", + "1 -1.445145 [{'index': 2, 'value': 1.0}] \n", + "4 -0.284269 [{'index': 2, 'value': 1.0}] \n", + "8 0.658942 [{'index': 1, 'value': 1.0}] \n", + "11 -0.792152 [{'index': 1, 'value': 1.0}] \n", + "13 -0.792152 [{'index': 2, 'value': 1.0}] \n", + "15 0.513833 [{'index': 2, 'value': 1.0}] \n", + "16 -0.211715 [{'index': 2, 'value': 1.0}] \n", + "23 -1.5177 [{'index': 1, 'value': 1.0}] \n", + "34 0.949161 [{'index': 1, 'value': 1.0}] \n", + "36 1.23938 [{'index': 1, 'value': 1.0}] \n", + "42 -0.429379 [{'index': 1, 'value': 1.0}] \n", + "48 1.021716 [{'index': 2, 'value': 1.0}] \n", + "61 1.457044 [{'index': 2, 'value': 1.0}] \n", + "64 1.09427 [{'index': 1, 'value': 1.0}] \n", + "65 -1.445145 [{'index': 1, 'value': 1.0}] \n", + "68 -1.009817 [{'index': 1, 'value': 1.0}] \n", + "70 0.296168 [{'index': 2, 'value': 1.0}] \n", + "72 -0.719598 [{'index': 2, 'value': 1.0}] \n", + "74 0.586387 [{'index': 1, 'value': 1.0}] \n", + "77 -0.647043 [{'index': 1, 'value': 1.0}] \n", + "81 0.804051 [{'index': 1, 'value': 1.0}] \n", + "91 0.005949 [{'index': 2, 'value': 1.0}] \n", + "96 0.731497 [{'index': 1, 'value': 1.0}] \n", + "105 -1.009817 [{'index': 1, 'value': 1.0}] \n", + "111 -2.098138 [{'index': 1, 'value': 1.0}] \n", + "\n", + " onehotencoded_species \n", + "penguin_id \n", + "1 [{'index': 1, 'value': 1.0}] \n", + "4 [{'index': 1, 'value': 1.0}] \n", + "8 [{'index': 3, 'value': 1.0}] \n", + "11 [{'index': 1, 'value': 1.0}] \n", + "13 [{'index': 1, 'value': 1.0}] \n", + "15 [{'index': 3, 'value': 1.0}] \n", + "16 [{'index': 1, 'value': 1.0}] \n", + "23 [{'index': 1, 'value': 1.0}] \n", + "34 [{'index': 3, 'value': 1.0}] \n", + "36 [{'index': 3, 'value': 1.0}] \n", + "42 [{'index': 1, 'value': 1.0}] \n", + "48 [{'index': 3, 'value': 1.0}] \n", + "61 [{'index': 3, 'value': 1.0}] \n", + "64 [{'index': 3, 'value': 1.0}] \n", + "65 [{'index': 1, 'value': 1.0}] \n", + "68 [{'index': 1, 'value': 1.0}] \n", + "70 [{'index': 2, 'value': 1.0}] \n", + "72 [{'index': 2, 'value': 1.0}] \n", + "74 [{'index': 3, 'value': 1.0}] \n", + "77 [{'index': 1, 'value': 1.0}] \n", + "81 [{'index': 3, 'value': 1.0}] \n", + "91 [{'index': 2, 'value': 1.0}] \n", + "96 [{'index': 3, 'value': 1.0}] \n", + "105 [{'index': 1, 'value': 1.0}] \n", + "111 [{'index': 1, 'value': 1.0}] \n", + "\n", + "[67 rows x 8 columns]" ] }, - "execution_count": 26, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -2704,7 +3123,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -2721,7 +3140,7 @@ " ('linreg', LinearRegression())])" ] }, - "execution_count": 27, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -2748,18 +3167,16 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "887bf58cebf14bdba95db828390fd33d", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job b11be0d8-e6f1-41cb-8cb2-25a38e7ef311 is DONE. 24.7 kB processed.
Open Job" + ], "text/plain": [ - "HTML(value='Query job ed42cbb3-3d25-47ca-96c5-71a84e426a8c is RUNNING. " ] }, "metadata": {}, @@ -2767,13 +3184,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "24357055792a4eaaa60997fea0f76921", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job f32ea25c-be39-4726-a8f5-604ae83849a6 is DONE. 8.5 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 3fc74930-03b9-4a49-8ed3-c3edc4dd6e51 is RUNNING. " ] }, "metadata": {}, @@ -2781,13 +3196,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "bba878d6d3e345f1a29aea50f7101e8f", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 86e29b78-76f5-4937-8bde-407b99af04a2 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 38a4ce3b-5c2a-4d44-b826-f24529d6500b is RUNNING. " ] }, "metadata": {}, @@ -2795,13 +3208,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4bc2c53aeb7d4a8280f9fbbe373f4b55", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job ca819734-0d41-4d9e-b743-09edae8c7fee is DONE. 29.6 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job ecad776d-77c8-4d94-8186-d5571b512b62 is RUNNING. " ] }, "metadata": {}, @@ -2809,13 +3220,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f4f695cb0a224102b6e26adeb1827981", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 49bb5bed-cc84-47e0-9a90-08ab01e00548 is DONE. 536 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job c9bfc58f-ce2c-47a9-bbc7-b10d9de9b5a6 is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -2823,13 +3232,23 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "cb1df595006d485288a1060299970e5e", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 1e40a085-2289-47dd-afd8-820413186b9f is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 8fd8036e-3753-433d-975b-c7b42406f648 is RUNNING. " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 60319296-a480-4f51-b7ad-190ac6de963a is DONE. 6.2 kB processed. Open Job" + ], + "text/plain": [ + "" ] }, "metadata": {}, @@ -2857,152 +3276,369 @@ " \n", " \n", " predicted_body_mass_g\n", + " island\n", + " culmen_length_mm\n", + " culmen_depth_mm\n", + " flipper_length_mm\n", + " sex\n", + " species\n", " \n", " \n", " penguin_id\n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " 3\n", - " 3394.116212\n", - " \n", - " \n", - " 8\n", - " 4048.683645\n", + " 1\n", + " 3781.396682\n", + " Torgersen\n", + " 39.1\n", + " 18.7\n", + " 181.0\n", + " MALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 17\n", - " 3976.452358\n", + " 4\n", + " 4124.102574\n", + " Biscoe\n", + " 43.2\n", + " 19.0\n", + " 197.0\n", + " MALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 23\n", - " 3541.580346\n", + " 8\n", + " 4670.338389\n", + " Biscoe\n", + " 46.5\n", + " 13.5\n", + " 210.0\n", + " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 25\n", - " 4032.842027\n", + " 11\n", + " 3529.411644\n", + " Dream\n", + " 38.1\n", + " 18.6\n", + " 190.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 27\n", - " 4118.34983\n", + " 13\n", + " 4014.09632\n", + " Biscoe\n", + " 37.8\n", + " 20.0\n", + " 190.0\n", + " MALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 29\n", - " 4087.765797\n", + " 15\n", + " 5212.407319\n", + " Biscoe\n", + " 48.7\n", + " 15.7\n", + " 208.0\n", + " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 34\n", - " 3183.75379\n", + " 16\n", + " 4163.590502\n", + " Torgersen\n", + " 34.6\n", + " 21.1\n", + " 198.0\n", + " MALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 35\n", - " 3418.800633\n", + " 23\n", + " 3392.44731\n", + " Dream\n", + " 42.2\n", + " 18.5\n", + " 180.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 39\n", - " 3519.18471\n", + " 34\n", + " 4698.299674\n", + " Biscoe\n", + " 40.9\n", + " 13.7\n", + " 214.0\n", + " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 51\n", - " 3398.133564\n", + " 36\n", + " 4828.221398\n", + " Biscoe\n", + " 43.4\n", + " 14.4\n", + " 218.0\n", + " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 52\n", - " 3223.614107\n", + " 42\n", + " 3430.582874\n", + " Biscoe\n", + " 35.5\n", + " 16.2\n", + " 195.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 60\n", - " 3445.012713\n", + " 48\n", + " 5314.254798\n", + " Biscoe\n", + " 46.3\n", + " 15.8\n", + " 215.0\n", + " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", " 61\n", - " 3505.637004\n", + " 5363.19995\n", + " Biscoe\n", + " 46.2\n", + " 14.9\n", + " 221.0\n", + " MALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", " 64\n", - " 3515.903779\n", + " 4855.90281\n", + " Biscoe\n", + " 46.4\n", + " 15.0\n", + " 216.0\n", + " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", " 65\n", - " 4028.361259\n", + " 3413.094869\n", + " Dream\n", + " 37.6\n", + " 19.3\n", + " 181.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 67\n", - " 4159.991956\n", + " 68\n", + " 3340.213193\n", + " Torgersen\n", + " 36.2\n", + " 17.2\n", + " 187.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 83\n", - " 3348.167212\n", + " 70\n", + " 4228.726508\n", + " Dream\n", + " 52.8\n", + " 20.0\n", + " 205.0\n", + " MALE\n", + " Chinstrap penguin (Pygoscelis antarctica)\n", " \n", " \n", - " 85\n", - " 3485.048557\n", + " 72\n", + " 3811.532821\n", + " Dream\n", + " 48.5\n", + " 17.5\n", + " 191.0\n", + " MALE\n", + " Chinstrap penguin (Pygoscelis antarctica)\n", " \n", " \n", - " 93\n", - " 4172.872284\n", + " 74\n", + " 4659.765013\n", + " Biscoe\n", + " 42.8\n", + " 14.2\n", + " 209.0\n", + " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 104\n", - " 3299.300454\n", + " 77\n", + " 3453.383042\n", + " Dream\n", + " 37.3\n", + " 16.8\n", + " 192.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 105\n", - " 3515.68617\n", + " 81\n", + " 4766.239424\n", + " Biscoe\n", + " 45.2\n", + " 14.8\n", + " 212.0\n", + " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 108\n", - " 3405.222757\n", + " 91\n", + " 4057.801947\n", + " Dream\n", + " 50.8\n", + " 18.5\n", + " 201.0\n", + " MALE\n", + " Chinstrap penguin (Pygoscelis antarctica)\n", + " \n", + " \n", + " 96\n", + " 4739.821792\n", + " Biscoe\n", + " 45.4\n", + " 14.6\n", + " 211.0\n", + " FEMALE\n", + " Gentoo penguin (Pygoscelis papua)\n", " \n", " \n", - " 113\n", - " 4209.13832\n", + " 105\n", + " 3394.886275\n", + " Biscoe\n", + " 34.5\n", + " 18.1\n", + " 187.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", - " 130\n", - " 4197.90382\n", + " 111\n", + " 3201.48777\n", + " Biscoe\n", + " 37.9\n", + " 18.6\n", + " 172.0\n", + " FEMALE\n", + " Adelie Penguin (Pygoscelis adeliae)\n", " \n", " \n", "\n", - "

25 rows × 1 columns

\n", - "[67 rows x 1 columns in total]" - ], - "text/plain": [ - " predicted_body_mass_g\n", - "penguin_id \n", - "3 3394.116212\n", - "8 4048.683645\n", - "17 3976.452358\n", - "23 3541.580346\n", - "25 4032.842027\n", - "27 4118.34983\n", - "29 4087.765797\n", - "34 3183.75379\n", - "35 3418.800633\n", - "39 3519.18471\n", - "51 3398.133564\n", - "52 3223.614107\n", - "60 3445.012713\n", - "61 3505.637004\n", - "64 3515.903779\n", - "65 4028.361259\n", - "67 4159.991956\n", - "83 3348.167212\n", - "85 3485.048557\n", - "93 4172.872284\n", - "104 3299.300454\n", - "105 3515.68617\n", - "108 3405.222757\n", - "113 4209.13832\n", - "130 4197.90382\n", - "...\n", + "

25 rows × 7 columns

\n", + "[67 rows x 7 columns in total]" + ], + "text/plain": [ + " predicted_body_mass_g island culmen_length_mm \\\n", + "penguin_id \n", + "1 3781.396682 Torgersen 39.1 \n", + "4 4124.102574 Biscoe 43.2 \n", + "8 4670.338389 Biscoe 46.5 \n", + "11 3529.411644 Dream 38.1 \n", + "13 4014.09632 Biscoe 37.8 \n", + "15 5212.407319 Biscoe 48.7 \n", + "16 4163.590502 Torgersen 34.6 \n", + "23 3392.44731 Dream 42.2 \n", + "34 4698.299674 Biscoe 40.9 \n", + "36 4828.221398 Biscoe 43.4 \n", + "42 3430.582874 Biscoe 35.5 \n", + "48 5314.254798 Biscoe 46.3 \n", + "61 5363.19995 Biscoe 46.2 \n", + "64 4855.90281 Biscoe 46.4 \n", + "65 3413.094869 Dream 37.6 \n", + "68 3340.213193 Torgersen 36.2 \n", + "70 4228.726508 Dream 52.8 \n", + "72 3811.532821 Dream 48.5 \n", + "74 4659.765013 Biscoe 42.8 \n", + "77 3453.383042 Dream 37.3 \n", + "81 4766.239424 Biscoe 45.2 \n", + "91 4057.801947 Dream 50.8 \n", + "96 4739.821792 Biscoe 45.4 \n", + "105 3394.886275 Biscoe 34.5 \n", + "111 3201.48777 Biscoe 37.9 \n", "\n", - "[67 rows x 1 columns]" + " culmen_depth_mm flipper_length_mm sex \\\n", + "penguin_id \n", + "1 18.7 181.0 MALE \n", + "4 19.0 197.0 MALE \n", + "8 13.5 210.0 FEMALE \n", + "11 18.6 190.0 FEMALE \n", + "13 20.0 190.0 MALE \n", + "15 15.7 208.0 MALE \n", + "16 21.1 198.0 MALE \n", + "23 18.5 180.0 FEMALE \n", + "34 13.7 214.0 FEMALE \n", + "36 14.4 218.0 FEMALE \n", + "42 16.2 195.0 FEMALE \n", + "48 15.8 215.0 MALE \n", + "61 14.9 221.0 MALE \n", + "64 15.0 216.0 FEMALE \n", + "65 19.3 181.0 FEMALE \n", + "68 17.2 187.0 FEMALE \n", + "70 20.0 205.0 MALE \n", + "72 17.5 191.0 MALE \n", + "74 14.2 209.0 FEMALE \n", + "77 16.8 192.0 FEMALE \n", + "81 14.8 212.0 FEMALE \n", + "91 18.5 201.0 MALE \n", + "96 14.6 211.0 FEMALE \n", + "105 18.1 187.0 FEMALE \n", + "111 18.6 172.0 FEMALE \n", + "\n", + " species \n", + "penguin_id \n", + "1 Adelie Penguin (Pygoscelis adeliae) \n", + "4 Adelie Penguin (Pygoscelis adeliae) \n", + "8 Gentoo penguin (Pygoscelis papua) \n", + "11 Adelie Penguin (Pygoscelis adeliae) \n", + "13 Adelie Penguin (Pygoscelis adeliae) \n", + "15 Gentoo penguin (Pygoscelis papua) \n", + "16 Adelie Penguin (Pygoscelis adeliae) \n", + "23 Adelie Penguin (Pygoscelis adeliae) \n", + "34 Gentoo penguin (Pygoscelis papua) \n", + "36 Gentoo penguin (Pygoscelis papua) \n", + "42 Adelie Penguin (Pygoscelis adeliae) \n", + "48 Gentoo penguin (Pygoscelis papua) \n", + "61 Gentoo penguin (Pygoscelis papua) \n", + "64 Gentoo penguin (Pygoscelis papua) \n", + "65 Adelie Penguin (Pygoscelis adeliae) \n", + "68 Adelie Penguin (Pygoscelis adeliae) \n", + "70 Chinstrap penguin (Pygoscelis antarctica) \n", + "72 Chinstrap penguin (Pygoscelis antarctica) \n", + "74 Gentoo penguin (Pygoscelis papua) \n", + "77 Adelie Penguin (Pygoscelis adeliae) \n", + "81 Gentoo penguin (Pygoscelis papua) \n", + "91 Chinstrap penguin (Pygoscelis antarctica) \n", + "96 Gentoo penguin (Pygoscelis papua) \n", + "105 Adelie Penguin (Pygoscelis adeliae) \n", + "111 Adelie Penguin (Pygoscelis adeliae) \n", + "\n", + "[67 rows x 7 columns]" ] }, - "execution_count": 28, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -3034,60 +3670,16 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2d32081be31f44abb8de67e2209d76cd", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job 2a043039-670f-4eb8-9cf0-765ee6ed7de6 is RUNNING. Open Job" + ], "text/plain": [ - "HTML(value='Query job bc8b2042-1e13-441c-9531-300ed5badb7a is RUNNING. " ] }, "metadata": {}, @@ -3095,13 +3687,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4588ae10de634460bf4026ddd9076351", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 7f1f565b-0f73-4a4e-b33f-8484fa260838 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 5e867182-dd7a-4aff-87a8-f7596e900fd5 is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -3109,13 +3699,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "8209cf8286a545ebb7b6ef9d002a43a1", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job d4b9d4a6-d75e-46e1-b092-ab58e8aef890 is DONE. 48 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job d4cdb016-8f1e-4960-8ed7-4524ccc5a8a8 is RUNNING. " ] }, "metadata": {}, @@ -3153,12 +3741,12 @@ " \n", " \n", " 0\n", - " 229.48269\n", - " 82962.794947\n", - " 0.004248\n", - " 206.728384\n", - " 0.88633\n", - " 0.892953\n", + " 216.444357\n", + " 72639.698707\n", + " 0.00463\n", + " 170.588356\n", + " 0.896396\n", + " 0.900547\n", " \n", " \n", "\n", @@ -3167,15 +3755,15 @@ ], "text/plain": [ " mean_absolute_error mean_squared_error mean_squared_log_error \\\n", - "0 229.48269 82962.794947 0.004248 \n", + "0 216.444357 72639.698707 0.00463 \n", "\n", " median_absolute_error r2_score explained_variance \n", - "0 206.728384 0.88633 0.892953 \n", + "0 170.588356 0.896396 0.900547 \n", "\n", "[1 rows x 6 columns]" ] }, - "execution_count": 29, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -3195,18 +3783,16 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f32692d89f00406499f4ea5aa55268fb", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 73448ee8-698b-435f-b11e-6fe2de3bcd8d is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job e57383ef-f043-458b-96c6-893e7c5b0c00 is RUNNING. " ] }, "metadata": {}, @@ -3214,13 +3800,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "72e5f23a99de4a818c8493b8b4f3854d", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e002f59d-a03c-4ec9-a85a-93adbfd7bd17 is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 1a9db485-477b-43e2-94eb-dea7dc21d45d is RUNNING. " ] }, "metadata": {}, @@ -3228,13 +3812,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "9d5333a91b504dd9be51c997715530ab", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 4ab1febc-fb55-473a-b295-69e4329cc5f0 is DONE. 30.0 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 4570a563-b8e0-4308-b8cb-c4731491d4f7 is RUNNING. " ] }, "metadata": {}, @@ -3243,10 +3825,10 @@ { "data": { "text/plain": [ - "0.8863300923278365" + "0.8963962044533755" ] }, - "execution_count": 30, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -3254,7 +3836,7 @@ "source": [ "from bigframes.ml.metrics import r2_score\n", "\n", - "r2_score(y_test, predicted_y_test)" + "r2_score(y_test, predicted_y_test[\"predicted_body_mass_g\"])" ] }, { @@ -3274,57 +3856,9 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 15, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "fbc4a70f31d4465b974a7f7c9cc97731", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Copy job c2413be4-6972-4e36-8234-5063628b6d71 is RUNNING. Date: Thu, 16 Nov 2023 01:35:06 +0000 Subject: [PATCH 5/5] update docs --- bigframes/ml/llm.py | 5 +++-- third_party/bigframes_vendored/sklearn/linear_model/_base.py | 3 +-- third_party/bigframes_vendored/xgboost/sklearn.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index fde8ed0ee0..93e2ba825f 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -149,7 +149,8 @@ def predict( Returns: - bigframes.dataframe.DataFrame: Output DataFrame with only 1 column as the output text results.""" + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. + """ # Params reference: https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models if temperature < 0.0 or temperature > 1.0: @@ -265,7 +266,7 @@ def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: Input DataFrame, which needs to contain a column with name "content". Only the column will be used as input. Content can include preamble, questions, suggestions, instructions, or examples. Returns: - bigframes.dataframe.DataFrame: Output DataFrame with only 1 column as the output embedding results + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. """ # Params reference: https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models diff --git a/third_party/bigframes_vendored/sklearn/linear_model/_base.py b/third_party/bigframes_vendored/sklearn/linear_model/_base.py index 8113298877..ab946e5861 100644 --- a/third_party/bigframes_vendored/sklearn/linear_model/_base.py +++ b/third_party/bigframes_vendored/sklearn/linear_model/_base.py @@ -49,8 +49,7 @@ def predict(self, X): which we want to get the predictions. Returns: - bigframes.dataframe.DataFrame: DataFrame of shape (n_samples,), containing - the class labels for each sample. + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) diff --git a/third_party/bigframes_vendored/xgboost/sklearn.py b/third_party/bigframes_vendored/xgboost/sklearn.py index b7b43b85a3..dfd0ba7356 100644 --- a/third_party/bigframes_vendored/xgboost/sklearn.py +++ b/third_party/bigframes_vendored/xgboost/sklearn.py @@ -18,7 +18,7 @@ def predict(self, X): Series or DataFrame of shape (n_samples, n_features). Samples. Returns: - DataFrame of shape (n_samples,): Returns predicted values. + bigframes.dataframe.DataFrame: DataFrame of shape (n_samples, n_input_columns + n_prediction_columns). Returns predicted values. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)