25 rows × 1 columns
\n", - "[67 rows x 1 columns in total]" - ], - "text/plain": [ - " predicted_body_mass_g\n", - "penguin_id \n", - "3 3394.118128\n", - "8 4048.685642\n", - "17 3976.454093\n", - "23 3541.582194\n", - "25 4032.844186\n", - "27 4118.351772\n", - "29 4087.767826\n", - "34 3183.755249\n", - "35 3418.802274\n", - "39 3519.186468\n", - "51 3398.135365\n", - "52 3223.615957\n", - "60 3445.014718\n", - "61 3505.638864\n", - "64 3515.905786\n", - "65 4028.363185\n", - "67 4159.993943\n", - "83 3348.16883\n", - "85 3485.050273\n", - "93 4172.874548\n", - "104 3299.302424\n", - "105 3515.687917\n", - "108 3405.224618\n", - "113 4209.140425\n", - "130 4197.905737\n", - "...\n", + "25 rows × 7 columns
\n", + "[67 rows x 7 columns in total]" + ], + "text/plain": [ + " predicted_body_mass_g onehotencoded_island \\\n", + "penguin_id \n", + "1 3781.402407 [{'index': 3, 'value': 1.0}] \n", + "4 4124.107944 [{'index': 1, 'value': 1.0}] \n", + "8 4670.344196 [{'index': 1, 'value': 1.0}] \n", + "11 3529.417214 [{'index': 2, 'value': 1.0}] \n", + "13 4014.101714 [{'index': 1, 'value': 1.0}] \n", + "15 5212.41288 [{'index': 1, 'value': 1.0}] \n", + "16 4163.595615 [{'index': 3, 'value': 1.0}] \n", + "23 3392.453069 [{'index': 2, 'value': 1.0}] \n", + "34 4698.305397 [{'index': 1, 'value': 1.0}] \n", + "36 4828.226949 [{'index': 1, 'value': 1.0}] \n", + "42 3430.58866 [{'index': 1, 'value': 1.0}] \n", + "48 5314.260221 [{'index': 1, 'value': 1.0}] \n", + "61 5363.205372 [{'index': 1, 'value': 1.0}] \n", + "64 4855.908314 [{'index': 1, 'value': 1.0}] \n", + "65 3413.100524 [{'index': 2, 'value': 1.0}] \n", + "68 3340.219002 [{'index': 3, 'value': 1.0}] \n", + "70 4228.73157 [{'index': 2, 'value': 1.0}] \n", + "72 3811.538478 [{'index': 2, 'value': 1.0}] \n", + "74 4659.770763 [{'index': 1, 'value': 1.0}] \n", + "77 3453.388804 [{'index': 2, 'value': 1.0}] \n", + "81 4766.245033 [{'index': 1, 'value': 1.0}] \n", + "91 4057.807281 [{'index': 2, 'value': 1.0}] \n", + "96 4739.827445 [{'index': 1, 'value': 1.0}] \n", + "105 3394.891976 [{'index': 1, 'value': 1.0}] \n", + "111 3201.493683 [{'index': 1, 'value': 1.0}] \n", "\n", - "[67 rows x 1 columns]" + " standard_scaled_culmen_length_mm standard_scaled_culmen_depth_mm \\\n", + "penguin_id \n", + "1 -0.938587 0.748033 \n", + "4 -0.16745 0.899528 \n", + "8 0.453222 -1.877885 \n", + "11 -1.12667 0.697535 \n", + "13 -1.183094 1.404513 \n", + "15 0.867003 -0.766919 \n", + "16 -1.784958 1.959995 \n", + "23 -0.355532 0.647036 \n", + "34 -0.600039 -1.776888 \n", + "36 -0.129833 -1.423399 \n", + "42 -1.615684 -0.514427 \n", + "48 0.415606 -0.716421 \n", + "61 0.396797 -1.170907 \n", + "64 0.434414 -1.120408 \n", + "65 -1.220711 1.051024 \n", + "68 -1.484026 -0.009443 \n", + "70 1.638141 1.404513 \n", + "72 0.829387 0.142052 \n", + "74 -0.242683 -1.524396 \n", + "77 -1.277136 -0.211437 \n", + "81 0.208715 -1.221405 \n", + "91 1.261976 0.647036 \n", + "96 0.246331 -1.322402 \n", + "105 -1.803766 0.445043 \n", + "111 -1.164286 0.697535 \n", + "\n", + " standard_scaled_flipper_length_mm onehotencoded_sex \\\n", + "penguin_id \n", + "1 -1.445145 [{'index': 2, 'value': 1.0}] \n", + "4 -0.284269 [{'index': 2, 'value': 1.0}] \n", + "8 0.658942 [{'index': 1, 'value': 1.0}] \n", + "11 -0.792152 [{'index': 1, 'value': 1.0}] \n", + "13 -0.792152 [{'index': 2, 'value': 1.0}] \n", + "15 0.513833 [{'index': 2, 'value': 1.0}] \n", + "16 -0.211715 [{'index': 2, 'value': 1.0}] \n", + "23 -1.5177 [{'index': 1, 'value': 1.0}] \n", + "34 0.949161 [{'index': 1, 'value': 1.0}] \n", + "36 1.23938 [{'index': 1, 'value': 1.0}] \n", + "42 -0.429379 [{'index': 1, 'value': 1.0}] \n", + "48 1.021716 [{'index': 2, 'value': 1.0}] \n", + "61 1.457044 [{'index': 2, 'value': 1.0}] \n", + "64 1.09427 [{'index': 1, 'value': 1.0}] \n", + "65 -1.445145 [{'index': 1, 'value': 1.0}] \n", + "68 -1.009817 [{'index': 1, 'value': 1.0}] \n", + "70 0.296168 [{'index': 2, 'value': 1.0}] \n", + "72 -0.719598 [{'index': 2, 'value': 1.0}] \n", + "74 0.586387 [{'index': 1, 'value': 1.0}] \n", + "77 -0.647043 [{'index': 1, 'value': 1.0}] \n", + "81 0.804051 [{'index': 1, 'value': 1.0}] \n", + "91 0.005949 [{'index': 2, 'value': 1.0}] \n", + "96 0.731497 [{'index': 1, 'value': 1.0}] \n", + "105 -1.009817 [{'index': 1, 'value': 1.0}] \n", + "111 -2.098138 [{'index': 1, 'value': 1.0}] \n", + "\n", + " onehotencoded_species \n", + "penguin_id \n", + "1 [{'index': 1, 'value': 1.0}] \n", + "4 [{'index': 1, 'value': 1.0}] \n", + "8 [{'index': 3, 'value': 1.0}] \n", + "11 [{'index': 1, 'value': 1.0}] \n", + "13 [{'index': 1, 'value': 1.0}] \n", + "15 [{'index': 3, 'value': 1.0}] \n", + "16 [{'index': 1, 'value': 1.0}] \n", + "23 [{'index': 1, 'value': 1.0}] \n", + "34 [{'index': 3, 'value': 1.0}] \n", + "36 [{'index': 3, 'value': 1.0}] \n", + "42 [{'index': 1, 'value': 1.0}] \n", + "48 [{'index': 3, 'value': 1.0}] \n", + "61 [{'index': 3, 'value': 1.0}] \n", + "64 [{'index': 3, 'value': 1.0}] \n", + "65 [{'index': 1, 'value': 1.0}] \n", + "68 [{'index': 1, 'value': 1.0}] \n", + "70 [{'index': 2, 'value': 1.0}] \n", + "72 [{'index': 2, 'value': 1.0}] \n", + "74 [{'index': 3, 'value': 1.0}] \n", + "77 [{'index': 1, 'value': 1.0}] \n", + "81 [{'index': 3, 'value': 1.0}] \n", + "91 [{'index': 2, 'value': 1.0}] \n", + "96 [{'index': 3, 'value': 1.0}] \n", + "105 [{'index': 1, 'value': 1.0}] \n", + "111 [{'index': 1, 'value': 1.0}] \n", + "\n", + "[67 rows x 7 columns]" ] }, - "execution_count": 25, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -2423,18 +2554,16 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d7a16e04253a42b7a5ce247d8f63b656", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 027042f1-9a18-43d8-a378-ab9410e395b1 is DONE. 23.5 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 6f19614c-82c0-4f8b-b74b-9d91a894efdd is RUNNING. " ] }, "metadata": {}, @@ -2442,13 +2571,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4a99ac15431e433595de1040872a4558", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 6c8484a0-a504-4e50-93d6-3d247c9ff558 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 51899e2d-f6ef-4e62-98b6-c11550f74f4b is RUNNING. " ] }, "metadata": {}, @@ -2456,13 +2583,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "90909b620e084f59b0f9da266257593f", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e81ca2de-df2e-41ec-af86-14f8dcec1b44 is DONE. 6.2 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 44d3fddc-74bc-4de0-a458-2c73b38f74fb is RUNNING. " ] }, "metadata": {}, @@ -2470,13 +2595,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2a9c2c05041a4fb691809bab5310bb05", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 3e6d413c-f8c4-4390-95eb-3a1f5bc59aed is DONE. 536 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 33584475-f02b-4c98-9a51-e29996f4f950 is RUNNING. " ] }, "metadata": {}, @@ -2484,13 +2607,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "6b0677c228d54b409c66e5dfa98d7e00", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e448220d-0c50-45b7-bcbe-d1159b3d18ce is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job df25ba49-280e-424d-a357-dde71a9b35dd is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -2498,13 +2619,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "379ae6497fb34f969d21b2cd664e8bfa", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job e167a234-828d-4f05-8654-63cf97e50ba3 is DONE. 10.2 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 6f92a04e-af7e-41d6-9303-6366c1751294 is RUNNING. " ] }, "metadata": {}, @@ -2532,152 +2651,452 @@ "25 rows × 1 columns
\n", - "[67 rows x 1 columns in total]" + "25 rows × 8 columns
\n", + "[67 rows x 8 columns in total]" ], "text/plain": [ - " CENTROID_ID\n", - "penguin_id \n", - "3 3\n", - "8 3\n", - "17 3\n", - "23 1\n", - "25 3\n", - "27 3\n", - "29 3\n", - "34 3\n", - "35 1\n", - "39 3\n", - "51 1\n", - "52 3\n", - "60 3\n", - "61 3\n", - "64 1\n", - "65 1\n", - "67 3\n", - "83 3\n", - "85 1\n", - "93 1\n", - "104 3\n", - "105 1\n", - "108 3\n", - "113 3\n", - "130 1\n", - "...\n", + " CENTROID_ID NEAREST_CENTROIDS_DISTANCE \\\n", + "penguin_id \n", + "1 3 [{'CENTROID_ID': 3, 'DISTANCE': 1.236380597035... \n", + "4 3 [{'CENTROID_ID': 3, 'DISTANCE': 1.039497631856... \n", + "8 1 [{'CENTROID_ID': 1, 'DISTANCE': 1.171040485975... \n", + "11 2 [{'CENTROID_ID': 2, 'DISTANCE': 0.969102754012... \n", + "13 3 [{'CENTROID_ID': 3, 'DISTANCE': 1.113138945949... \n", + "15 1 [{'CENTROID_ID': 1, 'DISTANCE': 1.070996026772... \n", + "16 3 [{'CENTROID_ID': 3, 'DISTANCE': 1.780136190720... \n", + "23 2 [{'CENTROID_ID': 2, 'DISTANCE': 1.382540667483... \n", + "34 1 [{'CENTROID_ID': 1, 'DISTANCE': 1.598627908302... \n", + "36 1 [{'CENTROID_ID': 1, 'DISTANCE': 1.095162305190... \n", + "42 2 [{'CENTROID_ID': 2, 'DISTANCE': 1.275841743930... \n", + "48 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.882209023196... \n", + "61 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.816202832282... \n", + "64 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.735435721625... \n", + "65 2 [{'CENTROID_ID': 2, 'DISTANCE': 1.292559869148... \n", + "68 2 [{'CENTROID_ID': 2, 'DISTANCE': 0.876430138449... \n", + "70 4 [{'CENTROID_ID': 4, 'DISTANCE': 1.314229913955... \n", + "72 4 [{'CENTROID_ID': 4, 'DISTANCE': 0.938569518009... \n", + "74 1 [{'CENTROID_ID': 1, 'DISTANCE': 1.350320088546... \n", + "77 2 [{'CENTROID_ID': 2, 'DISTANCE': 0.904806634663... \n", + "81 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.919082578073... \n", + "91 4 [{'CENTROID_ID': 4, 'DISTANCE': 0.760360038086... \n", + "96 1 [{'CENTROID_ID': 1, 'DISTANCE': 0.950188657227... \n", + "105 2 [{'CENTROID_ID': 2, 'DISTANCE': 1.101316467029... \n", + "111 2 [{'CENTROID_ID': 2, 'DISTANCE': 1.549061068385... \n", "\n", - "[67 rows x 1 columns]" + " onehotencoded_island standard_scaled_culmen_length_mm \\\n", + "penguin_id \n", + "1 [{'index': 3, 'value': 1.0}] -0.938587 \n", + "4 [{'index': 1, 'value': 1.0}] -0.16745 \n", + "8 [{'index': 1, 'value': 1.0}] 0.453222 \n", + "11 [{'index': 2, 'value': 1.0}] -1.12667 \n", + "13 [{'index': 1, 'value': 1.0}] -1.183094 \n", + "15 [{'index': 1, 'value': 1.0}] 0.867003 \n", + "16 [{'index': 3, 'value': 1.0}] -1.784958 \n", + "23 [{'index': 2, 'value': 1.0}] -0.355532 \n", + "34 [{'index': 1, 'value': 1.0}] -0.600039 \n", + "36 [{'index': 1, 'value': 1.0}] -0.129833 \n", + "42 [{'index': 1, 'value': 1.0}] -1.615684 \n", + "48 [{'index': 1, 'value': 1.0}] 0.415606 \n", + "61 [{'index': 1, 'value': 1.0}] 0.396797 \n", + "64 [{'index': 1, 'value': 1.0}] 0.434414 \n", + "65 [{'index': 2, 'value': 1.0}] -1.220711 \n", + "68 [{'index': 3, 'value': 1.0}] -1.484026 \n", + "70 [{'index': 2, 'value': 1.0}] 1.638141 \n", + "72 [{'index': 2, 'value': 1.0}] 0.829387 \n", + "74 [{'index': 1, 'value': 1.0}] -0.242683 \n", + "77 [{'index': 2, 'value': 1.0}] -1.277136 \n", + "81 [{'index': 1, 'value': 1.0}] 0.208715 \n", + "91 [{'index': 2, 'value': 1.0}] 1.261976 \n", + "96 [{'index': 1, 'value': 1.0}] 0.246331 \n", + "105 [{'index': 1, 'value': 1.0}] -1.803766 \n", + "111 [{'index': 1, 'value': 1.0}] -1.164286 \n", + "\n", + " standard_scaled_culmen_depth_mm \\\n", + "penguin_id \n", + "1 0.748033 \n", + "4 0.899528 \n", + "8 -1.877885 \n", + "11 0.697535 \n", + "13 1.404513 \n", + "15 -0.766919 \n", + "16 1.959995 \n", + "23 0.647036 \n", + "34 -1.776888 \n", + "36 -1.423399 \n", + "42 -0.514427 \n", + "48 -0.716421 \n", + "61 -1.170907 \n", + "64 -1.120408 \n", + "65 1.051024 \n", + "68 -0.009443 \n", + "70 1.404513 \n", + "72 0.142052 \n", + "74 -1.524396 \n", + "77 -0.211437 \n", + "81 -1.221405 \n", + "91 0.647036 \n", + "96 -1.322402 \n", + "105 0.445043 \n", + "111 0.697535 \n", + "\n", + " standard_scaled_flipper_length_mm onehotencoded_sex \\\n", + "penguin_id \n", + "1 -1.445145 [{'index': 2, 'value': 1.0}] \n", + "4 -0.284269 [{'index': 2, 'value': 1.0}] \n", + "8 0.658942 [{'index': 1, 'value': 1.0}] \n", + "11 -0.792152 [{'index': 1, 'value': 1.0}] \n", + "13 -0.792152 [{'index': 2, 'value': 1.0}] \n", + "15 0.513833 [{'index': 2, 'value': 1.0}] \n", + "16 -0.211715 [{'index': 2, 'value': 1.0}] \n", + "23 -1.5177 [{'index': 1, 'value': 1.0}] \n", + "34 0.949161 [{'index': 1, 'value': 1.0}] \n", + "36 1.23938 [{'index': 1, 'value': 1.0}] \n", + "42 -0.429379 [{'index': 1, 'value': 1.0}] \n", + "48 1.021716 [{'index': 2, 'value': 1.0}] \n", + "61 1.457044 [{'index': 2, 'value': 1.0}] \n", + "64 1.09427 [{'index': 1, 'value': 1.0}] \n", + "65 -1.445145 [{'index': 1, 'value': 1.0}] \n", + "68 -1.009817 [{'index': 1, 'value': 1.0}] \n", + "70 0.296168 [{'index': 2, 'value': 1.0}] \n", + "72 -0.719598 [{'index': 2, 'value': 1.0}] \n", + "74 0.586387 [{'index': 1, 'value': 1.0}] \n", + "77 -0.647043 [{'index': 1, 'value': 1.0}] \n", + "81 0.804051 [{'index': 1, 'value': 1.0}] \n", + "91 0.005949 [{'index': 2, 'value': 1.0}] \n", + "96 0.731497 [{'index': 1, 'value': 1.0}] \n", + "105 -1.009817 [{'index': 1, 'value': 1.0}] \n", + "111 -2.098138 [{'index': 1, 'value': 1.0}] \n", + "\n", + " onehotencoded_species \n", + "penguin_id \n", + "1 [{'index': 1, 'value': 1.0}] \n", + "4 [{'index': 1, 'value': 1.0}] \n", + "8 [{'index': 3, 'value': 1.0}] \n", + "11 [{'index': 1, 'value': 1.0}] \n", + "13 [{'index': 1, 'value': 1.0}] \n", + "15 [{'index': 3, 'value': 1.0}] \n", + "16 [{'index': 1, 'value': 1.0}] \n", + "23 [{'index': 1, 'value': 1.0}] \n", + "34 [{'index': 3, 'value': 1.0}] \n", + "36 [{'index': 3, 'value': 1.0}] \n", + "42 [{'index': 1, 'value': 1.0}] \n", + "48 [{'index': 3, 'value': 1.0}] \n", + "61 [{'index': 3, 'value': 1.0}] \n", + "64 [{'index': 3, 'value': 1.0}] \n", + "65 [{'index': 1, 'value': 1.0}] \n", + "68 [{'index': 1, 'value': 1.0}] \n", + "70 [{'index': 2, 'value': 1.0}] \n", + "72 [{'index': 2, 'value': 1.0}] \n", + "74 [{'index': 3, 'value': 1.0}] \n", + "77 [{'index': 1, 'value': 1.0}] \n", + "81 [{'index': 3, 'value': 1.0}] \n", + "91 [{'index': 2, 'value': 1.0}] \n", + "96 [{'index': 3, 'value': 1.0}] \n", + "105 [{'index': 1, 'value': 1.0}] \n", + "111 [{'index': 1, 'value': 1.0}] \n", + "\n", + "[67 rows x 8 columns]" ] }, - "execution_count": 26, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -2704,7 +3123,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -2721,7 +3140,7 @@ " ('linreg', LinearRegression())])" ] }, - "execution_count": 27, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -2748,18 +3167,16 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "887bf58cebf14bdba95db828390fd33d", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job b11be0d8-e6f1-41cb-8cb2-25a38e7ef311 is DONE. 24.7 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job ed42cbb3-3d25-47ca-96c5-71a84e426a8c is RUNNING. " ] }, "metadata": {}, @@ -2767,13 +3184,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "24357055792a4eaaa60997fea0f76921", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job f32ea25c-be39-4726-a8f5-604ae83849a6 is DONE. 8.5 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 3fc74930-03b9-4a49-8ed3-c3edc4dd6e51 is RUNNING. " ] }, "metadata": {}, @@ -2781,13 +3196,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "bba878d6d3e345f1a29aea50f7101e8f", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 86e29b78-76f5-4937-8bde-407b99af04a2 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 38a4ce3b-5c2a-4d44-b826-f24529d6500b is RUNNING. " ] }, "metadata": {}, @@ -2795,13 +3208,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4bc2c53aeb7d4a8280f9fbbe373f4b55", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job ca819734-0d41-4d9e-b743-09edae8c7fee is DONE. 29.6 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job ecad776d-77c8-4d94-8186-d5571b512b62 is RUNNING. " ] }, "metadata": {}, @@ -2809,13 +3220,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f4f695cb0a224102b6e26adeb1827981", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 49bb5bed-cc84-47e0-9a90-08ab01e00548 is DONE. 536 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job c9bfc58f-ce2c-47a9-bbc7-b10d9de9b5a6 is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -2823,13 +3232,23 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "cb1df595006d485288a1060299970e5e", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 1e40a085-2289-47dd-afd8-820413186b9f is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 8fd8036e-3753-433d-975b-c7b42406f648 is RUNNING. " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 60319296-a480-4f51-b7ad-190ac6de963a is DONE. 6.2 kB processed. Open Job" + ], + "text/plain": [ + "25 rows × 1 columns
\n", - "[67 rows x 1 columns in total]" - ], - "text/plain": [ - " predicted_body_mass_g\n", - "penguin_id \n", - "3 3394.116212\n", - "8 4048.683645\n", - "17 3976.452358\n", - "23 3541.580346\n", - "25 4032.842027\n", - "27 4118.34983\n", - "29 4087.765797\n", - "34 3183.75379\n", - "35 3418.800633\n", - "39 3519.18471\n", - "51 3398.133564\n", - "52 3223.614107\n", - "60 3445.012713\n", - "61 3505.637004\n", - "64 3515.903779\n", - "65 4028.361259\n", - "67 4159.991956\n", - "83 3348.167212\n", - "85 3485.048557\n", - "93 4172.872284\n", - "104 3299.300454\n", - "105 3515.68617\n", - "108 3405.222757\n", - "113 4209.13832\n", - "130 4197.90382\n", - "...\n", + "25 rows × 7 columns
\n", + "[67 rows x 7 columns in total]" + ], + "text/plain": [ + " predicted_body_mass_g island culmen_length_mm \\\n", + "penguin_id \n", + "1 3781.396682 Torgersen 39.1 \n", + "4 4124.102574 Biscoe 43.2 \n", + "8 4670.338389 Biscoe 46.5 \n", + "11 3529.411644 Dream 38.1 \n", + "13 4014.09632 Biscoe 37.8 \n", + "15 5212.407319 Biscoe 48.7 \n", + "16 4163.590502 Torgersen 34.6 \n", + "23 3392.44731 Dream 42.2 \n", + "34 4698.299674 Biscoe 40.9 \n", + "36 4828.221398 Biscoe 43.4 \n", + "42 3430.582874 Biscoe 35.5 \n", + "48 5314.254798 Biscoe 46.3 \n", + "61 5363.19995 Biscoe 46.2 \n", + "64 4855.90281 Biscoe 46.4 \n", + "65 3413.094869 Dream 37.6 \n", + "68 3340.213193 Torgersen 36.2 \n", + "70 4228.726508 Dream 52.8 \n", + "72 3811.532821 Dream 48.5 \n", + "74 4659.765013 Biscoe 42.8 \n", + "77 3453.383042 Dream 37.3 \n", + "81 4766.239424 Biscoe 45.2 \n", + "91 4057.801947 Dream 50.8 \n", + "96 4739.821792 Biscoe 45.4 \n", + "105 3394.886275 Biscoe 34.5 \n", + "111 3201.48777 Biscoe 37.9 \n", "\n", - "[67 rows x 1 columns]" + " culmen_depth_mm flipper_length_mm sex \\\n", + "penguin_id \n", + "1 18.7 181.0 MALE \n", + "4 19.0 197.0 MALE \n", + "8 13.5 210.0 FEMALE \n", + "11 18.6 190.0 FEMALE \n", + "13 20.0 190.0 MALE \n", + "15 15.7 208.0 MALE \n", + "16 21.1 198.0 MALE \n", + "23 18.5 180.0 FEMALE \n", + "34 13.7 214.0 FEMALE \n", + "36 14.4 218.0 FEMALE \n", + "42 16.2 195.0 FEMALE \n", + "48 15.8 215.0 MALE \n", + "61 14.9 221.0 MALE \n", + "64 15.0 216.0 FEMALE \n", + "65 19.3 181.0 FEMALE \n", + "68 17.2 187.0 FEMALE \n", + "70 20.0 205.0 MALE \n", + "72 17.5 191.0 MALE \n", + "74 14.2 209.0 FEMALE \n", + "77 16.8 192.0 FEMALE \n", + "81 14.8 212.0 FEMALE \n", + "91 18.5 201.0 MALE \n", + "96 14.6 211.0 FEMALE \n", + "105 18.1 187.0 FEMALE \n", + "111 18.6 172.0 FEMALE \n", + "\n", + " species \n", + "penguin_id \n", + "1 Adelie Penguin (Pygoscelis adeliae) \n", + "4 Adelie Penguin (Pygoscelis adeliae) \n", + "8 Gentoo penguin (Pygoscelis papua) \n", + "11 Adelie Penguin (Pygoscelis adeliae) \n", + "13 Adelie Penguin (Pygoscelis adeliae) \n", + "15 Gentoo penguin (Pygoscelis papua) \n", + "16 Adelie Penguin (Pygoscelis adeliae) \n", + "23 Adelie Penguin (Pygoscelis adeliae) \n", + "34 Gentoo penguin (Pygoscelis papua) \n", + "36 Gentoo penguin (Pygoscelis papua) \n", + "42 Adelie Penguin (Pygoscelis adeliae) \n", + "48 Gentoo penguin (Pygoscelis papua) \n", + "61 Gentoo penguin (Pygoscelis papua) \n", + "64 Gentoo penguin (Pygoscelis papua) \n", + "65 Adelie Penguin (Pygoscelis adeliae) \n", + "68 Adelie Penguin (Pygoscelis adeliae) \n", + "70 Chinstrap penguin (Pygoscelis antarctica) \n", + "72 Chinstrap penguin (Pygoscelis antarctica) \n", + "74 Gentoo penguin (Pygoscelis papua) \n", + "77 Adelie Penguin (Pygoscelis adeliae) \n", + "81 Gentoo penguin (Pygoscelis papua) \n", + "91 Chinstrap penguin (Pygoscelis antarctica) \n", + "96 Gentoo penguin (Pygoscelis papua) \n", + "105 Adelie Penguin (Pygoscelis adeliae) \n", + "111 Adelie Penguin (Pygoscelis adeliae) \n", + "\n", + "[67 rows x 7 columns]" ] }, - "execution_count": 28, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -3034,60 +3670,16 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2d32081be31f44abb8de67e2209d76cd", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HTML(value='Query job 2a043039-670f-4eb8-9cf0-765ee6ed7de6 is RUNNING. Open Job" + ], "text/plain": [ - "HTML(value='Query job bc8b2042-1e13-441c-9531-300ed5badb7a is RUNNING. " ] }, "metadata": {}, @@ -3095,13 +3687,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "4588ae10de634460bf4026ddd9076351", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 7f1f565b-0f73-4a4e-b33f-8484fa260838 is DONE. 0 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 5e867182-dd7a-4aff-87a8-f7596e900fd5 is DONE. 0 Bytes processed. " ] }, "metadata": {}, @@ -3109,13 +3699,11 @@ }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "8209cf8286a545ebb7b6ef9d002a43a1", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job d4b9d4a6-d75e-46e1-b092-ab58e8aef890 is DONE. 48 Bytes processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job d4cdb016-8f1e-4960-8ed7-4524ccc5a8a8 is RUNNING. " ] }, "metadata": {}, @@ -3153,12 +3741,12 @@ " \n", "25 rows × 6 columns
\n", "[146 rows x 6 columns in total]" ], "text/plain": [ - " island culmen_length_mm culmen_depth_mm flipper_length_mm body_mass_g \\\n", - "0 Dream 36.6 18.4 184.0 3475.0 \n", - "1 Dream 39.8 19.1 184.0 4650.0 \n", - "2 Dream 40.9 18.9 184.0 3900.0 \n", - "4 Dream 37.3 16.8 192.0 3000.0 \n", - "5 Dream 43.2 18.5 192.0 4100.0 \n", - "9 Dream 40.2 20.1 200.0 3975.0 \n", - "10 Dream 40.8 18.9 208.0 4300.0 \n", - "11 Dream 39.0 18.7 185.0 3650.0 \n", - "12 Dream 37.0 16.9 185.0 3000.0 \n", - "14 Dream 34.0 17.1 185.0 3400.0 \n", - "15 Dream 37.0 16.5 185.0 3400.0 \n", - "18 Dream 39.7 17.9 193.0 4250.0 \n", - "19 Dream 37.8 18.1 193.0 3750.0 \n", - "22 Dream 40.2 17.1 193.0 3400.0 \n", - "23 Dream 36.8 18.5 193.0 3500.0 \n", - "26 Dream 41.5 18.5 201.0 4000.0 \n", - "31 Dream 33.1 16.1 178.0 2900.0 \n", - "32 Dream 37.2 18.1 178.0 3900.0 \n", - "33 Dream 39.5 16.7 178.0 3250.0 \n", - "35 Dream 36.0 18.5 186.0 3100.0 \n", - "36 Dream 39.6 18.1 186.0 4450.0 \n", - "38 Dream 41.3 20.3 194.0 3550.0 \n", - "41 Dream 35.7 18.0 202.0 3550.0 \n", - "51 Dream 38.1 17.6 187.0 3425.0 \n", - "53 Dream 36.0 17.1 187.0 3700.0 \n", + " island culmen_length_mm culmen_depth_mm flipper_length_mm \\\n", + "0 Biscoe 40.1 18.9 188.0 \n", + "1 Torgersen 39.1 18.7 181.0 \n", + "4 Biscoe 43.2 19.0 197.0 \n", + "6 Biscoe 41.3 21.1 195.0 \n", + "11 Dream 38.1 18.6 190.0 \n", + "13 Biscoe 37.8 20.0 190.0 \n", + "14 Biscoe 35.0 17.9 190.0 \n", + "16 Torgersen 34.6 21.1 198.0 \n", + "19 Dream 37.2 18.1 178.0 \n", + "21 Biscoe 40.5 17.9 187.0 \n", + "23 Dream 42.2 18.5 180.0 \n", + "30 Dream 39.2 21.1 196.0 \n", + "32 Torgersen 42.9 17.6 196.0 \n", + "38 Dream 41.1 17.5 190.0 \n", + "40 Torgersen 38.6 21.2 191.0 \n", + "42 Biscoe 35.5 16.2 195.0 \n", + "44 Dream 39.2 18.6 190.0 \n", + "45 Torgersen 35.2 15.9 186.0 \n", + "46 Dream 43.2 18.5 192.0 \n", + "49 Biscoe 39.6 17.7 186.0 \n", + "53 Biscoe 45.6 20.3 191.0 \n", + "58 Torgersen 40.9 16.8 191.0 \n", + "60 Torgersen 40.3 18.0 195.0 \n", + "62 Dream 36.0 18.5 186.0 \n", + "63 Torgersen 39.3 20.6 190.0 \n", "\n", - " sex \n", - "0 FEMALE \n", - "1 MALE \n", - "2 MALE \n", - "4 FEMALE \n", - "5 MALE \n", - "9 MALE \n", - "10 MALE \n", - "11 MALE \n", - "12 FEMALE \n", - "14 FEMALE \n", - "15 FEMALE \n", - "18 MALE \n", - "19 MALE \n", - "22 FEMALE \n", - "23 FEMALE \n", - "26 MALE \n", - "31 FEMALE \n", - "32 MALE \n", - "33 FEMALE \n", - "35 FEMALE \n", - "36 MALE \n", - "38 MALE \n", - "41 FEMALE \n", - "51 FEMALE \n", - "53 FEMALE \n", + " body_mass_g sex \n", + "0 4300.0 MALE \n", + "1 3750.0 MALE \n", + "4 4775.0 MALE \n", + "6 4400.0 MALE \n", + "11 3700.0 FEMALE \n", + "13 4250.0 MALE \n", + "14 3450.0 FEMALE \n", + "16 4400.0 MALE \n", + "19 3900.0 MALE \n", + "21 3200.0 FEMALE \n", + "23 3550.0 FEMALE \n", + "30 4150.0 MALE \n", + "32 4700.0 MALE \n", + "38 3900.0 MALE \n", + "40 3800.0 MALE \n", + "42 3350.0 FEMALE \n", + "44 4250.0 MALE \n", + "45 3050.0 FEMALE \n", + "46 4100.0 MALE \n", + "49 3500.0 FEMALE \n", + "53 4600.0 MALE \n", + "58 3700.0 FEMALE \n", + "60 3250.0 FEMALE \n", + "62 3100.0 FEMALE \n", + "63 3650.0 MALE \n", "...\n", "\n", "[146 rows x 6 columns]" ] }, - "execution_count": 13, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -843,18 +793,16 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "81f9aa34c7234bd88b6b7a4bc77d4b4e", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "Query job 0808457b-a0df-4a37-b7a5-8885f4a4588c is DONE. 28.9 kB processed. Open Job" + ], "text/plain": [ - "HTML(value='Query job 288f0daa-a51e-45b4-86bf-d054467c4a99 is DONE. 28.9 kB processed. " ] }, "metadata": {}, @@ -881,7 +829,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -897,7 +845,7 @@ " ('linreg', LinearRegression(fit_intercept=False))])" ] }, - "execution_count": 15, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -936,9 +884,63 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "Query job e9bfa6a5-a53f-4d8b-ae8c-cc8cd55d0947 is DONE. 28.9 kB processed. Open Job" + ], + "text/plain": [ + "3 rows × 1 columns
\n", - "[3 rows x 1 columns in total]" + "3 rows × 7 columns
\n", + "[3 rows x 7 columns in total]" ], "text/plain": [ - " predicted_body_mass_g\n", - "tag_number \n", - "1633 3965.994361\n", - "1672 3246.312058\n", - "1690 3456.404062\n", + " predicted_body_mass_g species \\\n", + "tag_number \n", + "1633 4017.203152 Adelie Penguin (Pygoscelis adeliae) \n", + "1672 3127.601519 Adelie Penguin (Pygoscelis adeliae) \n", + "1690 3386.101231 Adelie Penguin (Pygoscelis adeliae) \n", + "\n", + " island culmen_length_mm culmen_depth_mm flipper_length_mm \\\n", + "tag_number \n", + "1633 Torgersen 39.5 18.8 196.0 \n", + "1672 Torgersen 38.5 17.2 181.0 \n", + "1690 Dream 37.9 18.1 188.0 \n", "\n", - "[3 rows x 1 columns]" + " sex \n", + "tag_number \n", + "1633 MALE \n", + "1672 FEMALE \n", + "1690 FEMALE \n", + "\n", + "[3 rows x 7 columns]" ] }, - "execution_count": 19, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -1240,28 +1250,53 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 4. Save in BigQuery" + "## 6. Save in BigQuery" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "Copy job d1def4a4-1da1-43a9-8ae5-4459444d993d is DONE. Open Job" + ], "text/plain": [ - "Pipeline(steps=[('preproc',\n", - " ColumnTransformer(transformers=[('onehot', OneHotEncoder(),\n", - " ['island', 'species', 'sex']),\n", - " ('scaler', StandardScaler(),\n", - " ['culmen_depth_mm',\n", - " 'culmen_length_mm',\n", - " 'flipper_length_mm'])])),\n", - " ('linreg', LinearRegression(fit_intercept=False))])" + "