diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 0000000..89e4415
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1,3 @@
+# These are supported funding model platforms
+
+github: chris1610
diff --git a/Monte_Carlo_Simulationv2.ipynb b/Monte_Carlo_Simulationv2.ipynb
new file mode 100644
index 0000000..6499910
--- /dev/null
+++ b/Monte_Carlo_Simulationv2.ipynb
@@ -0,0 +1,468 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Monte Carlo Simulation with Python\n",
+    "\n",
+    "Notebook to accompany article on [Practical Business Python](https://pbpython.com/monte-carlo.html)\n",
+    "\n",
+    "Updated to replace the Python loops with vectorized NumPy operations, based on the comments in [this Reddit thread](https://www.reddit.com/r/Python/comments/arxwkm/monte_carlo_simulation_with_python/)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import seaborn as sns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.set_style('whitegrid')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define the variables for the Percent to target based on historical results\n",
+    "avg = 1\n",
+    "std_dev = .1\n",
+    "num_reps = 500\n",
+    "num_simulations = 100000"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Draw a percent-to-target value for every rep (rows) in every simulation (columns)\n",
+    "pct_to_target = np.random.normal(\n",
+    "    avg,\n",
+    "    std_dev,\n",
+    "    size=(num_reps, num_simulations)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[0.79328531, 0.99211018, 1.14343423, ..., 0.83737887, 0.93507967,\n",
+       "        0.86079851],\n",
+       "       [1.03126742, 1.04414961, 1.08119495, ..., 0.98607625, 1.01161899,\n",
+       "        0.96872644],\n",
+       "       [1.08616345, 0.93970666, 1.07594111, ..., 0.94057821, 1.00399945,\n",
+       "        1.05325946],\n",
+       "       ...,\n",
+       "       [1.10388204, 0.90397305, 0.96005999, ..., 0.88810244, 1.18064642,\n",
+       "        0.94066897],\n",
+       "       [1.07581302, 0.92552317, 1.08256074, ..., 0.91934988, 1.06668758,\n",
+       "        1.05969099],\n",
+       "       [1.12755095, 0.95080038, 0.978849  , ..., 1.0094155 , 0.94359533,\n",
+       "        1.06332923]])"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pct_to_target[0:10]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Draw a sales target for every rep in every simulation from the discrete target distribution\n",
+    "sales_target_values = [75_000, 100_000, 200_000, 300_000, 400_000, 500_000]\n",
+    "sales_target_prob = [.3, .3, .2, .1, .05, .05]\n",
+    "sales_target = np.random.choice(sales_target_values, p=sales_target_prob, \n",
+    "                                size=(num_reps, num_simulations))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[ 75000, 200000,  75000, ...,  75000, 100000, 200000],\n",
+       "       [200000,  75000, 100000, ..., 200000, 100000, 100000],\n",
+       "       [400000,  75000, 100000, ..., 500000, 200000,  75000],\n",
+       "       ...,\n",
+       "       [500000,  75000, 500000, ...,  75000,  75000,  75000],\n",
+       "       [ 75000, 100000,  75000, ...,  75000, 500000, 100000],\n",
+       "       [100000,  75000,  75000, ..., 100000, 100000,  75000]])"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sales_target[0:10]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Commission rate per percent-to-target tier, indexed by the np.digitize bin:\n",
+    "#   bin 0: pct < 0.90          -> 2%\n",
+    "#   bin 1: 0.90 <= pct < 0.99  -> 3%\n",
+    "#   bin 2: 0.99 <= pct < 10    -> 4%\n",
+    "# The rate table must be ascending so that higher attainment earns a higher rate.\n",
+    "# A draw >= 10 would fall in bin 3 and index past the end of the rate table.\n",
+    "commission_percentages = np.take(\n",
+    "    np.array([0.02, 0.03, 0.04]),\n",
+    "    np.digitize(pct_to_target, bins=[.9, .99, 10])\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2    26992625\n",
+       "1    15075317\n",
+       "0     7932058\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 65,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(np.digitize(pct_to_target, bins=[.9, .99, 10]).flatten()).value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.02    26992625\n",
+       "0.03    15075317\n",
+       "0.04     7932058\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 66,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Count how often each commission rate occurs across all reps and simulations\n",
+    "pd.DataFrame(commission_percentages.flatten()).value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[0.04, 0.02, 0.02, ..., 0.04, 0.03, 0.04],\n",
+       "       [0.02, 0.02, 0.02, ..., 0.03, 0.02, 0.03],\n",
+       "       [0.02, 0.03, 0.02, ..., 0.03, 0.02, 0.02],\n",
+       "       ...,\n",
+       "       [0.02, 0.03, 0.03, ..., 0.04, 0.02, 0.03],\n",
+       "       [0.02, 0.03, 0.02, ..., 0.03, 0.02, 0.02],\n",
+       "       [0.02, 0.03, 0.03, ..., 0.02, 0.03, 0.02]])"
+      ]
+     },
+     "execution_count": 67,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "commission_percentages[0:10]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "total_commissions = (commission_percentages * sales_target).sum(axis=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "96546.42131435724"
+      ]
+     },
+     "execution_count": 51,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "total_commissions.std()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Total_Commissions</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2838250.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2786750.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2795500.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3054750.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2831750.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Total_Commissions\n",
+       "0          2838250.0\n",
+       "1          2786750.0\n",
+       "2          2795500.0\n",
+       "3          3054750.0\n",
+       "4          2831750.0"
+      ]
+     },
+     "execution_count": 52,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Show how to create the dataframe\n",
+    "df = pd.DataFrame(data={'Total_Commissions': total_commissions})\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<AxesSubplot:title={'center':'Commissions Distribution'}, ylabel='Frequency'>"
+      ]
+     },
+     "execution_count": 53,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAERCAYAAACHA/vpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAueElEQVR4nO3de1hU9aLG8e8wDEjMkJFlTyneMdONN7adEq9pWGreUUhKLdvaUbemblDxlve87LO1sDTr9GiCpGbu7OZtaxZ5klKLUIssRU3xspNBmUFY548OcyQVlikw6vt5np6HWfObtd65NK/rNstiGIaBiIiICT4VHUBERG4cKg0RETFNpSEiIqapNERExDSVhoiImKbSEBER01QaUq4KCgp488036dmzJ926dePxxx9n7ty5uN3ucs/SrVs3zp49e9WP27x5M9OnTy+DRMXVr1+frl270q1bN5544gl69uzJ22+/7bk/KSmJJUuWlDiPd955p9hjLnbx49u3b88333xzVfkOHz7M8OHDATh+/Dj9+vW7qsfLjcm3ogPIrWXKlCn8+uuvvPXWWzgcDs6dO8eYMWOYMGECc+fOLdcs77333h963COPPMIjjzxyndNc3ltvvUVwcDAAp0+fZsiQIbhcLgYNGkR0dHSpj09LS6NevXqXvc/M40ty9OhRDh48CEDVqlVJTk6+pvnJjcGik/ukvGRlZdGlSxd27NiB3W73TM/Ozuarr74iMjKSnJwcpk6dyr59+7BYLLRq1YoXXngBX19f/vSnPzFw4EA+//xzzp07x7Bhw/joo484cOAAd999N6+++iq33Xab6XH169cnNTWVgoIC4uLiOHPmDABt2rRh5MiRZGdnX3b62rVr+fjjj3nttdf45ZdfmDJlCkeOHMEwDLp3786zzz5LVlYWAwYMoE2bNuzZs4ezZ88yduxYOnbsSGZmJhMmTMDtdmMYBr179+bJJ5+85PUqyldUGgBfffUVI0aM4NNPP+Xll1/mzJkzTJo0iZUrV5KcnIzNZsPf358XX3yRgwcPMmHCBPz9/RkyZAinT59m9+7dnDhxgvr161OjRg3P49u3b8+DDz7Ivn37cLvdDBw4kN69e7Nz506mTZvG+++/D+C5/d5779GpUyeOHz/On//8Z6ZOnUrXrl35+uuvyc/PZ/bs2aSmpmK1WgkLC2PcuHHY7Xbat29Pjx49SE1N5dixY3Tr1o2RI0eW4adOrjdtnpJyk56eTt26dYsVBsBdd91FZGQkANOnT6dy5cr885//ZM2aNezfv5833ngDALfbTZUqVVi9ejXdu3cnISGBCRMm8MEHH+B0Otm8efNVjSuSkpJCtWrVePfdd3n77bf5+eefycnJueL0i40ZM4YHH3yQf/7znyQlJbF+/Xo2bNgA/Lb5JiIigtWrVzN69GhmzpwJwLJly2jfvj1r165lyZIl7Nq1i8LCQlOv4f333092dranyOC3TX4zZ87k9ddfZ82aNURFRZGWlkbHjh1p3749AwYM8JTSkSNHePfdd5k3b94l8/b39+fdd9/ljTfeYMGCBXz//fdXzGG1Wpk+fTohISEsW7as2H2LFy/mxIkTvPfee7z33nsUFhby0ksvee4/d+6cp+TeeOMNDh8+bOq5i3dQaUi58fHxKfXLcfv27fTv3x+LxYKfnx/9+vVj+/btnvuLyiUkJITQ0FCqVq2Kj48P1apV49dff73qcQCtWrXik08+YfDgwaxatYrRo0fjcDiuOL3IuXPn+OqrrzxfyA6Hg549e3ry2mw22rRpA8ADDzzAv//9bwA6duzI66+/zrBhw/jkk09ISEjAx8fc/4oWiwX47Qu+iNVqpVOnTvTr148XX3yRoKAgevfufdnHN2nSBF/fy2+VLtonUbVqVVq2bElqaqqpTL+3fft2+vXrh81mw8fHh9jYWD799FPP/UWb9qpWrcqdd955yfsh3k2lIeUmLCyMH3/
8EafTWWz68ePHee6558jLy6OwsNDzxQhQWFjIhQsXPLdtNttl//49s+OKcm3evJm+ffty5MgR+vTpw7fffnvF6Rdn+/3W3YvzFn1pAsWeU7t27fj444957LHHyMjIoGvXrvzyyy8lZizyzTffUK1aNQIDA4tNnzdvHq+++iohISEsWbKEF1544bKPv+22264474uLq7CwEF9fXywWS7HnmJ+fX2rGy72HFz/u4sL7/fzF+6k0pNxUrVqVrl27Mn78eE9xOJ1OpkyZQuXKlalUqRIRERGsWLECwzBwu92kpKTw8MMPl2muefPmkZiYSIcOHZgwYQJ169bl+++/v+L0Ina7ncaNG3uOTsrJyWHdunWl5h09ejQffPABnTt3ZvLkydjtdg4dOlRqzuPHjzNv3jwGDRpUbPrp06dp06YNlStXZsCAAYwcOdJzJJTVai1WuiV59913gd92cKempvLQQw8RHBzM0aNHOXXqFIZheDa9Fc37ciXSqlUrkpKSyM/Pp7CwkLfffpuWLVuayiDeT0dPSbmaPHkyiYmJ9OvXD6vVitvtpkOHDp5DNxMSEpg+fTpdu3YlPz+fVq1aMWTIkDLN9PTTTxMfH0+XLl3w8/Ojfv36dO7cmV9//fWy04t2CsNvhfPiiy+ydu1a3G43Xbt2pWfPnhw5cuSKy3v++eeZMGECq1atwmq10qFDB/785z9fMZuPjw9WqxWAXr16XbLTPDg4mKFDhzJgwAAqVark2d8A0Lp1a2bPnm3qdXC5XPTo0YP8/HwSEhKoVasW8Ntmq169enHXXXfRtm1bTyHVrVsXf39/evfuzd///nfPfIYOHcqcOXPo3r07Fy5cICwsjIkTJ5rKIN5PR0+JiIhp2jwlIiKmqTRERMQ0lYaIiJim0hAREdNu6qOndu/eXeyY8LLicrnKZTlXyxtzeWMmUK6r5Y25vDET3Ji5XC4XTZo0uex9N3Vp+Pv706BBgzJfTkZGRrks52p5Yy5vzATKdbW8MZc3ZoIbM1dGRsYVH6fNUyIiYppKQ0RETFNpiIiIaTf1Pg0R+WPy8/PJysoiLy/viveXtN27InhjJvDuXAcPHqRatWql/qjnxVQaInKJrKwsHA4HNWvWLPaLtUXOnz9PQEBABSS7Mm/MBN6b69y5c5w7d46srCzP74yZoc1TInKJvLw87rzzzssWhtwcLBYLd9555xXXJq9EpSEil6XCuPn9kfdYpSEipcrLLyh2+1o3t/x+fnLj0D4NESlVJZuVmvEbSh9o0k+zO1+3eUn5UmnILS0vv4BKNmuFnLFbtGy51OzZs0lPTyc7O5u8vDyqV6/OHXfcwcKFC4uN279/P2fPnr3iRax27txJcnJysYtE/d7333/P3LlzOX/+POfOnaNNmzYMHz68TDfPjRo1ijlz5uDn52dq/Pbt2zl27Bh9+/Yts0xmqTTklna9/wV9NfSv7SuLj48HYO3atfz444+MGTPmsuM++eQTqlSpcsXSKM3Zs2d54YUXWLRoETVr1qSgoIC//vWvJCcnEx0d/Yfzl6akEruc1q1bl1GSq6fSEBGvl5+fz/jx4zl8+DAFBQUMHDiQ5s2b8+6772Kz2WjYsCE//fQT77zzjucx//jHP0qd7+bNm3nwwQepWbMm8Nt1z+fMmeM5b2H27NmkpaUB0KVLF8+lgX19fTl69Chut5vHH3+crVu3cuzYMRITEzl27BhLlizBZrPxyy+/0KtXL9LS0ti3bx9PPfUUMTExtG/fng8//JBt27axdOlSfH19ue+++3jppZf4+uuvmTNnDr6+vgQFBTFv3jw++eQTT3m+8cYbbNiwAV9fX8LDwxk7diyLFi0iKyuLU6dOcfToUcaNG0erVq34+9//zhdffEFhYSGdO3dmwIAB1/xeqDRExOutWrWKO+64g7lz5+J0OunZsyfJycn06NGDKlWqEBYWxrZt21iyZAkBAQFMmjSJHTt2ULVq1RLne+LECapXr15sWmB
gIABbt24lKyuLlJQULly4QExMDP/xH/8BwH333cf06dOZNGkSWVlZLF26lIULF7JlyxYaNGjAL7/8wrp160hPT2fEiBFs2rSJ48ePM2zYMGJiYjzLev/99xkwYACdO3dm3bp1OJ1ONm3aRMeOHXnmmWfYsmULZ8+e9Yzfv38/H374IcnJyfj6+jJ8+HC2bt0KgJ+fH6+//jqfffYZb7zxBq1atWLdunWsWLGCqlWrsnbt2uvyXujoKRHxepmZmZ5NUHa7nTp16nD48OFiY4KDg4mLi2PcuHHs37+fCxculDrfe++9l19++aXYtMOHD/Pll1+SmZlJeHg4FosFm81G48aNyczMBOCBBx4AICgoiLp163r+drvdANSrVw+bzYbD4aBatWr4+flx++2343K5ii1r3LhxfPnll/Tv35+vvvoKHx8fhgwZwunTp3n66af56KOP8PX9/3/b//jjjzRu3BibzYbFYiE8PJzvv/8ewLNf7p577vHkWLBgAQsWLOCZZ54pVj7XQmsaIlKqvPyC67oP5moPAqhTpw67du2iY8eOOJ1ODhw4QLVq1bBYLBQWFpKTk8PixYvZtm0bAAMHDsQwjFLn265dO1577TWio6MJCQkhPz+f2bNn8/DDD1OnTh3Wrl3LgAEDyM/P5+uvv6ZHjx5A6ec3mN2JvmrVKoYPH86dd97JpEmT2LhxI7m5ufTo0YO4uDhee+01UlJSuPfeewGoXbs2b775JhcuXMBqtfLll1/SvXt39u3bd8ky3W43H330EQsWLMAwDDp37kznzp257777TGW7EpWGiJTq91/w1/rTGFd71FhUVBQTJ04kOjoal8vFsGHDuPPOO2nUqBEvvfQSderUoUmTJvTo0YPbbruNoKAgTpw4QbVq1Uqcr91uZ/bs2SQkJGAYBrm5ubRr146YmBgsFgv/8z//Q9++fcnPz6dTp040bNjwDz/nywkLC2PgwIFUrlyZwMBA2rZty6FDh4iPj+e2227DZrPx4osv8uWXXwJQv359HnvsMaKjoyksLKR58+Z06NCBffv2XTLvorWbbt26cfvtt9OyZUtP+VwLi2Gmjm9Q5XXxkxvxIisVxRszefPRUxX1epW2XG/8PSVvzATen+ty73VJ77/WNETkpjdlyhTP/oiLLV26lEqVKlVAohuXSkNELsswjJvm96emTJlS0RG80h/Z0KSjp0TkEpUqVeLUqVN/6EtFbgyGYXDq1KmrXtPSmoaIXKJatWpkZWWRnZ192fvz8/Ov6sI95cEbM4F35yo6JPhqlElpFBQUkJCQwMGDB7FarcyaNQvDMIiPj8disVCvXj0mT56Mj48PKSkpnhNVhg4dSrt27cjLy2Ps2LGcOnWKwMBA5syZQ3BwMLt372bGjBlYrVYiIiIYNmxYWcQXueXZbLYSL8zjjQc0eGMm8O5cV3PxpSJlsnmq6AzF5ORkRowYwaxZs5g1axYjR45k5cqVGIbB5s2byc7OZvny5SQnJ7Ns2TIWLFiA2+0mKSmJ0NBQVq5cSffu3UlMTARg8uTJzJ8/n6SkJPbs2UN6enpZxBcRkSsokzWNDh060LZtWwCOHj1KlSpV+Ne//kWLFi2A335867PPPsPHx4emTZvi5+eHn58fISEh7Nu3j7S0NJ599lnP2MTERJxOJ263m5CQEAAiIiJITU0t8bhpl8tVLtfmzcvL88prAHtjLm/LVNH/AizttfC216uIN+byxkxw8+Uqs30avr6+xMXFsXHjRhYuXMjWrVs9R2IEBgaSk5OD0+nE4XB4HhMYGIjT6Sw2/eKxdru92Njf/4zA7/n7++s8DS/L5Y2ZKlJpr4W3vl7emMsbM8GNmaukMinTo6fmzJnDxx9/zMSJE4v95kpubi5BQUHY7XZyc3OLTXc4HMWmlzQ2KCioLOOLiMjvlElprFu3jtdeew347bKQFouFRo0asXPnTuC3C4qEh4cTFhZGWloaLpeLnJwcMjMzCQ0NpVmzZp7
fkNm+fTvNmzfHbrdjs9k4dOgQhmGwY8cOwsPDyyK+iIhcQZlsnnr00UcZN24cTz75JBcuXGD8+PHUqVOHiRMnsmDBAmrXrk1kZCRWq5XY2FhiYmIwDINRo0bh7+9PdHQ0cXFxREdHY7PZmD9/PgBTp05lzJgxFBQUEBERQePGjcsivoiIXEGZlMZtt9122QugrFix4pJpUVFRREVFFZsWEBBwyWUdAZo0aUJKSsr1CyoiIldFZ4SLiIhpKg0RETFNpSEiIqapNERExDSVhoiImKbSEBER01QaIiJimkpDRERMU2mIiIhpKg0RETFNpSEiIqapNERExDSVhoiImKbSEBER01QaIiJimkpDRERMU2mIiIhpKg0RETFNpSEiIqapNERExDSVhoiImKbSEBER01QaIiJimkpDRERM873eM8zPz2f8+PEcOXIEt9vN0KFDueeeexgyZAg1a9YEIDo6mscff5yUlBSSk5Px9fVl6NChtGvXjry8PMaOHcupU6cIDAxkzpw5BAcHs3v3bmbMmIHVaiUiIoJhw4Zd7+giIlKK614a69evp3LlysydO5czZ87Qo0cP/vM//5OBAwcyaNAgz7js7GyWL1/OmjVrcLlcxMTE0LJlS5KSkggNDWX48OFs2LCBxMREEhISmDx5MosWLaJ69eo899xzpKen07Bhw+sdX0RESnDdS6NTp05ERkZ6blutVr799lsOHjzI5s2bqVGjBuPHj2fv3r00bdoUPz8//Pz8CAkJYd++faSlpfHss88C0Lp1axITE3E6nbjdbkJCQgCIiIggNTW11NJwuVxkZGRc76d4iby8vHJZztXyxlzelqlBgwYVuvzSXgtve72KeGMub8wEN1+u614agYGBADidTkaMGMHIkSNxu9306dOHRo0asXjxYl555RXuv/9+HA5Hscc5nU6cTqdnemBgIDk5OTidTux2e7Gxhw8fLjWLv79/uXwpZGRkVPiXz+V4Yy5vzFSRSnstvPX18sZc3pgJbsxcJZVJmewIP3bsGE899RTdunWja9eudOzYkUaNGgHQsWNHvvvuO+x2O7m5uZ7H5Obm4nA4ik3Pzc0lKCjosmODgoLKIrqIiJTgupfGyZMnGTRoEGPHjqV3794APPPMM+zduxfAs1kpLCyMtLQ0XC4XOTk5ZGZmEhoaSrNmzdi2bRsA27dvp3nz5tjtdmw2G4cOHcIwDHbs2EF4ePj1ji4iIqW47punXn31Vc6ePUtiYiKJiYkAxMfHM3PmTGw2G1WqVGHatGnY7XZiY2OJiYnBMAxGjRqFv78/0dHRxMXFER0djc1mY/78+QBMnTqVMWPGUFBQQEREBI0bN77e0aUC5eUXUMlmregYIlKK614aCQkJJCQkXDI9OTn5kmlRUVFERUUVmxYQEMDChQsvGdukSRNSUlKuX1DxKpVsVmrGbyj35f40u3O5L1PkRqaT+0RExDSVhoiImKbSEBER01QaIiJimkpDRERMU2mIiIhpKg0RETFNpSEiIqapNERExDSVhoiImKbSEBER01QaIiJimkpDRERMU2mIiIhpKg2RCpKXX1DqmLK4TKiZ5YpcyXW/noaImKNriMiNSGsaIiJimkpDRERMU2mIiIhpKg0RETFNpSEiIqapNERExDSVhoiImKbSEBER0677yX35+fmMHz+eI0eO4Ha7GTp0KHXr1iU+Ph6LxUK9evWYPHkyPj4+pKSkkJycjK+vL0OHDqVdu3bk5eUxduxYTp06RWBgIHPmzCE4OJjdu3czY8YMrFYrERERDBs27HpHFxGRUpha0zh58qTpGa5fv57KlSuzcuVKli5dyrRp05g1axYjR45k5cqVGIbB5s2byc7OZvny5SQnJ7Ns2TIWLFiA2+0mKSmJ0NBQVq5cSffu3UlMTARg8uTJzJ8/n6SkJPbs2UN6evofe8YiIvKHmVrTGD58OMHBwfTu3Zs2bdrg43PlrunUqRORkZGe21arlfT0dFq0aAF
A69at+eyzz/Dx8aFp06b4+fnh5+dHSEgI+/btIy0tjWeffdYzNjExEafTidvtJiQkBICIiAhSU1Np2LBhibldLhcZGRlmnuI1ycvLK5flXC1vzHWlTGXxG0tyZdf6ubiRPlsV7WbLZao0kpKSyMzMZPXq1SxevJiHHnqI3r17U7169UvGBgYGAuB0OhkxYgQjR45kzpw5WCwWz/05OTk4nU4cDkexxzmdzmLTLx5rt9uLjT18+HCpuf39/cvlyygjI8Mrv/S8MZc3ZroVXet74I3vozdmghszV0llYnpH+N1330316tWpVKkSBw4cYMaMGfzjH/+47Nhjx47x1FNP0a1bN7p27VpszSQ3N5egoCDsdju5ubnFpjscjmLTSxobFBRkNrqIiFwnpkrjr3/9K3379uXs2bPMnTuXxYsX8+qrr7Jt27ZLxp48eZJBgwYxduxYevfuDcADDzzAzp07Adi+fTvh4eGEhYWRlpaGy+UiJyeHzMxMQkNDadasmWe+27dvp3nz5tjtdmw2G4cOHcIwDHbs2EF4ePj1eg1ERMQkU5unoqKiaNKkCYGBgZw4ccIzPSkp6ZKxr776KmfPniUxMdGzE3vChAlMnz6dBQsWULt2bSIjI7FarcTGxhITE4NhGIwaNQp/f3+io6OJi4sjOjoam83G/PnzAZg6dSpjxoyhoKCAiIgIGjdufD2ev4iIXAVTpfH111/z6aefEh8fz/Tp02nUqBHPPfcc/v7+l4xNSEggISHhkukrVqy4ZFpUVBRRUVHFpgUEBLBw4cJLxjZp0oSUlBQzcUVEpIyY2jy1ZcsW4uPjAVi4cCFbtmwp01AiIuKdTJWGxWLB7XYDv528ZxhGmYYSERHvZGrzVL9+/ejatSuhoaH8+OOPnvMoRETk1mKqNPr06cMjjzzC4cOHqV69OsHBwWWdS0REvJCp0sjIyGDVqlW4XC7PtFmzZpVZKBER8U6mSiM+Pp7+/ftzzz33lHUeERHxYqZKo0qVKvTp06ess4iIiJczVRr33XcfS5YsoUGDBp7fkIqIiCjTYCIi4n1MlUZ+fj4HDx7k4MGDnmkqDRGRW4+p0pg1axYHDx7k0KFD1K9fn7vvvrusc4mIiBcyVRorVqxg48aN/Prrr/To0YOff/6ZSZMmlXU2ERHxMqbOCN+wYQP//d//jcPh4Omnn2bPnj1lnUtERLyQqdIo+tmQop3gfn5+ZZdIRES8lqnNU126dOHJJ5/k6NGjDB48mA4dOpR1LhER8UKmSqN///489NBDHDhwgFq1anH//feXdS4REfFCpkrj5Zdf9vydmZnJpk2bGDZsWJmFEhER72T6jHD4bd/Gd999R2FhYZmGEhER72T6p9Evpp9GFxG5NZkqjYvPBM/OzubYsWNlFkhERLyXqdK4+EQ+f39//va3v5VZIBER8V6mSmP58uVlnUNERG4ApkrjiSeeIDc3F39/f8+FmAzDwGKxsHnz5jINKCIi3sNUaTRt2pTu3bvTtGlT9u/fz7Jly5g+fXpZZxMRES9jqjQyMzNp2rQpAPXr1+fYsWP6KRERkVuQqd+ecjgc/Nd//Rdbtmxh7ty53HvvvaU+Zs+ePcTGxgKQnp5Oq1atiI2NJTY2lg8++ACAlJQUevbsSVRUFFu3bgUgLy+P4cOHExMTw+DBgzl9+jQAu3fvpk+fPvTr16/YyYYiIlJ+TK1pzJ8/n5UrV/Lpp59Sv359Ro0aVeL4pUuXsn79egICAgD47rvvGDhwIIMGDfKMyc7OZvny5axZswaXy0VMTAwtW7YkKSmJ0NBQhg8fzoYNG0hMTCQhIYHJkyezaNEiqlevznPPPUd6ejoNGza8hqcuIiJXy9Sahr+/P7fffjt33HEHtWrV4uzZsyWODwkJYdGiRZ7b3377Lf/617948sknGT9+PE6nk71799K0aVP8/PxwOByEhISwb98+0tLSaNWqFQCtW7cmNTUVp9OJ2+0mJCQEi8VCREQEqam
p1/C0RUTkjzB9nsbdd9/N559/TqNGjYiLi2Pp0qVXHB8ZGUlWVpbndlhYGH369KFRo0YsXryYV155hfvvvx+Hw+EZExgYiNPpxOl0eqYHBgaSk5OD0+nEbrcXG3v48OFSc7tcLjIyMsw8xWuSl5dXLsu5Wt6Y60qZGjRoUAFpbl3X+rm4kT5bFe1my2WqNA4dOsSMGTPYtWsX7du3Z8mSJVe1kI4dOxIUFOT5e9q0aYSHh5Obm+sZk5ubi8PhwG63e6bn5uYSFBRUbNrF00vj7+9fLl9GGRkZXvml5425vDHTreha3wNvfB+9MRPcmLlKKhNTm6cKCgo4ffo0FosFp9OJj4+ph3k888wz7N27F4DU1FQaNmxIWFgYaWlpuFwucnJyyMzMJDQ0lGbNmrFt2zYAtm/fTvPmzbHb7dhsNg4dOoRhGOzYsYPw8PCryiAiItfO1JrGqFGjiI6OJjs7m759+zJhwoSrWsiUKVOYNm0aNpuNKlWqMG3aNOx2O7GxscTExGAYBqNGjcLf35/o6Gji4uKIjo7GZrMxf/58AKZOncqYMWMoKCggIiKCxo0bX/2zFRGRa2KqNI4dO8bHH3/M6dOnueOOOzyXfS1JtWrVSElJAaBhw4YkJydfMiYqKoqoqKhi0wICAli4cOElY5s0aeKZn4iIVAxT25mKvqyDg4NNFYaIiNycTK1puN1uunfvTq1atTz7M4o2G4mIyK2jxNJITEzk+eefZ8yYMRw/fpyqVauWVy4REfFCJW6e+uKLLwBo0aIF77zzDi1atPD8JyIit54SS8MwjMv+LSIit6YSS+Pind7aAS4iIiXu00hPT6dfv34YhsEPP/zg+dtisVz2EFoREbm5lVga69evL68cIiJyAyixNO67777yyiEiIjeAq/sRKRERuaWpNERExDSVhoiImKbSEBER01QaIiJimkpDRERMU2mIiIhpKg0RETFNpSEiIqapNERExDSVhoiImKbSEBER01QaIiJimkpDRERMU2mIiIhpZVYae/bsITY2FoCff/6Z6OhoYmJimDx5MoWFhQCkpKTQs2dPoqKi2Lp1KwB5eXkMHz6cmJgYBg8ezOnTpwHYvXs3ffr0oV+/frz88stlFVtEREpQJqWxdOlSEhIScLlcAMyaNYuRI0eycuVKDMNg8+bNZGdns3z5cpKTk1m2bBkLFizA7XaTlJREaGgoK1eupHv37iQmJgIwefJk5s+fT1JSEnv27CE9Pb0soouISAnKpDRCQkJYtGiR53Z6ejotWrQAoHXr1nz++efs3buXpk2b4ufnh8PhICQkhH379pGWlkarVq08Y1NTU3E6nbjdbkJCQrBYLERERJCamloW0UVEpAQlXu71j4qMjCQrK8tz2zAMLBYLAIGBgeTk5OB0OnE4HJ4xgYGBOJ3OYtMvHmu324uNPXz4cKk5XC4XGRkZ1+tpXVFeXl65LOdqeWOuK2Vq0KBBBaS5dV3r5+JG+mxVtJstV5mUxu/5+Pz/Ck1ubi5BQUHY7XZyc3OLTXc4HMWmlzQ2KCio1OX6+/uXy5dRRkaGV37peWMub8x0K7rW98Ab30dvzAQ3Zq6SyqRcjp564IEH2LlzJwDbt28nPDycsLAw0tLScLlc5OTkkJmZSWhoKM2aNWPbtm2esc2bN8dut2Oz2Th06BCGYbBjxw7Cw8PLI7qIiFykXNY04uLimDhxIgsWLKB27dpERkZitVqJjY0lJiYGwzAYNWoU/v7+REdHExcXR3R0NDabjfnz5wMwdepUxowZQ0FBARERETRu3Lg8oouIyEXKrDSqVatGSkoKALVq1WLFihWXjImKiiIqKqrYtICAABYuXHjJ2CZNmnjmJ2UjL7+ASjZrmS7DG1fTRcS8clnTkBtDJZuVmvEbKmTZP83uXCHLFZGrozPCRUTENJWGiIiYptIQERHTVBoiImKaSkNERExTaYjcYvLyC655Hn/00OnrsWypWDrkVuQWo0Or5VpoTUN
ERExTaYiIiGkqDRERMU2lISIipqk0RETENJWGiIiYptIQERHTVBoiImKaSkNERExTaYiIiGkqDRERMU2lISIipqk0RETENJWGiIiYptIQERHTVBoiImJauV6EqXv37jgcDgCqVavGkCFDiI+Px2KxUK9ePSZPnoyPjw8pKSkkJyfj6+vL0KFDadeuHXl5eYwdO5ZTp04RGBjInDlzCA4OLs/4IiK3vHIrDZfLBcDy5cs904YMGcLIkSN58MEHmTRpEps3b6ZJkyYsX76cNWvW4HK5iImJoWXLliQlJREaGsrw4cPZsGEDiYmJJCQklFd8ERGhHDdP7du3j/PnzzNo0CCeeuopdu/eTXp6Oi1atACgdevWfP755+zdu5emTZvi5+eHw+EgJCSEffv2kZaWRqtWrTxjU1NTyyu6iIj8n3Jb06hUqRLPPPMMffr04aeffmLw4MEYhoHFYgEgMDCQnJwcnE6nZxNW0XSn01lsetHY0rhcLjIyMsrmCV0kLy+vXJZzta42V4MGDcowjchvyur/lZvl/8Py8kdzlVtp1KpVixo1amCxWKhVqxaVK1cmPT3dc39ubi5BQUHY7XZyc3OLTXc4HMWmF40tjb+/f7l8EWZkZHjlF6635pJbW1l9Jr31834j5iqpTMpt89Tq1auZPXs2AMePH8fpdNKyZUt27twJwPbt2wkPDycsLIy0tDRcLhc5OTlkZmYSGhpKs2bN2LZtm2ds8+bNyyu6iIj8n3Jb0+jduzfjxo0jOjoai8XCzJkzueOOO5g4cSILFiygdu3aREZGYrVaiY2NJSYmBsMwGDVqFP7+/kRHRxMXF0d0dDQ2m4358+eXV3QREfk/5VYafn5+l/2iX7FixSXToqKiiIqKKjYtICCAhQsXllk+EREpnU7uExER01QaIiJimkpDRERMU2mIiIhpKg0RETFNpSEiIqapNERExDSVhoiImKbSEBER01QaIiJimkpDRERMU2mIiIhpKg0RETFNpSEiIqapNERExDSVhoiImKbS8EJ5+QXXZT7eeF1iubVdr8/25ZT0eS/L5d5qyu3KfWJeJZuVmvEbyn25P83uXO7LlFuLPts3Pq1piIiIaSoNERExTaUhIiKmqTRERMQ0lYaIiJim0hAREdNuqENuCwsLmTJlCvv378fPz4/p06dTo0aNio4lInLLuKHWNDZt2oTb7WbVqlWMHj2a2bNnV3QkEZFbyg1VGmlpabRq1QqAJk2a8O2335bp8syeRaozr0W8W0WdEX4znoluMQzDqOgQZk2YMIFHH32UNm3aANC2bVs2bdqEr+/lt7Lt3r0bf3//8owoInLDc7lcNGnS5LL33VD7NOx2O7m5uZ7bhYWFVywM4IpPWkRE/pgbavNUs2bN2L59O/DbWkRoaGgFJxIRubXcUJunio6eOnDgAIZhMHPmTOrUqVPRsUREbhk3VGmIiEjFuqE2T4mISMVSaYiIiGkqDRERMe2GOuS2IuXn5zN+/HiOHDmC2+1m6NChPPLII57733zzTVavXk1wcDAAU6dOpXbt2hWea+/evcyePRvDMLjrrruYO3duuZy7UlKu7OxsXnjhBc/YjIwMRo8eTXR0dIVlAli/fj1vvvkmPj4+9OrVi5iYmDLNYzbXunXrWLZsGQ6Hgx49etCnT59yyVVQUEBCQgIHDx7EarUya9YsQkJCPPdv2bKFV155BV9fX3r16kVUVJRX5AI4f/48AwcOZMaMGeV2sExpud5//33eeustrFYroaGhTJkyBR+fsv13e2mZPv74Y5YsWYLFYqFv377mPluGmLJ69Wpj+vTphmEYxunTp402bdoUu3/06NHGN99841W5CgsLjSeeeML46aefDMMwjJSUFCMzM7PCc13sq6++MmJjY40LFy5UeKaWLVsaZ86cMVwul9GhQwfj3//+d5lnKi3XqVOnjLZt2xpnzpwxCgoKjNjYWOPw4cPlkmvjxo1GfHy8YRiG8cUXXxhDhgz
x3Od2uz2vkcvlMnr27GmcOHGiwnMZhmHs3bvX6NGjh/Hwww8bP/zwQ7lkKi3X+fPnjUceecQ4d+6cYRiGMWrUKGPTpk0VmunChQtGx44djbNnzxoXLlwwHn30UePUqVOlzlNrGiZ16tSJyMhIz22r1Vrs/vT0dJYsWUJ2djZt27blL3/5S4XnOnjwIJUrV+att97iwIEDtGnTplzWfkrLVcQwDKZNm8a8efMue395Z6pfvz45OTn4+vpiGAYWi6XMM5WWKysri/vvv5/KlSsD8Kc//Yk9e/ZQrVq1Ms/VoUMH2rZtC8DRo0epUqWK577MzExCQkK4/fbbAWjevDm7du3iscceq9BcAG63m1deeYW//e1vZZ7FbC4/Pz+Sk5MJCAgA4MKFC+Wyxl9SJqvVygcffICvry+nTp0CIDAwsNR5qjRMKnoxnU4nI0aMYOTIkcXu79y5MzExMdjtdoYNG8bWrVtp165dheY6c+YMX3/9NRMnTqRGjRoMGTKERo0a8dBDD1VoriJbtmyhXr165VZkpWWqV68evXr1IiAggI4dOxIUFFThuWrUqMEPP/zAyZMnCQwMJDU1lZo1a5ZLLgBfX1/i4uLYuHEjCxcu9Ex3Op04HA7P7cDAQJxOZ4Xngt8KrKJcKZePj4/nC3v58uWcO3eOli1bVmimovs++eQTXnzxRdq0aVPiL2x4lNFa0U3p6NGjRo8ePYx33nmn2PTCwkLj7NmzntsrVqwwXn755QrP9cMPPxhdunTx3H7zzTeNJUuWVHiuIiNGjDB27dpVbnlKypSRkWFERkZ6VtVHjRplfPDBBxWeyzAMY/PmzUa/fv2MkSNHGgkJCcbGjRvLLVeREydOGG3btjVyc3MNw/jt9Xr22Wc998+YMcP48MMPKzzXxfr371+um6cudrlcBQUFxuzZs42//OUvns1UFZ3p4mxjx441Vq9eXep8dPSUSSdPnmTQoEGMHTuW3r17F7vP6XTSpUsXcnNzMQyDnTt30qhRowrPVb16dXJzc/n5558B2LVrF/Xq1avwXEXS09Np1qxZueQpLZPD4aBSpUr4+/tjtVoJDg7m7NmzFZ7rwoUL7Nmzh7fffps5c+bw448/lttrtm7dOl577TUAAgICsFgsnk1nderU4eeff+bf//43brebXbt20bRp0wrPVZFKyzVp0iRcLheJiYmezVQVmcnpdNK/f3/cbjc+Pj4EBASY2jGvM8JNmj59Oh9++GGxTSl9+vTh/Pnz9O3bl3Xr1rF8+XL8/Px46KGHGDFihFfkSk1NZf78+RiGQdOmTUlISPCKXKdPn2bgwIG899575ZLHTKakpCTWrFmDzWYjJCSEadOm4efnV+G5Xn75ZTZt2oS/vz8DBw6kU6dOZZ4J4Ny5c4wbN46TJ09y4cIFBg8ezPnz5zl37hx9+/b1HD1lGAa9evXiySef9IpcRWJjY5kyZUq5HT1VUq5GjRrRq1cvwsPDPfvKnnrqKTp27Fhhmfr27cuqVatYvXo1vr6+1K9fn4kTJ5ZawCoNERExTZunRETENJWGiIiYptIQERHTVBoiImKaSkNE5Ca0Z88eYmNjSxyzdu1a+vTpQ8+ePXnllVdMzVdnhIuI3GSWLl3K+vXrSzwf5NChQyQlJXlOFVi4cCH5+fnYbLYS5601DRGRm0xISAiLFi3y3N6/fz+xsbHExsYyfPhwcnJy+Pzzz2nUqBFxcXH079+fZs2alVoYoDUNEZGbTmRkJFlZWZ7bEydOZObMmdStW5d33nmH119/nUqVKrFr1y6SkpJwuVxER0ezevXqUn9zTaUhInKTy8zMZOrUqcBv12+pVasWYWFhtGjRArvdjt1up06dOvz000+EhYWVOC+VhojITa5WrVrMmTOHe++9l7S0NLKzs6lVqxYrV67E5XJRUFDg+bn70qg0RERuclOmTCEuLo6CggIAZsyYQa1atejVqxfR0dEYhsHzzz/vuW5LSfTbUyIiYpqOnhI
REdNUGiIiYppKQ0RETFNpiIiIaSoNERExTaUhIiKmqTRERMS0/wVf3ZYTm53/1wAAAABJRU5ErkJggg==\n",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "df.plot(kind='hist', title='Commissions Distribution')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Total_Commissions</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>1.000000e+05</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>2.831683e+06</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>9.654690e+04</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>2.472750e+06</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>2.765750e+06</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>2.830250e+06</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>2.896500e+06</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>3.278500e+06</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       Total_Commissions\n",
+       "count       1.000000e+05\n",
+       "mean        2.831683e+06\n",
+       "std         9.654690e+04\n",
+       "min         2.472750e+06\n",
+       "25%         2.765750e+06\n",
+       "50%         2.830250e+06\n",
+       "75%         2.896500e+06\n",
+       "max         3.278500e+06"
+      ]
+     },
+     "execution_count": 54,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data/Example4.xlsx b/data/Example4.xlsx
new file mode 100644
index 0000000..307ffc3
Binary files /dev/null and b/data/Example4.xlsx differ
diff --git a/data/cereal_data.csv b/data/cereal_data.csv
new file mode 100644
index 0000000..6a907de
--- /dev/null
+++ b/data/cereal_data.csv
@@ -0,0 +1,78 @@
+name,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating,cereal
+100% Bran,Nabisco,Cold,70,4,1,130,10.0,5.0,6,280,25,Top,1.0,0.33,68.4,1
+100% Natural Bran,Quaker Oats,Cold,120,3,5,15,2.0,8.0,8,135,0,Top,1.0,1.0,33.98,1
+All-Bran,Kellogs,Cold,70,4,1,260,9.0,7.0,5,320,25,Top,1.0,0.33,59.43,1
+All-Bran with Extra Fiber,Kellogs,Cold,50,4,0,140,14.0,8.0,0,330,25,Top,1.0,0.5,93.7,1
+Almond Delight,Ralston Purina,Cold,110,2,2,200,1.0,14.0,8,-1,25,Top,1.0,0.75,34.38,1
+Apple Cinnamon Cheerios,General Mills,Cold,110,2,2,180,1.5,10.5,10,70,25,Bottom,1.0,0.75,29.51,1
+Apple Jacks,Kellogs,Cold,110,2,0,125,1.0,11.0,14,30,25,Middle,1.0,1.0,33.17,1
+Basic 4,General Mills,Cold,130,3,2,210,2.0,18.0,8,100,25,Top,1.33,0.75,37.04,1
+Bran Chex,Ralston Purina,Cold,90,2,1,200,4.0,15.0,6,125,25,Bottom,1.0,0.67,49.12,1
+Bran Flakes,Post,Cold,90,3,0,210,5.0,13.0,5,190,25,Top,1.0,0.67,53.31,1
+Cap'n'Crunch,Quaker Oats,Cold,120,1,2,220,0.0,12.0,12,35,25,Middle,1.0,0.75,18.04,1
+Cheerios,General Mills,Cold,110,6,2,290,2.0,17.0,1,105,25,Bottom,1.0,1.25,50.76,1
+Cinnamon Toast Crunch,General Mills,Cold,120,1,3,210,0.0,13.0,9,45,25,Middle,1.0,0.75,19.82,1
+Clusters,General Mills,Cold,110,3,2,140,2.0,13.0,7,105,25,Top,1.0,0.5,40.4,1
+Cocoa Puffs,General Mills,Cold,110,1,1,180,0.0,12.0,13,55,25,Middle,1.0,1.0,22.74,1
+Corn Chex,Ralston Purina,Cold,110,2,0,280,0.0,22.0,3,25,25,Bottom,1.0,1.0,41.45,1
+Corn Flakes,Kellogs,Cold,100,2,0,290,1.0,21.0,2,35,25,Bottom,1.0,1.0,45.86,1
+Corn Pops,Kellogs,Cold,110,1,0,90,1.0,13.0,12,20,25,Middle,1.0,1.0,35.78,1
+Count Chocula,General Mills,Cold,110,1,1,180,0.0,12.0,13,65,25,Middle,1.0,1.0,22.4,1
+Cracklin' Oat Bran,Kellogs,Cold,110,3,3,140,4.0,10.0,7,160,25,Top,1.0,0.5,40.45,1
+Cream of Wheat (Quick),Nabisco,Hot,100,3,0,80,1.0,21.0,0,-1,0,Middle,1.0,1.0,64.53,1
+Crispix,Kellogs,Cold,110,2,0,220,1.0,21.0,3,30,25,Top,1.0,1.0,46.9,1
+Crispy Wheat & Raisins,General Mills,Cold,100,2,1,140,2.0,11.0,10,120,25,Top,1.0,0.75,36.18,1
+Double Chex,Ralston Purina,Cold,100,2,0,190,1.0,18.0,5,80,25,Top,1.0,0.75,44.33,1
+Froot Loops,Kellogs,Cold,110,2,1,125,1.0,11.0,13,30,25,Middle,1.0,1.0,32.21,1
+Frosted Flakes,Kellogs,Cold,110,1,0,200,1.0,14.0,11,25,25,Bottom,1.0,0.75,31.44,1
+Frosted Mini-Wheats,Kellogs,Cold,100,3,0,0,3.0,14.0,7,100,25,Middle,1.0,0.8,58.35,1
+"Fruit & Fibre Dates, Walnuts, and Oats",Post,Cold,120,3,2,160,5.0,12.0,10,200,25,Top,1.25,0.67,40.92,1
+Fruitful Bran,Kellogs,Cold,120,3,0,240,5.0,14.0,12,190,25,Top,1.33,0.67,41.02,1
+Fruity Pebbles,Post,Cold,110,1,1,135,0.0,13.0,12,25,25,Middle,1.0,0.75,28.03,1
+Golden Crisp,Post,Cold,100,2,0,45,0.0,11.0,15,40,25,Bottom,1.0,0.88,35.25,1
+Golden Grahams,General Mills,Cold,110,1,1,280,0.0,15.0,9,45,25,Middle,1.0,0.75,23.8,1
+Grape Nuts Flakes,Post,Cold,100,3,1,140,3.0,15.0,5,85,25,Top,1.0,0.88,52.08,1
+Grape-Nuts,Post,Cold,110,3,0,170,3.0,17.0,3,90,25,Top,1.0,0.25,53.37,1
+Great Grains Pecan,Post,Cold,120,3,3,75,3.0,13.0,4,100,25,Top,1.0,0.33,45.81,1
+Honey Graham Ohs,Quaker Oats,Cold,120,1,2,220,1.0,12.0,11,45,25,Middle,1.0,1.0,21.87,1
+Honey Nut Cheerios,General Mills,Cold,110,3,1,250,1.5,11.5,10,90,25,Bottom,1.0,0.75,31.07,1
+Honey-comb,Post,Cold,110,1,0,180,0.0,14.0,11,35,25,Bottom,1.0,1.33,28.74,1
+Just Right Crunchy  Nuggets,Kellogs,Cold,110,2,1,170,1.0,17.0,6,60,100,Top,1.0,1.0,36.52,1
+Just Right Fruit & Nut,Kellogs,Cold,140,3,1,170,2.0,20.0,9,95,100,Top,1.3,0.75,36.47,1
+Kix,General Mills,Cold,110,2,1,260,0.0,21.0,3,40,25,Middle,1.0,1.5,39.24,1
+Life,Quaker Oats,Cold,100,4,2,150,2.0,12.0,6,95,25,Middle,1.0,0.67,45.33,1
+Lucky Charms,General Mills,Cold,110,2,1,180,0.0,12.0,12,55,25,Middle,1.0,1.0,26.73,1
+Maypo,AM Home Food,Hot,100,4,1,0,0.0,16.0,3,95,25,Middle,1.0,1.0,54.85,1
+"Muesli Raisins, Dates, & Almonds",Ralston Purina,Cold,150,4,3,95,3.0,16.0,11,170,25,Top,1.0,1.0,37.14,1
+"Muesli Raisins, Peaches, & Pecans",Ralston Purina,Cold,150,4,3,150,3.0,16.0,11,170,25,Top,1.0,1.0,34.14,1
+Mueslix Crispy Blend,Kellogs,Cold,160,3,2,150,3.0,17.0,13,160,25,Top,1.5,0.67,30.31,1
+Multi-Grain Cheerios,General Mills,Cold,100,2,1,220,2.0,15.0,6,90,25,Bottom,1.0,1.0,40.11,1
+Nut&Honey Crunch,Kellogs,Cold,120,2,1,190,0.0,15.0,9,40,25,Middle,1.0,0.67,29.92,1
+Nutri-Grain Almond-Raisin,Kellogs,Cold,140,3,2,220,3.0,21.0,7,130,25,Top,1.33,0.67,40.69,1
+Nutri-grain Wheat,Kellogs,Cold,90,3,0,170,3.0,18.0,2,90,25,Top,1.0,1.0,59.64,1
+Oatmeal Raisin Crisp,General Mills,Cold,130,3,2,170,1.5,13.5,10,120,25,Top,1.25,0.5,30.45,1
+Post Nat. Raisin Bran,Post,Cold,120,3,1,200,6.0,11.0,14,260,25,Top,1.33,0.67,37.84,1
+Product 19,Kellogs,Cold,100,3,0,320,1.0,20.0,3,45,100,Top,1.0,1.0,41.5,1
+Puffed Rice,Quaker Oats,Cold,50,1,0,0,0.0,13.0,0,15,0,Top,0.5,1.0,60.76,1
+Puffed Wheat,Quaker Oats,Cold,50,2,0,0,1.0,10.0,0,50,0,Top,0.5,1.0,63.01,1
+Quaker Oat Squares,Quaker Oats,Cold,100,4,1,135,2.0,14.0,6,110,25,Top,1.0,0.5,49.51,1
+Quaker Oatmeal,Quaker Oats,Hot,100,5,2,0,2.7,-1.0,-1,110,0,Bottom,1.0,0.67,50.83,1
+Raisin Bran,Kellogs,Cold,120,3,1,210,5.0,14.0,12,240,25,Middle,1.33,0.75,39.26,1
+Raisin Nut Bran,General Mills,Cold,100,3,2,140,2.5,10.5,8,140,25,Top,1.0,0.5,39.7,1
+Raisin Squares,Kellogs,Cold,90,2,0,0,2.0,15.0,6,110,25,Top,1.0,0.5,55.33,1
+Rice Chex,Ralston Purina,Cold,110,1,0,240,0.0,23.0,2,30,25,Bottom,1.0,1.13,42.0,1
+Rice Krispies,Kellogs,Cold,110,2,0,290,0.0,22.0,3,35,25,Bottom,1.0,1.0,40.56,1
+Shredded Wheat,Nabisco,Cold,80,2,0,0,3.0,16.0,0,95,0,Bottom,0.83,1.0,68.24,1
+Shredded Wheat 'n'Bran,Nabisco,Cold,90,3,0,0,4.0,19.0,0,140,0,Bottom,1.0,0.67,74.47,1
+Shredded Wheat spoon size,Nabisco,Cold,90,3,0,0,3.0,20.0,0,120,0,Bottom,1.0,0.67,72.8,1
+Smacks,Kellogs,Cold,110,2,1,70,1.0,9.0,15,40,25,Middle,1.0,0.75,31.23,1
+Special K,Kellogs,Cold,110,6,0,230,1.0,16.0,3,55,25,Bottom,1.0,1.0,53.13,1
+Strawberry Fruit Wheats,Nabisco,Cold,90,2,0,15,3.0,15.0,5,90,25,Middle,1.0,1.0,59.36,1
+Total Corn Flakes,General Mills,Cold,110,2,1,200,0.0,21.0,3,35,100,Top,1.0,1.0,38.84,1
+Total Raisin Bran,General Mills,Cold,140,3,1,190,4.0,15.0,14,230,100,Top,1.5,1.0,28.59,1
+Total Whole Grain,General Mills,Cold,100,3,1,200,3.0,16.0,3,110,100,Top,1.0,1.0,46.66,1
+Triples,General Mills,Cold,110,2,1,250,0.0,21.0,3,60,25,Top,1.0,0.75,39.11,1
+Trix,General Mills,Cold,110,1,1,140,0.0,13.0,12,25,25,Middle,1.0,1.0,27.75,1
+Wheat Chex,Ralston Purina,Cold,100,3,1,230,3.0,17.0,3,115,25,Bottom,1.0,0.67,49.79,1
+Wheaties,General Mills,Cold,100,3,1,200,3.0,17.0,3,110,25,Bottom,1.0,1.0,51.59,1
+Wheaties Honey Gold,General Mills,Cold,110,2,1,200,1.0,16.0,8,60,25,Bottom,1.0,0.75,36.19,1
diff --git a/data/sales_9_2022.xlsx b/data/sales_9_2022.xlsx
new file mode 100644
index 0000000..9b6845c
Binary files /dev/null and b/data/sales_9_2022.xlsx differ
diff --git a/data/shipping_tables.xlsx b/data/shipping_tables.xlsx
new file mode 100644
index 0000000..aad8f1b
Binary files /dev/null and b/data/shipping_tables.xlsx differ
diff --git a/notebooks/Category-Encoding-Article.ipynb b/notebooks/Category-Encoding-Article.ipynb
index e54c5c8..25a601c 100644
--- a/notebooks/Category-Encoding-Article.ipynb
+++ b/notebooks/Category-Encoding-Article.ipynb
@@ -12,21 +12,23 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Import the pandas, scikit-learn, numpy and [category_encoder](http://contrib.scikit-learn.org/categorical-encoding/) libraries."
+    "Import the pandas, scikit-learn, numpy and [category_encoders](https://github.com/scikit-learn-contrib/category_encoders) libraries."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import pandas as pd\n",
     "import numpy as np\n",
     "\n",
-    "from sklearn.preprocessing import LabelBinarizer, LabelEncoder\n",
+    "from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder\n",
+    "from sklearn.compose import make_column_transformer\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "from sklearn.pipeline import make_pipeline\n",
+    "from sklearn.model_selection import cross_val_score\n",
     "\n",
     "import category_encoders as ce"
    ]
@@ -41,9 +43,7 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "headers = [\"symboling\", \"normalized_losses\", \"make\", \"fuel_type\", \"aspiration\", \"num_doors\", \"body_style\",\n",
@@ -62,26 +62,35 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
-    "df = pd.read_csv(\"http://mlr.cs.umass.edu/ml/machine-learning-databases/autos/imports-85.data\",\n",
+    "df = pd.read_csv(\"https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data\",\n",
     "                 header=None, names=headers, na_values=\"?\" )"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
        "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
@@ -243,12 +252,12 @@
        "3          2              164.0         audi       gas        std      four   \n",
        "4          2              164.0         audi       gas        std      four   \n",
        "\n",
-       "    body_style drive_wheels engine_location  wheel_base   ...     engine_size  \\\n",
-       "0  convertible          rwd           front        88.6   ...             130   \n",
-       "1  convertible          rwd           front        88.6   ...             130   \n",
-       "2    hatchback          rwd           front        94.5   ...             152   \n",
-       "3        sedan          fwd           front        99.8   ...             109   \n",
-       "4        sedan          4wd           front        99.4   ...             136   \n",
+       "    body_style drive_wheels engine_location  wheel_base  ...  engine_size  \\\n",
+       "0  convertible          rwd           front        88.6  ...          130   \n",
+       "1  convertible          rwd           front        88.6  ...          130   \n",
+       "2    hatchback          rwd           front        94.5  ...          152   \n",
+       "3        sedan          fwd           front        99.8  ...          109   \n",
+       "4        sedan          4wd           front        99.4  ...          136   \n",
        "\n",
        "   fuel_system  bore  stroke compression_ratio horsepower  peak_rpm city_mpg  \\\n",
        "0         mpfi  3.47    2.68               9.0      111.0    5000.0       21   \n",
@@ -286,9 +295,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -341,9 +348,7 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "obj_df = df.select_dtypes(include=['object']).copy()"
@@ -352,14 +357,25 @@
   {
    "cell_type": "code",
    "execution_count": 7,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
        "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
@@ -481,14 +497,25 @@
   {
    "cell_type": "code",
    "execution_count": 8,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
        "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
@@ -565,9 +592,7 @@
   {
    "cell_type": "code",
    "execution_count": 9,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -596,9 +621,7 @@
   {
    "cell_type": "code",
    "execution_count": 10,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "obj_df = obj_df.fillna({\"num_doors\": \"four\"})"
@@ -607,14 +630,25 @@
   {
    "cell_type": "code",
    "execution_count": 11,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
        "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
@@ -668,9 +702,7 @@
   {
    "cell_type": "code",
    "execution_count": 12,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -680,8 +712,8 @@
        "five       11\n",
        "eight       5\n",
        "two         4\n",
-       "twelve      1\n",
        "three       1\n",
+       "twelve      1\n",
        "Name: num_cylinders, dtype: int64"
       ]
      },
@@ -697,9 +729,7 @@
   {
    "cell_type": "code",
    "execution_count": 13,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "cleanup_nums = {\"num_doors\":     {\"four\": 4, \"two\": 2},\n",
@@ -710,25 +740,34 @@
   {
    "cell_type": "code",
    "execution_count": 14,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
-    "obj_df.replace(cleanup_nums, inplace=True)"
+    "obj_df = obj_df.replace(cleanup_nums)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 15,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
        "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
@@ -850,9 +889,7 @@
   {
    "cell_type": "code",
    "execution_count": 16,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -889,9 +926,7 @@
   {
    "cell_type": "code",
    "execution_count": 17,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -916,9 +951,7 @@
   {
    "cell_type": "code",
    "execution_count": 18,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "obj_df[\"body_style\"] = obj_df[\"body_style\"].astype('category')"
@@ -927,9 +960,7 @@
   {
    "cell_type": "code",
    "execution_count": 19,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -966,9 +997,7 @@
   {
    "cell_type": "code",
    "execution_count": 20,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "obj_df[\"body_style_cat\"] = obj_df[\"body_style\"].cat.codes"
@@ -977,14 +1006,25 @@
   {
    "cell_type": "code",
    "execution_count": 21,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
        "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
@@ -1105,9 +1145,7 @@
   {
    "cell_type": "code",
    "execution_count": 22,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -1145,14 +1183,25 @@
   {
    "cell_type": "code",
    "execution_count": 23,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
        "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
@@ -1185,9 +1234,9 @@
        "      <td>4</td>\n",
        "      <td>mpfi</td>\n",
        "      <td>0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -1201,9 +1250,9 @@
        "      <td>4</td>\n",
        "      <td>mpfi</td>\n",
        "      <td>0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -1217,9 +1266,9 @@
        "      <td>6</td>\n",
        "      <td>mpfi</td>\n",
        "      <td>2</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -1233,9 +1282,9 @@
        "      <td>4</td>\n",
        "      <td>mpfi</td>\n",
        "      <td>3</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -1249,9 +1298,9 @@
        "      <td>5</td>\n",
        "      <td>mpfi</td>\n",
        "      <td>3</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -1266,18 +1315,18 @@
        "4         audi       gas        std          4        sedan           front   \n",
        "\n",
        "  engine_type  num_cylinders fuel_system  body_style_cat  drive_wheels_4wd  \\\n",
-       "0        dohc              4        mpfi               0               0.0   \n",
-       "1        dohc              4        mpfi               0               0.0   \n",
-       "2        ohcv              6        mpfi               2               0.0   \n",
-       "3         ohc              4        mpfi               3               0.0   \n",
-       "4         ohc              5        mpfi               3               1.0   \n",
+       "0        dohc              4        mpfi               0                 0   \n",
+       "1        dohc              4        mpfi               0                 0   \n",
+       "2        ohcv              6        mpfi               2                 0   \n",
+       "3         ohc              4        mpfi               3                 0   \n",
+       "4         ohc              5        mpfi               3                 1   \n",
        "\n",
        "   drive_wheels_fwd  drive_wheels_rwd  \n",
-       "0               0.0               1.0  \n",
-       "1               0.0               1.0  \n",
-       "2               0.0               1.0  \n",
-       "3               1.0               0.0  \n",
-       "4               0.0               0.0  "
+       "0                 0                 1  \n",
+       "1                 0                 1  \n",
+       "2                 0                 1  \n",
+       "3                 1                 0  \n",
+       "4                 0                 0  "
       ]
      },
      "execution_count": 23,
@@ -1299,14 +1348,25 @@
   {
    "cell_type": "code",
    "execution_count": 24,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
        "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
@@ -1342,14 +1402,14 @@
        "      <td>4</td>\n",
        "      <td>mpfi</td>\n",
        "      <td>0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -1362,14 +1422,14 @@
        "      <td>4</td>\n",
        "      <td>mpfi</td>\n",
        "      <td>0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -1382,14 +1442,14 @@
        "      <td>6</td>\n",
        "      <td>mpfi</td>\n",
        "      <td>2</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -1402,14 +1462,14 @@
        "      <td>4</td>\n",
        "      <td>mpfi</td>\n",
        "      <td>3</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -1422,14 +1482,14 @@
        "      <td>5</td>\n",
        "      <td>mpfi</td>\n",
        "      <td>3</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -1444,18 +1504,18 @@
        "4         audi       gas        std          4           front         ohc   \n",
        "\n",
        "   num_cylinders fuel_system  body_style_cat  body_convertible  body_hardtop  \\\n",
-       "0              4        mpfi               0               1.0           0.0   \n",
-       "1              4        mpfi               0               1.0           0.0   \n",
-       "2              6        mpfi               2               0.0           0.0   \n",
-       "3              4        mpfi               3               0.0           0.0   \n",
-       "4              5        mpfi               3               0.0           0.0   \n",
+       "0              4        mpfi               0                 1             0   \n",
+       "1              4        mpfi               0                 1             0   \n",
+       "2              6        mpfi               2                 0             0   \n",
+       "3              4        mpfi               3                 0             0   \n",
+       "4              5        mpfi               3                 0             0   \n",
        "\n",
        "   body_hatchback  body_sedan  body_wagon  drive_4wd  drive_fwd  drive_rwd  \n",
-       "0             0.0         0.0         0.0        0.0        0.0        1.0  \n",
-       "1             0.0         0.0         0.0        0.0        0.0        1.0  \n",
-       "2             1.0         0.0         0.0        0.0        0.0        1.0  \n",
-       "3             0.0         1.0         0.0        0.0        1.0        0.0  \n",
-       "4             0.0         1.0         0.0        1.0        0.0        0.0  "
+       "0               0           0           0          0          0          1  \n",
+       "1               0           0           0          0          0          1  \n",
+       "2               1           0           0          0          0          1  \n",
+       "3               0           1           0          0          1          0  \n",
+       "4               0           1           0          1          0          0  "
       ]
      },
      "execution_count": 24,
@@ -1478,9 +1538,7 @@
   {
    "cell_type": "code",
    "execution_count": 25,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -1488,8 +1546,8 @@
        "ohc      148\n",
        "ohcf      15\n",
        "ohcv      13\n",
-       "dohc      12\n",
        "l         12\n",
+       "dohc      12\n",
        "rotor      4\n",
        "dohcv      1\n",
        "Name: engine_type, dtype: int64"
@@ -1514,9 +1572,7 @@
   {
    "cell_type": "code",
    "execution_count": 26,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "obj_df[\"OHC_Code\"] = np.where(obj_df[\"engine_type\"].str.contains(\"ohc\"), 1, 0)"
@@ -1525,14 +1581,25 @@
   {
    "cell_type": "code",
    "execution_count": 27,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
        "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
@@ -1717,36 +1784,43 @@
   {
    "cell_type": "code",
    "execution_count": 28,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
-    "lb_make = LabelEncoder()"
+    "ord_enc = OrdinalEncoder()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 29,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
-    "obj_df[\"make_code\"] = lb_make.fit_transform(obj_df[\"make\"])"
+    "obj_df[\"make_code\"] = ord_enc.fit_transform(obj_df[[\"make\"]])"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 30,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
        "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
@@ -1759,57 +1833,57 @@
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>alfa-romero</td>\n",
-       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>alfa-romero</td>\n",
-       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>alfa-romero</td>\n",
-       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>audi</td>\n",
-       "      <td>1</td>\n",
+       "      <td>1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td>audi</td>\n",
-       "      <td>1</td>\n",
+       "      <td>1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
        "      <td>audi</td>\n",
-       "      <td>1</td>\n",
+       "      <td>1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
        "      <td>audi</td>\n",
-       "      <td>1</td>\n",
+       "      <td>1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
        "      <td>audi</td>\n",
-       "      <td>1</td>\n",
+       "      <td>1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
        "      <td>audi</td>\n",
-       "      <td>1</td>\n",
+       "      <td>1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
        "      <td>audi</td>\n",
-       "      <td>1</td>\n",
+       "      <td>1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
        "      <td>bmw</td>\n",
-       "      <td>2</td>\n",
+       "      <td>2.0</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -1817,17 +1891,17 @@
       ],
       "text/plain": [
        "           make  make_code\n",
-       "0   alfa-romero          0\n",
-       "1   alfa-romero          0\n",
-       "2   alfa-romero          0\n",
-       "3          audi          1\n",
-       "4          audi          1\n",
-       "5          audi          1\n",
-       "6          audi          1\n",
-       "7          audi          1\n",
-       "8          audi          1\n",
-       "9          audi          1\n",
-       "10          bmw          2"
+       "0   alfa-romero        0.0\n",
+       "1   alfa-romero        0.0\n",
+       "2   alfa-romero        0.0\n",
+       "3          audi        1.0\n",
+       "4          audi        1.0\n",
+       "5          audi        1.0\n",
+       "6          audi        1.0\n",
+       "7          audi        1.0\n",
+       "8          audi        1.0\n",
+       "9          audi        1.0\n",
+       "10          bmw        2.0"
       ]
      },
      "execution_count": 30,
@@ -1849,13 +1923,11 @@
   {
    "cell_type": "code",
    "execution_count": 31,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
-    "lb_style = LabelBinarizer()\n",
-    "lb_results = lb_style.fit_transform(obj_df[\"body_style\"])"
+    "oe_style = OneHotEncoder()\n",
+    "oe_results = oe_style.fit_transform(obj_df[[\"body_style\"]])"
    ]
   },
   {
@@ -1868,20 +1940,18 @@
   {
    "cell_type": "code",
    "execution_count": 32,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "array([[1, 0, 0, 0, 0],\n",
-       "       [1, 0, 0, 0, 0],\n",
-       "       [0, 0, 1, 0, 0],\n",
-       "       ..., \n",
-       "       [0, 0, 0, 1, 0],\n",
-       "       [0, 0, 0, 1, 0],\n",
-       "       [0, 0, 0, 1, 0]])"
+       "array([[1., 0., 0., 0., 0.],\n",
+       "       [1., 0., 0., 0., 0.],\n",
+       "       [0., 0., 1., 0., 0.],\n",
+       "       ...,\n",
+       "       [0., 0., 0., 1., 0.],\n",
+       "       [0., 0., 0., 1., 0.],\n",
+       "       [0., 0., 0., 1., 0.]])"
       ]
      },
      "execution_count": 32,
@@ -1890,23 +1960,34 @@
     }
    ],
    "source": [
-    "lb_results"
+    "oe_results.toarray()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 33,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
        "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
+       "    <tr>\n",
        "      <th></th>\n",
        "      <th>convertible</th>\n",
        "      <th>hardtop</th>\n",
@@ -1918,55 +1999,55 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "   convertible  hardtop  hatchback  sedan  wagon\n",
-       "0            1        0          0      0      0\n",
-       "1            1        0          0      0      0\n",
-       "2            0        0          1      0      0\n",
-       "3            0        0          0      1      0\n",
-       "4            0        0          0      1      0"
+       "  convertible hardtop hatchback sedan wagon\n",
+       "0         1.0     0.0       0.0   0.0   0.0\n",
+       "1         1.0     0.0       0.0   0.0   0.0\n",
+       "2         0.0     0.0       1.0   0.0   0.0\n",
+       "3         0.0     0.0       0.0   1.0   0.0\n",
+       "4         0.0     0.0       0.0   1.0   0.0"
       ]
      },
      "execution_count": 33,
@@ -1975,7 +2056,7 @@
     }
    ],
    "source": [
-    "pd.DataFrame(lb_results, columns=lb_style.classes_).head()"
+    "pd.DataFrame(oe_results.toarray(), columns=oe_style.categories_).head()"
    ]
   },
   {
@@ -1983,15 +2064,13 @@
    "metadata": {},
    "source": [
     "### Advanced Encoding\n",
-    "[category_encoder](http://contrib.scikit-learn.org/categorical-encoding/) library"
+    "[category_encoders](https://github.com/scikit-learn-contrib/category_encoders) library"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 34,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Get a new clean dataframe\n",
@@ -2001,14 +2080,25 @@
   {
    "cell_type": "code",
    "execution_count": 35,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
        "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
@@ -2130,15 +2220,42 @@
   {
    "cell_type": "code",
    "execution_count": 36,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/chris/miniconda3/envs/pbpcode/lib/python3.8/site-packages/category_encoders/utils.py:21: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead\n",
+      "  elif pd.api.types.is_categorical(cols):\n"
+     ]
+    },
     {
      "data": {
       "text/plain": [
-       "BackwardDifferenceEncoder(cols=['engine_type'], drop_invariant=False,\n",
-       "             return_df=True, verbose=0)"
+       "BackwardDifferenceEncoder(cols=['engine_type'],\n",
+       "                          mapping=[{'col': 'engine_type',\n",
+       "                                    'mapping':     engine_type_0  engine_type_1  engine_type_2  engine_type_3  engine_type_4  \\\n",
+       " 1      -0.857143      -0.714286      -0.571429      -0.428571      -0.285714   \n",
+       " 2       0.142857      -0.714286      -0.571429      -0.428571      -0.285714   \n",
+       " 3       0.142857       0.285714      -0.571429      -0.428571      -0.285714   \n",
+       " 4       0.142857       0.285714       0.428571      -0.428571      -0.285714   \n",
+       " 5       0.142857       0.285714       0.428571       0.571429      -0.285714   \n",
+       " 6       0.142857       0.285714       0.428571       0.571429       0.714286   \n",
+       " 7       0.142857       0.285714       0.428571       0.571429       0.714286   \n",
+       "-1       0.000000       0.000000       0.000000       0.000000       0.000000   \n",
+       "-2       0.000000       0.000000       0.000000       0.000000       0.000000   \n",
+       "\n",
+       "    engine_type_5  \n",
+       " 1      -0.142857  \n",
+       " 2      -0.142857  \n",
+       " 3      -0.142857  \n",
+       " 4      -0.142857  \n",
+       " 5      -0.142857  \n",
+       " 6      -0.142857  \n",
+       " 7       0.857143  \n",
+       "-1       0.000000  \n",
+       "-2       0.000000  }])"
       ]
      },
      "execution_count": 36,
@@ -2147,70 +2264,85 @@
     }
    ],
    "source": [
-    "encoder = ce.backward_difference.BackwardDifferenceEncoder(cols=[\"engine_type\"])\n",
+    "# Specify the columns to encode, then fit the encoder\n",
+    "encoder = ce.BackwardDifferenceEncoder(cols=[\"engine_type\"])\n",
     "encoder.fit(obj_df, verbose=1)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 37,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/chris/miniconda3/envs/pbpcode/lib/python3.8/site-packages/category_encoders/utils.py:21: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead\n",
+      "  elif pd.api.types.is_categorical(cols):\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
        "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
-       "      <th>col_engine_type_0</th>\n",
-       "      <th>col_engine_type_1</th>\n",
-       "      <th>col_engine_type_2</th>\n",
-       "      <th>col_engine_type_3</th>\n",
-       "      <th>col_engine_type_4</th>\n",
-       "      <th>col_engine_type_5</th>\n",
-       "      <th>col_engine_type_6</th>\n",
+       "      <th>engine_type_0</th>\n",
+       "      <th>engine_type_1</th>\n",
+       "      <th>engine_type_2</th>\n",
+       "      <th>engine_type_3</th>\n",
+       "      <th>engine_type_4</th>\n",
+       "      <th>engine_type_5</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.142857</td>\n",
-       "      <td>0.285714</td>\n",
-       "      <td>0.428571</td>\n",
-       "      <td>0.571429</td>\n",
-       "      <td>0.714286</td>\n",
+       "      <td>-0.857143</td>\n",
+       "      <td>-0.714286</td>\n",
+       "      <td>-0.571429</td>\n",
+       "      <td>-0.428571</td>\n",
+       "      <td>-0.285714</td>\n",
        "      <td>-0.142857</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.142857</td>\n",
-       "      <td>0.285714</td>\n",
-       "      <td>0.428571</td>\n",
-       "      <td>0.571429</td>\n",
-       "      <td>0.714286</td>\n",
+       "      <td>-0.857143</td>\n",
+       "      <td>-0.714286</td>\n",
+       "      <td>-0.571429</td>\n",
+       "      <td>-0.428571</td>\n",
+       "      <td>-0.285714</td>\n",
        "      <td>-0.142857</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>1.0</td>\n",
        "      <td>0.142857</td>\n",
-       "      <td>0.285714</td>\n",
-       "      <td>0.428571</td>\n",
-       "      <td>0.571429</td>\n",
-       "      <td>0.714286</td>\n",
-       "      <td>0.857143</td>\n",
+       "      <td>-0.714286</td>\n",
+       "      <td>-0.571429</td>\n",
+       "      <td>-0.428571</td>\n",
+       "      <td>-0.285714</td>\n",
+       "      <td>-0.142857</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>1.0</td>\n",
        "      <td>0.142857</td>\n",
-       "      <td>-0.714286</td>\n",
+       "      <td>0.285714</td>\n",
        "      <td>-0.571429</td>\n",
        "      <td>-0.428571</td>\n",
        "      <td>-0.285714</td>\n",
@@ -2218,9 +2350,8 @@
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>1.0</td>\n",
        "      <td>0.142857</td>\n",
-       "      <td>-0.714286</td>\n",
+       "      <td>0.285714</td>\n",
        "      <td>-0.571429</td>\n",
        "      <td>-0.428571</td>\n",
        "      <td>-0.285714</td>\n",
@@ -2231,19 +2362,19 @@
        "</div>"
       ],
       "text/plain": [
-       "   col_engine_type_0  col_engine_type_1  col_engine_type_2  col_engine_type_3  \\\n",
-       "0                1.0           0.142857           0.285714           0.428571   \n",
-       "1                1.0           0.142857           0.285714           0.428571   \n",
-       "2                1.0           0.142857           0.285714           0.428571   \n",
-       "3                1.0           0.142857          -0.714286          -0.571429   \n",
-       "4                1.0           0.142857          -0.714286          -0.571429   \n",
+       "   engine_type_0  engine_type_1  engine_type_2  engine_type_3  engine_type_4  \\\n",
+       "0      -0.857143      -0.714286      -0.571429      -0.428571      -0.285714   \n",
+       "1      -0.857143      -0.714286      -0.571429      -0.428571      -0.285714   \n",
+       "2       0.142857      -0.714286      -0.571429      -0.428571      -0.285714   \n",
+       "3       0.142857       0.285714      -0.571429      -0.428571      -0.285714   \n",
+       "4       0.142857       0.285714      -0.571429      -0.428571      -0.285714   \n",
        "\n",
-       "   col_engine_type_4  col_engine_type_5  col_engine_type_6  \n",
-       "0           0.571429           0.714286          -0.142857  \n",
-       "1           0.571429           0.714286          -0.142857  \n",
-       "2           0.571429           0.714286           0.857143  \n",
-       "3          -0.428571          -0.285714          -0.142857  \n",
-       "4          -0.428571          -0.285714          -0.142857  "
+       "   engine_type_5  \n",
+       "0      -0.142857  \n",
+       "1      -0.142857  \n",
+       "2      -0.142857  \n",
+       "3      -0.142857  \n",
+       "4      -0.142857  "
       ]
      },
      "execution_count": 37,
@@ -2252,7 +2383,7 @@
     }
    ],
    "source": [
-    "encoder.transform(obj_df).iloc[:,0:7].head()"
+    "encoder.fit_transform(obj_df).iloc[:,8:14].head()"
    ]
   },
   {
@@ -2265,137 +2396,217 @@
   {
    "cell_type": "code",
    "execution_count": 38,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "PolynomialEncoder(cols=['engine_type'], drop_invariant=False, return_df=True,\n",
-       "         verbose=0)"
-      ]
-     },
-     "execution_count": 38,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "encoder = ce.polynomial.PolynomialEncoder(cols=[\"engine_type\"])\n",
-    "encoder.fit(obj_df, verbose=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/chris/miniconda3/envs/pbpcode/lib/python3.8/site-packages/category_encoders/utils.py:21: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead\n",
+      "  elif pd.api.types.is_categorical(cols):\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
        "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
-       "      <th>col_engine_type_0</th>\n",
-       "      <th>col_engine_type_1</th>\n",
-       "      <th>col_engine_type_2</th>\n",
-       "      <th>col_engine_type_3</th>\n",
-       "      <th>col_engine_type_4</th>\n",
-       "      <th>col_engine_type_5</th>\n",
-       "      <th>col_engine_type_6</th>\n",
+       "      <th>engine_type_0</th>\n",
+       "      <th>engine_type_1</th>\n",
+       "      <th>engine_type_2</th>\n",
+       "      <th>engine_type_3</th>\n",
+       "      <th>engine_type_4</th>\n",
+       "      <th>engine_type_5</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>1.0</td>\n",
-       "      <td>-5.669467e-01</td>\n",
-       "      <td>5.455447e-01</td>\n",
-       "      <td>-4.082483e-01</td>\n",
+       "      <td>-0.566947</td>\n",
+       "      <td>0.545545</td>\n",
+       "      <td>-0.408248</td>\n",
        "      <td>0.241747</td>\n",
-       "      <td>-1.091089e-01</td>\n",
+       "      <td>-0.109109</td>\n",
        "      <td>0.032898</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>1.0</td>\n",
-       "      <td>-5.669467e-01</td>\n",
-       "      <td>5.455447e-01</td>\n",
-       "      <td>-4.082483e-01</td>\n",
+       "      <td>-0.566947</td>\n",
+       "      <td>0.545545</td>\n",
+       "      <td>-0.408248</td>\n",
        "      <td>0.241747</td>\n",
-       "      <td>-1.091089e-01</td>\n",
+       "      <td>-0.109109</td>\n",
        "      <td>0.032898</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>1.0</td>\n",
-       "      <td>3.779645e-01</td>\n",
-       "      <td>3.970680e-17</td>\n",
-       "      <td>-4.082483e-01</td>\n",
+       "      <td>-0.377964</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.408248</td>\n",
        "      <td>-0.564076</td>\n",
-       "      <td>-4.364358e-01</td>\n",
+       "      <td>0.436436</td>\n",
        "      <td>-0.197386</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>1.0</td>\n",
-       "      <td>1.347755e-17</td>\n",
-       "      <td>-4.364358e-01</td>\n",
-       "      <td>1.528598e-17</td>\n",
-       "      <td>0.483494</td>\n",
-       "      <td>8.990141e-18</td>\n",
-       "      <td>-0.657952</td>\n",
+       "      <td>-0.188982</td>\n",
+       "      <td>-0.327327</td>\n",
+       "      <td>0.408248</td>\n",
+       "      <td>0.080582</td>\n",
+       "      <td>-0.545545</td>\n",
+       "      <td>0.493464</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>1.0</td>\n",
-       "      <td>1.347755e-17</td>\n",
-       "      <td>-4.364358e-01</td>\n",
-       "      <td>1.528598e-17</td>\n",
-       "      <td>0.483494</td>\n",
-       "      <td>8.990141e-18</td>\n",
-       "      <td>-0.657952</td>\n",
+       "      <td>-0.188982</td>\n",
+       "      <td>-0.327327</td>\n",
+       "      <td>0.408248</td>\n",
+       "      <td>0.080582</td>\n",
+       "      <td>-0.545545</td>\n",
+       "      <td>0.493464</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "   col_engine_type_0  col_engine_type_1  col_engine_type_2  col_engine_type_3  \\\n",
-       "0                1.0      -5.669467e-01       5.455447e-01      -4.082483e-01   \n",
-       "1                1.0      -5.669467e-01       5.455447e-01      -4.082483e-01   \n",
-       "2                1.0       3.779645e-01       3.970680e-17      -4.082483e-01   \n",
-       "3                1.0       1.347755e-17      -4.364358e-01       1.528598e-17   \n",
-       "4                1.0       1.347755e-17      -4.364358e-01       1.528598e-17   \n",
+       "   engine_type_0  engine_type_1  engine_type_2  engine_type_3  engine_type_4  \\\n",
+       "0      -0.566947       0.545545      -0.408248       0.241747      -0.109109   \n",
+       "1      -0.566947       0.545545      -0.408248       0.241747      -0.109109   \n",
+       "2      -0.377964       0.000000       0.408248      -0.564076       0.436436   \n",
+       "3      -0.188982      -0.327327       0.408248       0.080582      -0.545545   \n",
+       "4      -0.188982      -0.327327       0.408248       0.080582      -0.545545   \n",
        "\n",
-       "   col_engine_type_4  col_engine_type_5  col_engine_type_6  \n",
-       "0           0.241747      -1.091089e-01           0.032898  \n",
-       "1           0.241747      -1.091089e-01           0.032898  \n",
-       "2          -0.564076      -4.364358e-01          -0.197386  \n",
-       "3           0.483494       8.990141e-18          -0.657952  \n",
-       "4           0.483494       8.990141e-18          -0.657952  "
+       "   engine_type_5  \n",
+       "0       0.032898  \n",
+       "1       0.032898  \n",
+       "2      -0.197386  \n",
+       "3       0.493464  \n",
+       "4       0.493464  "
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "encoder = ce.polynomial.PolynomialEncoder(cols=[\"engine_type\"])\n",
+    "encoder.fit_transform(obj_df, verbose=1).iloc[:,8:14].head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Scikit-learn pipeline\n",
+    "Show an example of how to incorporate the encoding strategies into a scikit-learn pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# for the purposes of this analysis, only use a small subset of features\n",
+    "feature_cols = [\n",
+    "    'fuel_type', 'make', 'aspiration', 'highway_mpg', 'city_mpg',\n",
+    "    'curb_weight', 'drive_wheels'\n",
+    "]\n",
+    "\n",
+    "# Remove the empty price rows\n",
+    "df_ml = df.dropna(subset=['price'])\n",
+    "\n",
+    "X = df_ml[feature_cols]\n",
+    "y = df_ml['price']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "column_trans = make_column_transformer((OneHotEncoder(handle_unknown='ignore'),\n",
+    "                                        ['fuel_type', 'make', 'drive_wheels']),\n",
+    "                                      (OrdinalEncoder(), ['aspiration']),\n",
+    "                                      remainder='passthrough')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "linreg = LinearRegression()\n",
+    "pipe = make_pipeline(column_trans, linreg)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([-4476.0937653 , -1014.54842052, -4227.68553953, -4936.79899194,\n",
+       "       -1591.8291911 , -3716.06617255, -4293.79197464, -1390.00486495,\n",
+       "       -1600.57946369, -2124.30041954])"
       ]
      },
-     "execution_count": 39,
+     "execution_count": 42,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "encoder.transform(obj_df).iloc[:,0:7].head()"
+    "cross_val_score(pipe, X, y, cv=10, scoring='neg_mean_absolute_error')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "-2937.17"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Average the negative mean absolute error across the 10 cross-validation folds\n",
+    "cross_val_score(pipe, X, y, cv=10, scoring='neg_mean_absolute_error').mean().round(2)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": []
   }
@@ -2416,9 +2627,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.2"
+   "version": "3.8.5"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }
diff --git a/notebooks/Common-Excel-Part-2.ipynb b/notebooks/Common-Excel-Part-2.ipynb
index 8d3c24d..e6a5c34 100644
--- a/notebooks/Common-Excel-Part-2.ipynb
+++ b/notebooks/Common-Excel-Part-2.ipynb
@@ -55,7 +55,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "df = pd.read_excel(\"../data/sample-salesv3.xlsx\")"
+    "df = pd.read_excel('https://github.com/chris1610/pbpython/blob/master/data/sample-salesv3.xlsx?raw=true')"
    ]
   },
   {
@@ -117,18 +117,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -284,18 +284,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -406,18 +406,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -528,18 +528,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -650,18 +650,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -774,18 +774,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -898,18 +898,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -1034,18 +1034,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -1157,18 +1157,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -1279,18 +1279,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -1401,18 +1401,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -1523,18 +1523,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -1638,18 +1638,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -1762,18 +1762,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -1890,18 +1890,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -2017,18 +2017,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -2137,18 +2137,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -2257,18 +2257,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -2377,18 +2377,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -2511,18 +2511,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -2640,18 +2640,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -2826,18 +2826,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -2948,18 +2948,18 @@
      "data": {
       "text/html": [
        "<div>\n",
-       "<style>\n",
-       "    .dataframe thead tr:only-child th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: left;\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
        "    }\n",
        "\n",
        "    .dataframe tbody tr th {\n",
        "        vertical-align: top;\n",
        "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -3124,7 +3124,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python [default]",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -3138,7 +3138,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.2"
+   "version": "3.8.5"
   }
  },
  "nbformat": 4,
diff --git a/notebooks/case_study_weather/1-dwd_konverter_download.ipynb b/notebooks/case_study_weather/1-dwd_konverter_download.ipynb
new file mode 100644
index 0000000..e3659cf
--- /dev/null
+++ b/notebooks/case_study_weather/1-dwd_konverter_download.ipynb
@@ -0,0 +1,109 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Import temperature data from the DWD and process it\n",
+    "\n",
+    "This notebook pulls historical temperature data from the DWD server and formats it for future use in other projects. The data is delivered at an hourly frequency in a .zip file for each of the available weather stations. To use the data, we need everything in a single .csv file, with all stations side by side. Also, we need the daily average.\n",
+    "\n",
+    "To reduce computing time, we also crop all data earlier than 2007. \n",
+    "\n",
+    "Files should be executed in the following pipeline:\n",
+    "* 1-dwd_konverter_download\n",
+    "* 2-dwd_konverter_extract\n",
+    "* 3-dwd_konverter_build_df\n",
+    "* 4-dwd_konverter_final_processing"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1.) Download files from the DWD-API\n",
+    "Here we download all relevant files from the DWD server. The DWD server is http-based, so we scrape the download page for all links that match 'stundenwerte_TU_.\\*_hist.zip' and download them to the folder 'download'. \n",
+    "\n",
+    "Link to the relevant DWD-page: https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/hourly/air_temperature/historical/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done\n"
+     ]
+    }
+   ],
+   "source": [
+    "import requests\n",
+    "import re\n",
+    "from bs4 import BeautifulSoup\n",
+    "from pathlib import Path\n",
+    "\n",
+    "# Set base values\n",
+    "download_folder = Path.cwd() / 'download'\n",
+    "base_url = 'https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/hourly/air_temperature/historical/'\n",
+    "# Create the target folder so the cell also runs on a fresh checkout\n",
+    "download_folder.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "# For testing, only download 10 files (set to None to download everything)\n",
+    "file_max = 10\n",
+    "\n",
+    "# Use one session for the index page and every download\n",
+    "with requests.Session() as s:\n",
+    "    resp = s.get(base_url)\n",
+    "    resp.raise_for_status()\n",
+    "\n",
+    "    # Parse the Index-Page for all relevant <a href>\n",
+    "    soup = BeautifulSoup(resp.content, 'html.parser')\n",
+    "    links = soup.find_all(\"a\", href=re.compile(\"stundenwerte_TU_.*_hist.zip\"))\n",
+    "\n",
+    "    # Download the .zip files to the download_folder; the slice applies\n",
+    "    # the limit before any request is sent\n",
+    "    for link in links[:file_max]:\n",
+    "        with s.get(base_url + link['href'], stream=True) as zip_response:\n",
+    "            # Fail loudly instead of saving an HTTP error page as a .zip\n",
+    "            zip_response.raise_for_status()\n",
+    "            with open(download_folder / link['href'], 'wb') as file:\n",
+    "                for chunk in zip_response.iter_content(chunk_size=8192):\n",
+    "                    file.write(chunk)\n",
+    "\n",
+    "print('Done')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/case_study_weather/2-dwd_konverter_extract.ipynb b/notebooks/case_study_weather/2-dwd_konverter_extract.ipynb
new file mode 100644
index 0000000..ac8d1c4
--- /dev/null
+++ b/notebooks/case_study_weather/2-dwd_konverter_extract.ipynb
@@ -0,0 +1,98 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Import temperature data from the DWD and process it\n",
+    "\n",
+    "This notebook pulls historical temperature data from the DWD server and formats it for future use in other projects. The data is delivered at an hourly frequency in a .zip file for each of the available weather stations. To use the data, we need everything in a single .csv file, with all stations side by side. Also, we need the daily average.\n",
+    "\n",
+    "To reduce computing time, we also crop all data earlier than 2007. \n",
+    "\n",
+    "Files should be executed in the following pipeline:\n",
+    "* 1-dwd_konverter_download\n",
+    "* 2-dwd_konverter_extract\n",
+    "* 3-dwd_konverter_build_df\n",
+    "* 4-dwd_konverter_final_processing"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2.) Extract all .zip-archives\n",
+    "In this next step, we extract a single file from all the downloaded .zip files and save them to the 'import' folder. Beware, there is going to be a lot of data (~6 GB of .csv files)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'Done'"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from pathlib import Path\n",
+    "import re\n",
+    "from zipfile import ZipFile\n",
+    "\n",
+    "# Folder definitions\n",
+    "download_folder = Path.cwd() / 'download'\n",
+    "import_folder = Path.cwd() / 'import'\n",
+    "\n",
+    "# Find all .zip files in the download folder (sorted for a stable order)\n",
+    "unzip_files = sorted(download_folder.glob('stundenwerte_TU_*_hist.zip'))\n",
+    "\n",
+    "# Set the name pattern of the file we need\n",
+    "regex_name = re.compile('produkt.*')\n",
+    "\n",
+    "# Open each archive, find the first member matching the regex pattern, extract it to 'import'\n",
+    "for file in unzip_files:\n",
+    "    with ZipFile(file, 'r') as zipObj:\n",
+    "        extract_filename = next(filter(regex_name.match, zipObj.namelist()), None)\n",
+    "        if extract_filename is None:\n",
+    "            raise FileNotFoundError(f\"No 'produkt*' member found in {file}\")\n",
+    "        zipObj.extract(extract_filename, import_folder)\n",
+    "\n",
+    "display('Done')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/case_study_weather/3-dwd_konverter_build_df.ipynb b/notebooks/case_study_weather/3-dwd_konverter_build_df.ipynb
new file mode 100644
index 0000000..accf54e
--- /dev/null
+++ b/notebooks/case_study_weather/3-dwd_konverter_build_df.ipynb
@@ -0,0 +1,488 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Import temperature data from the DWD and process it\n",
+    "\n",
+    "This notebook pulls historical temperature data from the DWD server and formats it for future use in other projects. The data is delivered at an hourly frequency in a .zip file for each of the available weather stations. To use the data, we need everything in a single .csv file, with all stations side by side. Also, we need the daily average.\n",
+    "\n",
+    "To reduce computing time, we also crop all data earlier than 2007. \n",
+    "\n",
+    "Files should be executed in the following pipeline:\n",
+    "* 1-dwd_konverter_download\n",
+    "* 2-dwd_konverter_extract\n",
+    "* 3-dwd_konverter_build_df\n",
+    "* 4-dwd_konverter_final_processing"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3.) Import the .csv files into pandas and concat into a single df\n",
+    "Now we need to import everything that we have extracted. This operation is going to take some time (approx. 20 minutes). If you want to save time, you can just delete a few of the .csv-files in the 'import' folder. The script also works with only a few files. \n",
+    "\n",
+    "### Process individual files\n",
+    "The files are imported into a single df, stripped of unnecessary columns and filtered by date. Then we set a DateTimeIndex and concatenate them into the main_df. Because the loop takes a long time, we output some status messages, to ensure the process is still running. \n",
+    "### Process the concatenated main_df\n",
+    "Then we display some information about the main_df so we can check that there are no errors, mainly that all data types are recognized correctly. Also, we drop duplicate entries, in case some of the .csv files were copied.\n",
+    "### Unstack and export\n",
+    "For the final step, we unstack the main_df and save it to a .csv and a .pkl file for the next step. Also, we display some output to get a grasp of what is going on. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'Finished file: import/produkt_tu_stunde_20041101_20191231_00078.txt'"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'This is file 10'"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'Shape of the main_df is: (771356, 1)'"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "float    771356\n",
+       "Name: TT_TU, dtype: int64"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'Shape of the main_df is: (113952, 9)'"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr:last-of-type th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th></th>\n",
+       "      <th colspan=\"9\" halign=\"left\">TT_TU</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>STATIONS_ID</th>\n",
+       "      <th>3</th>\n",
+       "      <th>44</th>\n",
+       "      <th>71</th>\n",
+       "      <th>73</th>\n",
+       "      <th>78</th>\n",
+       "      <th>91</th>\n",
+       "      <th>96</th>\n",
+       "      <th>102</th>\n",
+       "      <th>125</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>MESS_DATUM</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2007-01-01 00:00:00</th>\n",
+       "      <td>11.4</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>9.4</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>9.7</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2007-01-01 01:00:00</th>\n",
+       "      <td>12.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>11.4</td>\n",
+       "      <td>9.6</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>10.4</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2007-01-01 02:00:00</th>\n",
+       "      <td>12.3</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>9.4</td>\n",
+       "      <td>10.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>9.9</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2007-01-01 03:00:00</th>\n",
+       "      <td>11.5</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>9.3</td>\n",
+       "      <td>9.7</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>9.5</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2007-01-01 04:00:00</th>\n",
+       "      <td>9.6</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>8.6</td>\n",
+       "      <td>10.2</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>8.9</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                    TT_TU                                      \n",
+       "STATIONS_ID           3   44  71  73    78    91  96    102 125\n",
+       "MESS_DATUM                                                     \n",
+       "2007-01-01 00:00:00  11.4 NaN NaN NaN  11.0   9.4 NaN   9.7 NaN\n",
+       "2007-01-01 01:00:00  12.0 NaN NaN NaN  11.4   9.6 NaN  10.4 NaN\n",
+       "2007-01-01 02:00:00  12.3 NaN NaN NaN   9.4  10.0 NaN   9.9 NaN\n",
+       "2007-01-01 03:00:00  11.5 NaN NaN NaN   9.3   9.7 NaN   9.5 NaN\n",
+       "2007-01-01 04:00:00   9.6 NaN NaN NaN   8.6  10.2 NaN   8.9 NaN"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th></th>\n",
+       "      <th colspan=\"9\" halign=\"left\">TT_TU</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>STATIONS_ID</th>\n",
+       "      <th>3</th>\n",
+       "      <th>44</th>\n",
+       "      <th>71</th>\n",
+       "      <th>73</th>\n",
+       "      <th>78</th>\n",
+       "      <th>91</th>\n",
+       "      <th>96</th>\n",
+       "      <th>102</th>\n",
+       "      <th>125</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>37224.000000</td>\n",
+       "      <td>111003.000000</td>\n",
+       "      <td>88391.000000</td>\n",
+       "      <td>111471.000000</td>\n",
+       "      <td>113950.000000</td>\n",
+       "      <td>113950.000000</td>\n",
+       "      <td>6399.000000</td>\n",
+       "      <td>106379.000000</td>\n",
+       "      <td>82589.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>10.103922</td>\n",
+       "      <td>9.933213</td>\n",
+       "      <td>8.399764</td>\n",
+       "      <td>7.501486</td>\n",
+       "      <td>9.872268</td>\n",
+       "      <td>9.199869</td>\n",
+       "      <td>12.730255</td>\n",
+       "      <td>10.149991</td>\n",
+       "      <td>1.045942</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>7.200001</td>\n",
+       "      <td>14.445973</td>\n",
+       "      <td>8.779766</td>\n",
+       "      <td>47.537112</td>\n",
+       "      <td>7.281215</td>\n",
+       "      <td>8.400713</td>\n",
+       "      <td>23.189555</td>\n",
+       "      <td>10.728030</td>\n",
+       "      <td>86.520406</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>-13.600000</td>\n",
+       "      <td>-999.000000</td>\n",
+       "      <td>-999.000000</td>\n",
+       "      <td>-999.000000</td>\n",
+       "      <td>-16.200000</td>\n",
+       "      <td>-999.000000</td>\n",
+       "      <td>-999.000000</td>\n",
+       "      <td>-999.000000</td>\n",
+       "      <td>-999.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>4.900000</td>\n",
+       "      <td>2.200000</td>\n",
+       "      <td>2.800000</td>\n",
+       "      <td>4.700000</td>\n",
+       "      <td>3.400000</td>\n",
+       "      <td>7.250000</td>\n",
+       "      <td>5.700000</td>\n",
+       "      <td>1.800000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>9.900000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>8.300000</td>\n",
+       "      <td>9.300000</td>\n",
+       "      <td>9.700000</td>\n",
+       "      <td>8.900000</td>\n",
+       "      <td>13.200000</td>\n",
+       "      <td>10.200000</td>\n",
+       "      <td>8.200000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>15.300000</td>\n",
+       "      <td>15.200000</td>\n",
+       "      <td>14.200000</td>\n",
+       "      <td>15.800000</td>\n",
+       "      <td>15.000000</td>\n",
+       "      <td>14.700000</td>\n",
+       "      <td>18.500000</td>\n",
+       "      <td>15.200000</td>\n",
+       "      <td>14.500000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>36.200000</td>\n",
+       "      <td>37.000000</td>\n",
+       "      <td>33.700000</td>\n",
+       "      <td>36.700000</td>\n",
+       "      <td>39.000000</td>\n",
+       "      <td>36.900000</td>\n",
+       "      <td>37.900000</td>\n",
+       "      <td>33.400000</td>\n",
+       "      <td>33.700000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                    TT_TU                                              \\\n",
+       "STATIONS_ID           3              44            71             73    \n",
+       "count        37224.000000  111003.000000  88391.000000  111471.000000   \n",
+       "mean            10.103922       9.933213      8.399764       7.501486   \n",
+       "std              7.200001      14.445973      8.779766      47.537112   \n",
+       "min            -13.600000    -999.000000   -999.000000    -999.000000   \n",
+       "25%              5.000000       4.900000      2.200000       2.800000   \n",
+       "50%              9.900000      10.000000      8.300000       9.300000   \n",
+       "75%             15.300000      15.200000     14.200000      15.800000   \n",
+       "max             36.200000      37.000000     33.700000      36.700000   \n",
+       "\n",
+       "                                                                       \\\n",
+       "STATIONS_ID            78             91           96             102   \n",
+       "count        113950.000000  113950.000000  6399.000000  106379.000000   \n",
+       "mean              9.872268       9.199869    12.730255      10.149991   \n",
+       "std               7.281215       8.400713    23.189555      10.728030   \n",
+       "min             -16.200000    -999.000000  -999.000000    -999.000000   \n",
+       "25%               4.700000       3.400000     7.250000       5.700000   \n",
+       "50%               9.700000       8.900000    13.200000      10.200000   \n",
+       "75%              15.000000      14.700000    18.500000      15.200000   \n",
+       "max              39.000000      36.900000    37.900000      33.400000   \n",
+       "\n",
+       "                           \n",
+       "STATIONS_ID           125  \n",
+       "count        82589.000000  \n",
+       "mean             1.045942  \n",
+       "std             86.520406  \n",
+       "min           -999.000000  \n",
+       "25%              1.800000  \n",
+       "50%              8.200000  \n",
+       "75%             14.500000  \n",
+       "max             33.700000  "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from IPython.display import clear_output\n",
+    "\n",
+    "from pathlib import Path\n",
+    "import glob\n",
+    "\n",
+    "# Placeholder that only keeps the import/ folder in the repository; it holds no measurements.\n",
+    "placeholder_name = 'text_files.txt'\n",
+    "# Sorted so the processing order is reproducible; the keep='last' de-duplication below depends on it.\n",
+    "import_files = sorted(\n",
+    "    f for f in glob.glob('import/*') if Path(f).name != placeholder_name\n",
+    ")\n",
+    "if not import_files:\n",
+    "    raise FileNotFoundError('No measurement files found in import/')\n",
+    "\n",
+    "out_file = Path.cwd() / \"export_uncleaned\" / \"to_clean\"\n",
+    "\n",
+    "obsolete_columns = [\n",
+    "    'QN_9',\n",
+    "    'RF_TU',\n",
+    "    'eor'\n",
+    "]\n",
+    "\n",
+    "# Collect the prepared frames and concatenate them once after the loop;\n",
+    "# growing main_df with pd.concat inside the loop copies all rows on every pass.\n",
+    "frames = []\n",
+    "\n",
+    "for i, file in enumerate(import_files, start=1):\n",
+    "    # Read in the next file\n",
+    "    df = pd.read_csv(file, delimiter=\";\")\n",
+    "    # Prepare the df before merging (drop obsolete, convert to datetime, filter to date, set index)\n",
+    "    df = df.drop(columns=obsolete_columns)\n",
+    "    df[\"MESS_DATUM\"] = pd.to_datetime(df[\"MESS_DATUM\"], format=\"%Y%m%d%H\")\n",
+    "    df = df[df['MESS_DATUM'] >= \"2007-01-01\"]\n",
+    "    frames.append(df.set_index(['MESS_DATUM', 'STATIONS_ID']))\n",
+    "\n",
+    "    # Display some status messages\n",
+    "    clear_output(wait=True)\n",
+    "    display('Finished file: {}'.format(file), 'This is file {}'.format(i))\n",
+    "\n",
+    "# Merge all frames into the main_df in a single step\n",
+    "main_df = pd.concat(frames)\n",
+    "display('Shape of the main_df is: {}'.format(main_df.shape))\n",
+    "\n",
+    "# Check if all types are correct\n",
+    "display(main_df['TT_TU'].apply(lambda x: type(x).__name__).value_counts())\n",
+    "\n",
+    "# Make sure that no files or observations are duplicates, i.e. scan the index for duplicate entries.\n",
+    "# The ~ inverts the boolean mask, so only rows that are not flagged as duplicates are kept.\n",
+    "main_df = main_df[~main_df.index.duplicated(keep='last')]\n",
+    "\n",
+    "# Unstack the main_df\n",
+    "main_df = main_df.unstack('STATIONS_ID')\n",
+    "display('Shape of the main_df is: {}'.format(main_df.shape))\n",
+    "\n",
+    "# Save main_df to a .csv file and a pickle to continue working in the next step.\n",
+    "main_df.to_pickle(Path(out_file).with_suffix('.pkl'))\n",
+    "main_df.to_csv(Path(out_file).with_suffix('.csv'), sep=\";\")\n",
+    "display(main_df.head())\n",
+    "display(main_df.describe())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/case_study_weather/4-dwd_konverter_final_processing.ipynb b/notebooks/case_study_weather/4-dwd_konverter_final_processing.ipynb
new file mode 100644
index 0000000..13bb693
--- /dev/null
+++ b/notebooks/case_study_weather/4-dwd_konverter_final_processing.ipynb
@@ -0,0 +1,601 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Import temperature data from the DWD and process it\n",
+    "\n",
+    "This notebook pulls historical temperature data from the DWD server and formats it for future use in other projects. The data is delivered in an hourly frequency in a .zip file for each of the available weather stations. To use the data, we need everything in a single .csv-file, all stations side-by-side. Also, we need the daily average.\n",
+    "\n",
+    "To reduce computing time, we also crop all data earlier than 2007. \n",
+    "\n",
+    "Files should be executed in the following pipeline:\n",
+    "* 1-dwd_konverter_download\n",
+    "* 2-dwd_konverter_extract\n",
+    "* 3-dwd_konverter_build_df\n",
+    "* 4-dwd_konverter_final_processing"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4.) Final data processing\n",
+    "We load in the data that has been saved in the last step, so we don't need to calculate everything again if we pause the project and come back later. \n",
+    "### Data Cleaning\n",
+    "The data contains some errors, which need to be cleaned. You can see, by looking at the output of main_df.describe() in the last cell, that the minimum temperature on some stations is -999. That means that there is no plausible measurement for this particular hour. We change this to np.nan, so that we can safely calculate the average values. \n",
+    "### Change the frequency\n",
+    "Finally we resample the data to daily means."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr:last-of-type th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th></th>\n",
+       "      <th colspan=\"9\" halign=\"left\">TT_TU</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>STATIONS_ID</th>\n",
+       "      <th>3</th>\n",
+       "      <th>44</th>\n",
+       "      <th>71</th>\n",
+       "      <th>73</th>\n",
+       "      <th>78</th>\n",
+       "      <th>91</th>\n",
+       "      <th>96</th>\n",
+       "      <th>102</th>\n",
+       "      <th>125</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>MESS_DATUM</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2011-12-31</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>3.88</td>\n",
+       "      <td>2.76</td>\n",
+       "      <td>1.19</td>\n",
+       "      <td>4.30</td>\n",
+       "      <td>2.43</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>3.80</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2012-01-01</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>10.90</td>\n",
+       "      <td>8.14</td>\n",
+       "      <td>4.03</td>\n",
+       "      <td>10.96</td>\n",
+       "      <td>10.27</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>9.01</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2012-01-02</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>7.41</td>\n",
+       "      <td>6.18</td>\n",
+       "      <td>4.77</td>\n",
+       "      <td>7.57</td>\n",
+       "      <td>7.77</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6.48</td>\n",
+       "      <td>4.66</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2012-01-03</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6.14</td>\n",
+       "      <td>3.61</td>\n",
+       "      <td>4.46</td>\n",
+       "      <td>6.38</td>\n",
+       "      <td>5.28</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>5.63</td>\n",
+       "      <td>3.51</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2012-01-04</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>5.80</td>\n",
+       "      <td>2.48</td>\n",
+       "      <td>4.45</td>\n",
+       "      <td>5.46</td>\n",
+       "      <td>4.57</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>5.85</td>\n",
+       "      <td>1.94</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            TT_TU                                                 \n",
+       "STATIONS_ID   3      44    71    73     78     91  96    102   125\n",
+       "MESS_DATUM                                                        \n",
+       "2011-12-31    NaN   3.88  2.76  1.19   4.30   2.43 NaN  3.80   NaN\n",
+       "2012-01-01    NaN  10.90  8.14  4.03  10.96  10.27 NaN  9.01   NaN\n",
+       "2012-01-02    NaN   7.41  6.18  4.77   7.57   7.77 NaN  6.48  4.66\n",
+       "2012-01-03    NaN   6.14  3.61  4.46   6.38   5.28 NaN  5.63  3.51\n",
+       "2012-01-04    NaN   5.80  2.48  4.45   5.46   4.57 NaN  5.85  1.94"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th></th>\n",
+       "      <th colspan=\"9\" halign=\"left\">TT_TU</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>STATIONS_ID</th>\n",
+       "      <th>3</th>\n",
+       "      <th>44</th>\n",
+       "      <th>71</th>\n",
+       "      <th>73</th>\n",
+       "      <th>78</th>\n",
+       "      <th>91</th>\n",
+       "      <th>96</th>\n",
+       "      <th>102</th>\n",
+       "      <th>125</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>1551.000000</td>\n",
+       "      <td>4629.000000</td>\n",
+       "      <td>3683.000000</td>\n",
+       "      <td>4652.000000</td>\n",
+       "      <td>4748.000000</td>\n",
+       "      <td>4748.000000</td>\n",
+       "      <td>267.000000</td>\n",
+       "      <td>4490.000000</td>\n",
+       "      <td>3935.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>10.103939</td>\n",
+       "      <td>10.088153</td>\n",
+       "      <td>8.411244</td>\n",
+       "      <td>9.686855</td>\n",
+       "      <td>9.872342</td>\n",
+       "      <td>9.208837</td>\n",
+       "      <td>13.193633</td>\n",
+       "      <td>10.220345</td>\n",
+       "      <td>8.466612</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>6.742460</td>\n",
+       "      <td>6.653983</td>\n",
+       "      <td>7.511708</td>\n",
+       "      <td>7.849776</td>\n",
+       "      <td>6.658399</td>\n",
+       "      <td>7.124324</td>\n",
+       "      <td>6.762327</td>\n",
+       "      <td>6.076649</td>\n",
+       "      <td>7.711229</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>-10.870000</td>\n",
+       "      <td>-10.710000</td>\n",
+       "      <td>-14.940000</td>\n",
+       "      <td>-14.320000</td>\n",
+       "      <td>-12.390000</td>\n",
+       "      <td>-15.710000</td>\n",
+       "      <td>-0.970000</td>\n",
+       "      <td>-8.170000</td>\n",
+       "      <td>-16.420000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>5.410000</td>\n",
+       "      <td>5.250000</td>\n",
+       "      <td>2.620000</td>\n",
+       "      <td>3.397500</td>\n",
+       "      <td>5.090000</td>\n",
+       "      <td>3.870000</td>\n",
+       "      <td>7.575000</td>\n",
+       "      <td>5.790000</td>\n",
+       "      <td>2.365000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>10.140000</td>\n",
+       "      <td>10.320000</td>\n",
+       "      <td>8.570000</td>\n",
+       "      <td>9.900000</td>\n",
+       "      <td>9.900000</td>\n",
+       "      <td>9.230000</td>\n",
+       "      <td>13.770000</td>\n",
+       "      <td>10.200000</td>\n",
+       "      <td>8.540000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>15.350000</td>\n",
+       "      <td>15.380000</td>\n",
+       "      <td>14.070000</td>\n",
+       "      <td>16.080000</td>\n",
+       "      <td>15.122500</td>\n",
+       "      <td>14.820000</td>\n",
+       "      <td>18.195000</td>\n",
+       "      <td>15.260000</td>\n",
+       "      <td>14.545000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>28.410000</td>\n",
+       "      <td>28.450000</td>\n",
+       "      <td>27.190000</td>\n",
+       "      <td>26.940000</td>\n",
+       "      <td>29.890000</td>\n",
+       "      <td>27.550000</td>\n",
+       "      <td>26.980000</td>\n",
+       "      <td>27.330000</td>\n",
+       "      <td>28.030000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                   TT_TU                                                      \\\n",
+       "STATIONS_ID          3            44           71           73           78    \n",
+       "count        1551.000000  4629.000000  3683.000000  4652.000000  4748.000000   \n",
+       "mean           10.103939    10.088153     8.411244     9.686855     9.872342   \n",
+       "std             6.742460     6.653983     7.511708     7.849776     6.658399   \n",
+       "min           -10.870000   -10.710000   -14.940000   -14.320000   -12.390000   \n",
+       "25%             5.410000     5.250000     2.620000     3.397500     5.090000   \n",
+       "50%            10.140000    10.320000     8.570000     9.900000     9.900000   \n",
+       "75%            15.350000    15.380000    14.070000    16.080000    15.122500   \n",
+       "max            28.410000    28.450000    27.190000    26.940000    29.890000   \n",
+       "\n",
+       "                                                                \n",
+       "STATIONS_ID          91          96           102          125  \n",
+       "count        4748.000000  267.000000  4490.000000  3935.000000  \n",
+       "mean            9.208837   13.193633    10.220345     8.466612  \n",
+       "std             7.124324    6.762327     6.076649     7.711229  \n",
+       "min           -15.710000   -0.970000    -8.170000   -16.420000  \n",
+       "25%             3.870000    7.575000     5.790000     2.365000  \n",
+       "50%             9.230000   13.770000    10.200000     8.540000  \n",
+       "75%            14.820000   18.195000    15.260000    14.545000  \n",
+       "max            27.550000   26.980000    27.330000    28.030000  "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr:last-of-type th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th></th>\n",
+       "      <th colspan=\"9\" halign=\"left\">TT_TU</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>STATIONS_ID</th>\n",
+       "      <th>3</th>\n",
+       "      <th>44</th>\n",
+       "      <th>71</th>\n",
+       "      <th>73</th>\n",
+       "      <th>78</th>\n",
+       "      <th>91</th>\n",
+       "      <th>96</th>\n",
+       "      <th>102</th>\n",
+       "      <th>125</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>MESS_DATUM</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2007-01-01</th>\n",
+       "      <td>7.38</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>7.42</td>\n",
+       "      <td>6.55</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>8.32</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2007-01-02</th>\n",
+       "      <td>4.67</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>4.49</td>\n",
+       "      <td>2.88</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6.73</td>\n",
+       "      <td>0.51</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2007-01-03</th>\n",
+       "      <td>6.19</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>4.87</td>\n",
+       "      <td>4.25</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>7.12</td>\n",
+       "      <td>0.91</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2007-01-04</th>\n",
+       "      <td>7.69</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>7.82</td>\n",
+       "      <td>5.85</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>8.34</td>\n",
+       "      <td>4.43</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2007-01-05</th>\n",
+       "      <td>7.78</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>7.47</td>\n",
+       "      <td>6.03</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>8.20</td>\n",
+       "      <td>3.92</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2019-12-27</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2.03</td>\n",
+       "      <td>3.95</td>\n",
+       "      <td>2.27</td>\n",
+       "      <td>2.36</td>\n",
+       "      <td>1.41</td>\n",
+       "      <td>2.21</td>\n",
+       "      <td>3.79</td>\n",
+       "      <td>2.78</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2019-12-28</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.38</td>\n",
+       "      <td>-0.59</td>\n",
+       "      <td>-0.27</td>\n",
+       "      <td>-0.07</td>\n",
+       "      <td>-2.10</td>\n",
+       "      <td>-0.05</td>\n",
+       "      <td>2.32</td>\n",
+       "      <td>-1.29</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2019-12-29</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.68</td>\n",
+       "      <td>-2.04</td>\n",
+       "      <td>-3.63</td>\n",
+       "      <td>0.07</td>\n",
+       "      <td>-2.41</td>\n",
+       "      <td>-0.97</td>\n",
+       "      <td>2.81</td>\n",
+       "      <td>-4.40</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2019-12-30</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>5.92</td>\n",
+       "      <td>1.88</td>\n",
+       "      <td>-2.46</td>\n",
+       "      <td>5.57</td>\n",
+       "      <td>-1.26</td>\n",
+       "      <td>3.78</td>\n",
+       "      <td>5.97</td>\n",
+       "      <td>-1.32</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2019-12-31</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>5.54</td>\n",
+       "      <td>1.92</td>\n",
+       "      <td>-0.41</td>\n",
+       "      <td>4.05</td>\n",
+       "      <td>-0.46</td>\n",
+       "      <td>5.56</td>\n",
+       "      <td>7.66</td>\n",
+       "      <td>1.91</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>4748 rows × 9 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            TT_TU                                                \n",
+       "STATIONS_ID   3     44    71    73    78    91    96    102   125\n",
+       "MESS_DATUM                                                       \n",
+       "2007-01-01   7.38   NaN   NaN   NaN  7.42  6.55   NaN  8.32   NaN\n",
+       "2007-01-02   4.67   NaN   NaN   NaN  4.49  2.88   NaN  6.73  0.51\n",
+       "2007-01-03   6.19   NaN   NaN   NaN  4.87  4.25   NaN  7.12  0.91\n",
+       "2007-01-04   7.69   NaN   NaN   NaN  7.82  5.85   NaN  8.34  4.43\n",
+       "2007-01-05   7.78   NaN   NaN   NaN  7.47  6.03   NaN  8.20  3.92\n",
+       "...           ...   ...   ...   ...   ...   ...   ...   ...   ...\n",
+       "2019-12-27    NaN  2.03  3.95  2.27  2.36  1.41  2.21  3.79  2.78\n",
+       "2019-12-28    NaN  0.38 -0.59 -0.27 -0.07 -2.10 -0.05  2.32 -1.29\n",
+       "2019-12-29    NaN  0.68 -2.04 -3.63  0.07 -2.41 -0.97  2.81 -4.40\n",
+       "2019-12-30    NaN  5.92  1.88 -2.46  5.57 -1.26  3.78  5.97 -1.32\n",
+       "2019-12-31    NaN  5.54  1.92 -0.41  4.05 -0.46  5.56  7.66  1.91\n",
+       "\n",
+       "[4748 rows x 9 columns]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from pathlib import Path\n",
+    "\n",
+    "# Input (pickle written by the previous step) and output (cleaned .csv) locations\n",
+    "base_dir = Path.cwd()\n",
+    "pkl_file = base_dir / \"export_uncleaned\" / \"to_clean.pkl\"\n",
+    "cleaned_file = base_dir / \"export_cleaned\" / \"cleaned.csv\"\n",
+    "\n",
+    "# Load the pickle, turn every -999 (missing data marker) into NaN, then build rounded daily means\n",
+    "hourly_df = pd.read_pickle(pkl_file)\n",
+    "cleaning_df = (\n",
+    "    hourly_df\n",
+    "    .replace(to_replace=-999, value=np.nan)\n",
+    "    .resample('D').mean()\n",
+    "    .round(decimals=2)\n",
+    ")\n",
+    "\n",
+    "# Write the daily means as a semicolon-separated .csv with decimal commas\n",
+    "cleaning_df.to_csv(cleaned_file, sep=\";\", decimal=\",\")\n",
+    "\n",
+    "display(cleaning_df.loc['2011-12-31':'2012-01-04'])\n",
+    "display(cleaning_df.describe())\n",
+    "display(cleaning_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/case_study_weather/download/zipfile.txt b/notebooks/case_study_weather/download/zipfile.txt
new file mode 100644
index 0000000..767ede5
--- /dev/null
+++ b/notebooks/case_study_weather/download/zipfile.txt
@@ -0,0 +1 @@
+Zip files will be stored here.
diff --git a/notebooks/case_study_weather/export_cleaned/clean_data.txt b/notebooks/case_study_weather/export_cleaned/clean_data.txt
new file mode 100644
index 0000000..2326a9c
--- /dev/null
+++ b/notebooks/case_study_weather/export_cleaned/clean_data.txt
@@ -0,0 +1 @@
+Final csv file for analysis
diff --git a/notebooks/case_study_weather/export_uncleaned/csv_pickle_file.txt b/notebooks/case_study_weather/export_uncleaned/csv_pickle_file.txt
new file mode 100644
index 0000000..168c7fa
--- /dev/null
+++ b/notebooks/case_study_weather/export_uncleaned/csv_pickle_file.txt
@@ -0,0 +1 @@
+csv and pickle files stored here
diff --git a/notebooks/case_study_weather/import/text_files.txt b/notebooks/case_study_weather/import/text_files.txt
new file mode 100644
index 0000000..1db36b9
--- /dev/null
+++ b/notebooks/case_study_weather/import/text_files.txt
@@ -0,0 +1 @@
+Raw text files with temp measurements.