From 9dc9da50f39ccf63afc9e4f8316cb0263bc49922 Mon Sep 17 00:00:00 2001 From: kthoppae Date: Fri, 18 Jul 2025 10:20:20 -0700 Subject: [PATCH] Created using Colab with the retail data using RandomForester --- Model3_Retail_Random_launch.ipynb | 595 ++++++++++++++++++++++++++++++ 1 file changed, 595 insertions(+) create mode 100644 Model3_Retail_Random_launch.ipynb diff --git a/Model3_Retail_Random_launch.ipynb b/Model3_Retail_Random_launch.ipynb new file mode 100644 index 0000000..ec44497 --- /dev/null +++ b/Model3_Retail_Random_launch.ipynb @@ -0,0 +1,595 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyNo4v0uyZnOPNmmPkbdxGNr", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "EblvaNOmtjDe" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "# Example: Load your preprocessed data\n", + "train_df = pd.read_csv('train.csv', low_memory=False)\n", + "\n", + "X_train = train_df.drop(['Sales', 'Date'], axis=1)\n", + "\n", + "y_train = train_df['Sales']" + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "Ai4e72X2t07U" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.ensemble import RandomForestRegressor\n", + "\n", + "# Set your model parameters (customize as needed)\n", + "model = RandomForestRegressor(n_estimators=100, random_state=42)\n", + "model.fit(X_train, y_train) # Fit model to training data" + ], + "metadata": { + "id": "aWFHXXYtzbjp", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 80 + }, + "collapsed": true, + "outputId": "d166ca37-0b9c-42ed-9441-4da1a2baafc1" + }, + 
"execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "RandomForestRegressor(random_state=42)" + ], + "text/html": [ + "
RandomForestRegressor(random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import joblib\n", + "\n", + "# Save your model\n", + "joblib.dump(model, 'sales_forecast_model.joblib')" + ], + "metadata": { + "id": "_HsrG5nbvGHj", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c50d9be4-026d-4b78-ae34-b4e2c0bfe175" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['sales_forecast_model.joblib']" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import joblib\n", + "model = joblib.load('sales_forecast_model.joblib')\n", + "\n" + ], + "metadata": { + "id": "A6GvWcoAvKtv" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "test_df = pd.read_csv('test.csv', low_memory=False)\n", + "X_test = test_df.drop(['Date', 'Sales'], axis=1) # Assuming 'Date' and 'Sales' columns exist in test data and you want to drop them\n", + "\n", + "# Replace 'a' in 'StateHoliday' with 1\n", + "X_test['StateHoliday'] = X_test['StateHoliday'].replace('a', 1)\n", + "\n", + "preds = model.predict(X_test)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 211 + }, + "id": "cVbDwZFM8yxL", + "outputId": "3b83af5f-346d-4ea8-eb2b-fa0b009e7c41" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "name 'pd' is not defined", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipython-input-1-3782873592.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtest_df\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'test.csv'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlow_memory\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mX_test\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtest_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Date'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Sales'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# Assuming 'Date' and 'Sales' columns exist in test data and you want to drop them\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# Replace 'a' in 'StateHoliday' with 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'StateHoliday'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'StateHoliday'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'pd' is not defined" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "model.plot(forecast)\n", + "plt.show()" + ], + "metadata": { + "id": "GdKacxyqPuHV" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file