From dcf3b263f8761c7a83be81d6b67183127797dd0a Mon Sep 17 00:00:00 2001
From: Salem Boyland <salemb@google.com>
Date: Mon, 29 Jan 2024 14:07:21 -0600
Subject: [PATCH 01/10] create_single_timeseries_forecasting_model_test.py code
 sample

---
 ...ingle_timeseries_forecasting_model_test.py | 66 +++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 samples/snippets/create_single_timeseries_forecasting_model_test.py

diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py
new file mode 100644
index 0000000000..a6df5f0297
--- /dev/null
+++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py
@@ -0,0 +1,66 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (t
+# you may not use this file except in compliance wi
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in
+# distributed under the License is distributed on a
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, eit
+# See the License for the specific language governi
+# limitations under the License.
+
+
+def test_create_single_timeseries(random_model_id):
+    your_model_id = random_model_id
+
+    # [START bigquery_dataframes_single_timeseries_forecasting_model_tutorial]
+    import bigframes.pandas as bpd
+    
+    # Start by selecting the data you'll use for training. `read_gbq` accepts
+    # either a SQL query or a table ID. Since this example selects from multiple
+    # tables via a wildcard, use SQL to define this data. Watch issue
+    # https://github.com/googleapis/python-bigquery-dataframes/issues/169
+    # for updates to `read_gbq` to support wildcard tables.
+    
+    # Read and visualize the time series you want to forecast.
+    df = bpd.read_gbq('''
+        SELECT PARSE_TIMESTAMP("%Y%m%d", date) AS parsed_date,
+        SUM(totals.visits) AS total_visits
+        FROM
+        `bigquery-public-data.google_analytics_sample.ga_sessions_*`
+        GROUP BY date
+        ''')
+    X = df[["parsed_date"]]
+    y = df[["total_visits"]]
+
+    # Create an Arima-based time series model using the Google Analytics 360 data.
+    from bigframes.ml.forecasting import ARIMAPlus
+
+    ga_arima_model = ARIMAPlus()
+
+    # Fit the model to your dataframe.
+    ga_arima_model.fit(X,y)
+
+    # The model.fit() call above created a temporary model.
+    # Use the to_gbq() method to write to a permanent location.
+    ga_arima_model.to_gbq(
+    your_model_id,  # For example: "bqml_tutorial.sample_model",
+    replace=True,
+    )
+
+    # Inspect the evaluation metrics of all evaluated models.
+    # when ruuning this function use same model, dataset, model name (str)
+    evaluation = ga_arima_model.summary(
+        f'''
+        SELECT *   
+        FROM ML.ARIMA_EVALUATE(MODEL `{your_model_id}`)
+        '''
+        )
+    
+    print(evaluation)
+    # Inspect the coefficients of your model
+    
+    
\ No newline at end of file

From adc22ef8a872227ee49ee0907b35eb8e5775bab9 Mon Sep 17 00:00:00 2001
From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com>
Date: Tue, 20 Feb 2024 09:46:43 -0600
Subject: [PATCH 02/10] fix: forecast method to forecast time series

---
 ...ingle_timeseries_forecasting_model_test.py | 29 ++++++++++++++-----
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py
index a6df5f0297..6429f613a4 100644
--- a/samples/snippets/create_single_timeseries_forecasting_model_test.py
+++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py
@@ -52,15 +52,30 @@ def test_create_single_timeseries(random_model_id):
     )
 
     # Inspect the evaluation metrics of all evaluated models.
-    # when ruuning this function use same model, dataset, model name (str)
+    # when running this function use same model, dataset, model name (str)
     evaluation = ga_arima_model.summary(
-        f'''
-        SELECT *   
-        FROM ML.ARIMA_EVALUATE(MODEL `{your_model_id}`)
-        '''
+        show_all_candidate_models = False,
         )
     
     print(evaluation)
+
     # Inspect the coefficients of your model
-    
-    
\ No newline at end of file
+    f'''
+    SELECT *
+    FROM ML.ARIMA_COEFFICIENTS(MODEL `{your_model_id}`)
+    '''
+    evaluation.ML.ARIMA_COEFFICIENTS()
+
+    # Use your model to forecast the time series
+    #standardSQL
+    your_model_id.forecast()
+
+    # Explain and visualize the forecasting results
+    f'''
+    SELECT *
+    FROM ML.EXPLAIN_FORECAST(
+    MODEL `{your_model_id}`,
+    STRUCT(
+    [horizon AS horizon]
+    [, confidence_level AS confidence_level]))
+    '''
\ No newline at end of file

From d3ea7c79affca6d1edcf38da84c9525ed61df765 Mon Sep 17 00:00:00 2001
From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com>
Date: Wed, 13 Mar 2024 12:10:10 -0500
Subject: [PATCH 03/10] pair programming PR draft creation

---
 ...ingle_timeseries_forecasting_model_test.py | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py
index 6429f613a4..a91f6d07b7 100644
--- a/samples/snippets/create_single_timeseries_forecasting_model_test.py
+++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py
@@ -26,21 +26,21 @@ def test_create_single_timeseries(random_model_id):
     # for updates to `read_gbq` to support wildcard tables.
     
     # Read and visualize the time series you want to forecast.
-    df = bpd.read_gbq('''
-        SELECT PARSE_TIMESTAMP("%Y%m%d", date) AS parsed_date,
-        SUM(totals.visits) AS total_visits
-        FROM
-        `bigquery-public-data.google_analytics_sample.ga_sessions_*`
-        GROUP BY date
-        ''')
-    X = df[["parsed_date"]]
-    y = df[["total_visits"]]
+    df = bpd.read_gbq(
+        'bigquery-public-data.google_analytics_sample.ga_sessions_*'
+        )
+    parsed_date = bpd.to_datetime(df.date, format= "%Y%m%d", utc = True)
+    total_visits = df.groupby(["date"])["parsed_date"].sum()
+    visits = df["totals"].struct.field("visits")
 
-    # Create an Arima-based time series model using the Google Analytics 360 data.
+    # Create an Arima-based time series model using the Google Analytics 360 data. 
     from bigframes.ml.forecasting import ARIMAPlus
 
     ga_arima_model = ARIMAPlus()
 
+    X = df[["parsed_date"]]
+    y = df[["total_visits"]]
+
     # Fit the model to your dataframe.
     ga_arima_model.fit(X,y)
 

From 17cbd68f87a85021fecfd44da2e92e91d4ecca9e Mon Sep 17 00:00:00 2001
From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com>
Date: Fri, 5 Apr 2024 15:15:30 -0500
Subject: [PATCH 04/10] tutorial step 7 & 8

---
 ..._single_timeseries_forecasting_model_test.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py
index a91f6d07b7..b7ac8d8dc7 100644
--- a/samples/snippets/create_single_timeseries_forecasting_model_test.py
+++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py
@@ -78,4 +78,19 @@ def test_create_single_timeseries(random_model_id):
     STRUCT(
     [horizon AS horizon]
     [, confidence_level AS confidence_level]))
-    '''
\ No newline at end of file
+    '''
+    total_visits.plot.line(x = 'history_timestamp', y = 'history_value')
+    
+    # Visualize the forecasting results without having decompose_time_series enabled.
+    df = bpd.read_gbq(
+    'bigquery-public-data.google_analytics_sample.ga_sessions_*'
+    )
+    parsed_date = bpd.to_datetime(df.date, format= "%Y%m%d", utc = True)
+    visits = df["totals"].struct.field("visits")
+    df = bpd.DataFrame(
+    {
+        'history_timestamp': parsed_date,
+        'history_value': visits,
+    }
+    )
+    total_visits = df.groupby(["history_timestamp"], as_index = False).sum(numeric_only= True)

From 2ecceaeba70b27a88aaf5421db9e8706b37bd216 Mon Sep 17 00:00:00 2001
From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com>
Date: Thu, 18 Apr 2024 14:11:49 -0500
Subject: [PATCH 05/10] concat function for visualizing forecasting results

---
 .../create_single_timeseries_forecasting_model_test.py   | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py
index b7ac8d8dc7..f2ba60fa7d 100644
--- a/samples/snippets/create_single_timeseries_forecasting_model_test.py
+++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py
@@ -68,7 +68,7 @@ def test_create_single_timeseries(random_model_id):
 
     # Use your model to forecast the time series
     #standardSQL
-    your_model_id.forecast()
+    your_model_id.predict()
 
     # Explain and visualize the forecasting results
     f'''
@@ -94,3 +94,10 @@ def test_create_single_timeseries(random_model_id):
     }
     )
     total_visits = df.groupby(["history_timestamp"], as_index = False).sum(numeric_only= True)
+
+
+    history_df = bpd.read_gbq(df)
+    forecast_df = bpd.read_gbq(total_visits)
+
+    # Concat DataFrame 
+    combined_df = bpd.concat([history_df, forecast_df], ignore_index=True)

From 8a2a61275309745f212ea63bda3d831fb388adbd Mon Sep 17 00:00:00 2001
From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com>
Date: Fri, 19 Apr 2024 15:49:58 -0500
Subject: [PATCH 06/10] docs: single time series code sample step 2

---
 ...ingle_timeseries_forecasting_model_test.py | 69 -------------------
 1 file changed, 69 deletions(-)

diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py
index f2ba60fa7d..509c163e3f 100644
--- a/samples/snippets/create_single_timeseries_forecasting_model_test.py
+++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py
@@ -32,72 +32,3 @@ def test_create_single_timeseries(random_model_id):
     parsed_date = bpd.to_datetime(df.date, format= "%Y%m%d", utc = True)
     total_visits = df.groupby(["date"])["parsed_date"].sum()
     visits = df["totals"].struct.field("visits")
-
-    # Create an Arima-based time series model using the Google Analytics 360 data. 
-    from bigframes.ml.forecasting import ARIMAPlus
-
-    ga_arima_model = ARIMAPlus()
-
-    X = df[["parsed_date"]]
-    y = df[["total_visits"]]
-
-    # Fit the model to your dataframe.
-    ga_arima_model.fit(X,y)
-
-    # The model.fit() call above created a temporary model.
-    # Use the to_gbq() method to write to a permanent location.
-    ga_arima_model.to_gbq(
-    your_model_id,  # For example: "bqml_tutorial.sample_model",
-    replace=True,
-    )
-
-    # Inspect the evaluation metrics of all evaluated models.
-    # when running this function use same model, dataset, model name (str)
-    evaluation = ga_arima_model.summary(
-        show_all_candidate_models = False,
-        )
-    
-    print(evaluation)
-
-    # Inspect the coefficients of your model
-    f'''
-    SELECT *
-    FROM ML.ARIMA_COEFFICIENTS(MODEL `{your_model_id}`)
-    '''
-    evaluation.ML.ARIMA_COEFFICIENTS()
-
-    # Use your model to forecast the time series
-    #standardSQL
-    your_model_id.predict()
-
-    # Explain and visualize the forecasting results
-    f'''
-    SELECT *
-    FROM ML.EXPLAIN_FORECAST(
-    MODEL `{your_model_id}`,
-    STRUCT(
-    [horizon AS horizon]
-    [, confidence_level AS confidence_level]))
-    '''
-    total_visits.plot.line(x = 'history_timestamp', y = 'history_value')
-    
-    # Visualize the forecasting results without having decompose_time_series enabled.
-    df = bpd.read_gbq(
-    'bigquery-public-data.google_analytics_sample.ga_sessions_*'
-    )
-    parsed_date = bpd.to_datetime(df.date, format= "%Y%m%d", utc = True)
-    visits = df["totals"].struct.field("visits")
-    df = bpd.DataFrame(
-    {
-        'history_timestamp': parsed_date,
-        'history_value': visits,
-    }
-    )
-    total_visits = df.groupby(["history_timestamp"], as_index = False).sum(numeric_only= True)
-
-
-    history_df = bpd.read_gbq(df)
-    forecast_df = bpd.read_gbq(total_visits)
-
-    # Concat DataFrame 
-    combined_df = bpd.concat([history_df, forecast_df], ignore_index=True)

From 673c6e380ecc682366fba11c933b810d4d4b0bd5 Mon Sep 17 00:00:00 2001
From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com>
Date: Mon, 22 Apr 2024 12:50:14 -0500
Subject: [PATCH 07/10] suggested changes to step 2

---
 ...te_single_timeseries_forecasting_model_test.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py
index 509c163e3f..bdc59673c4 100644
--- a/samples/snippets/create_single_timeseries_forecasting_model_test.py
+++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC
+# Copyright 2024 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (t
 # you may not use this file except in compliance wi
@@ -19,16 +19,17 @@ def test_create_single_timeseries(random_model_id):
     # [START bigquery_dataframes_single_timeseries_forecasting_model_tutorial]
     import bigframes.pandas as bpd
     
-    # Start by selecting the data you'll use for training. `read_gbq` accepts
-    # either a SQL query or a table ID. Since this example selects from multiple
-    # tables via a wildcard, use SQL to define this data. Watch issue
-    # https://github.com/googleapis/python-bigquery-dataframes/issues/169
-    # for updates to `read_gbq` to support wildcard tables.
+    # Start by selecting the data that you'll be querying from bigquery-public-data.google_analytics_sample.ga_sessions_* 
+    # The read_gbq function accepts table expressions or SQL
+    # the clause indicates that you are querying the ga_sessions_* tables in the google_analytics_sample dataset
     
     # Read and visualize the time series you want to forecast.
     df = bpd.read_gbq(
         'bigquery-public-data.google_analytics_sample.ga_sessions_*'
         )
     parsed_date = bpd.to_datetime(df.date, format= "%Y%m%d", utc = True)
-    total_visits = df.groupby(["date"])["parsed_date"].sum()
     visits = df["totals"].struct.field("visits")
+    total_visits = visits.groupby(parsed_date).sum()
+    total_visits.plot.line()
+
+    # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial]
\ No newline at end of file

From 647b1c20b061ad84d5e28ccac670810958a9da06 Mon Sep 17 00:00:00 2001
From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com>
Date: Mon, 22 Apr 2024 12:53:00 -0500
Subject: [PATCH 08/10] docs: edit text because wildcard tables are supported

---
 .../create_single_timeseries_forecasting_model_test.py        | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py
index bdc59673c4..6288346bf5 100644
--- a/samples/snippets/create_single_timeseries_forecasting_model_test.py
+++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py
@@ -20,8 +20,8 @@ def test_create_single_timeseries(random_model_id):
     import bigframes.pandas as bpd
     
     # Start by selecting the data that you'll be querying from bigquery-public-data.google_analytics_sample.ga_sessions_* 
-    # The read_gbq function accepts table expressions or SQL
-    # the clause indicates that you are querying the ga_sessions_* tables in the google_analytics_sample dataset
+    # The read_gbq function accepts the wildcard table expressions and this clause indicates that 
+    # you are querying the ga_sessions_* tables in the google_analytics_sample dataset
     
     # Read and visualize the time series you want to forecast.
     df = bpd.read_gbq(

From 310d33d957eb6f2fe90adb02868259b208cfb7ac Mon Sep 17 00:00:00 2001
From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com>
Date: Mon, 22 Apr 2024 15:28:41 -0500
Subject: [PATCH 09/10] code review changes

---
 ..._single_timeseries_forecasting_model_test.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py
index 6288346bf5..23b00fb10e 100644
--- a/samples/snippets/create_single_timeseries_forecasting_model_test.py
+++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py
@@ -19,9 +19,8 @@ def test_create_single_timeseries(random_model_id):
     # [START bigquery_dataframes_single_timeseries_forecasting_model_tutorial]
     import bigframes.pandas as bpd
     
-    # Start by selecting the data that you'll be querying from bigquery-public-data.google_analytics_sample.ga_sessions_* 
-    # The read_gbq function accepts the wildcard table expressions and this clause indicates that 
-    # you are querying the ga_sessions_* tables in the google_analytics_sample dataset
+    # Start by loading the historical data from BigQuerythat you want to analyze and forecast. 
+    # This clause indicates that you are querying the ga_sessions_* tables in the google_analytics_sample dataset.
     
     # Read and visualize the time series you want to forecast.
     df = bpd.read_gbq(
@@ -30,6 +29,16 @@ def test_create_single_timeseries(random_model_id):
     parsed_date = bpd.to_datetime(df.date, format= "%Y%m%d", utc = True)
     visits = df["totals"].struct.field("visits")
     total_visits = visits.groupby(parsed_date).sum()
-    total_visits.plot.line()
+    
+    # Expected output: total_visits.head()
+    # date
+    # 2016-08-01 00:00:00+00:00    1711
+    # 2016-08-02 00:00:00+00:00    2140
+    # 2016-08-03 00:00:00+00:00    2890
+    # 2016-08-04 00:00:00+00:00    3161
+    # 2016-08-05 00:00:00+00:00    2702
+    # Name: visits, dtype: Int64
 
+    total_visits.plot.line()
+    
     # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial]
\ No newline at end of file

From 88667a93e27b0a9d09c352df490b39f33dc0f77c Mon Sep 17 00:00:00 2001
From: Salem Jorden <115185670+SalemJorden@users.noreply.github.com>
Date: Mon, 22 Apr 2024 15:32:26 -0500
Subject: [PATCH 10/10] remove unused variable

---
 ...ingle_timeseries_forecasting_model_test.py | 20 ++++++++-----------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py
index 23b00fb10e..5750933713 100644
--- a/samples/snippets/create_single_timeseries_forecasting_model_test.py
+++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py
@@ -13,23 +13,19 @@
 # limitations under the License.
 
 
-def test_create_single_timeseries(random_model_id):
-    your_model_id = random_model_id
+def test_create_single_timeseries():
 
     # [START bigquery_dataframes_single_timeseries_forecasting_model_tutorial]
     import bigframes.pandas as bpd
-    
-    # Start by loading the historical data from BigQuerythat you want to analyze and forecast. 
+
+    # Start by loading the historical data from BigQuery that you want to analyze and forecast.
     # This clause indicates that you are querying the ga_sessions_* tables in the google_analytics_sample dataset.
-    
     # Read and visualize the time series you want to forecast.
-    df = bpd.read_gbq(
-        'bigquery-public-data.google_analytics_sample.ga_sessions_*'
-        )
-    parsed_date = bpd.to_datetime(df.date, format= "%Y%m%d", utc = True)
+    df = bpd.read_gbq("bigquery-public-data.google_analytics_sample.ga_sessions_*")
+    parsed_date = bpd.to_datetime(df.date, format="%Y%m%d", utc=True)
     visits = df["totals"].struct.field("visits")
     total_visits = visits.groupby(parsed_date).sum()
-    
+
     # Expected output: total_visits.head()
     # date
     # 2016-08-01 00:00:00+00:00    1711
@@ -40,5 +36,5 @@ def test_create_single_timeseries(random_model_id):
     # Name: visits, dtype: Int64
 
     total_visits.plot.line()
-    
-    # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial]
\ No newline at end of file
+
+    # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial]