From cc598422c21067cc2b664eb4a450e5b3e4f4ce0c Mon Sep 17 00:00:00 2001 From: Chansung Park Date: Thu, 16 Jun 2022 00:15:47 +0000 Subject: [PATCH 1/7] add wandb dependency --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 7ca652f..971964a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ dvc[gdrive]==2.10.2 +wandb==0.12.18 tensorflow==2.8 typer==0.4.1 docopt==0.6.2 From 73bf38005a9ea9fb1c4ea4df058746469e13ec55 Mon Sep 17 00:00:00 2001 From: Chansung Park Date: Thu, 16 Jun 2022 00:29:06 +0000 Subject: [PATCH 2/7] update to adopt wandb api --- pipeline/train.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/pipeline/train.py b/pipeline/train.py index df521ff..faea520 100644 --- a/pipeline/train.py +++ b/pipeline/train.py @@ -10,9 +10,11 @@ import tensorflow as tf from tensorflow.keras.applications import resnet50 -from dvclive.keras import DvcLiveCallback import modeling +import wandb +from wandb.keras import WandbCallback + if len(sys.argv) != 2: sys.stderr.write("Arguments error. 
Usage:\n") sys.stderr.write("\tpython prepare.py data-file\n") @@ -56,14 +58,22 @@ def make_tarfile(output_filename, source_dir): with tarfile.open(output_filename, "w:gz") as tar: tar.add(source_dir, arcname=os.path.basename(source_dir)) -def run_train(): +def run_train(project_name, + wandb_key): + wandb.login( + anonymous="never", + key=wandb_key + ) + wandb_run = wandb.init(project=project_name, + config=params) + train_size = params['train_size'] train_step_size = train_size // params['batch_size'] train_ds = _read_dataset(params['epoch'], params['batch_size'], train) test_ds = _read_dataset(params['epoch'], params['batch_size'], test) - dvcCallback = DvcLiveCallback() + wandbCallback = WandbCallback() m = modeling._build_keras_model() m = modeling._compile(m, float(params['lr'])) @@ -73,7 +83,7 @@ def run_train(): epochs=params['epoch'], steps_per_epoch=train_step_size, validation_data=test_ds, - callbacks=[dvcCallback]) + callbacks=[wandbCallback]) m.save(output, save_format='tf', @@ -81,4 +91,7 @@ def run_train(): make_tarfile(f'{output}.tar.gz', output) -run_train() \ No newline at end of file +project_name = os.environ["WANDB_PROJECT"] +wandb_key = os.environment["WANDB_API_KEY"] + +run_train(project_name, wandb_key) \ No newline at end of file From f769a378f0abda2cf367472387108d6b69a8a0a2 Mon Sep 17 00:00:00 2001 From: Chansung Park Date: Sat, 25 Jun 2022 17:28:48 +0000 Subject: [PATCH 3/7] add wandb run name --- pipeline/train.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/pipeline/train.py b/pipeline/train.py index faea520..cbb0830 100644 --- a/pipeline/train.py +++ b/pipeline/train.py @@ -58,14 +58,18 @@ def make_tarfile(output_filename, source_dir): with tarfile.open(output_filename, "w:gz") as tar: tar.add(source_dir, arcname=os.path.basename(source_dir)) -def run_train(project_name, - wandb_key): +def run_train(): + project_name = os.environ["WANDB_PROJECT"] + wandb_key = os.environ["WANDB_API_KEY"] + 
wandb_run_name = os.environ["WANDB_RUN_NAME"] + wandb.login( anonymous="never", key=wandb_key ) - wandb_run = wandb.init(project=project_name, - config=params) + _ = wandb.init(project=project_name, + config=params, + name=wandb_run_name) train_size = params['train_size'] train_step_size = train_size // params['batch_size'] @@ -73,7 +77,8 @@ def run_train(project_name, train_ds = _read_dataset(params['epoch'], params['batch_size'], train) test_ds = _read_dataset(params['epoch'], params['batch_size'], test) - wandbCallback = WandbCallback() + wandbCallback = WandbCallback(training_data=train_ds, + log_weights=(True), log_gradients=(True)) m = modeling._build_keras_model() m = modeling._compile(m, float(params['lr'])) @@ -91,7 +96,4 @@ def run_train(project_name, make_tarfile(f'{output}.tar.gz', output) -project_name = os.environ["WANDB_PROJECT"] -wandb_key = os.environment["WANDB_API_KEY"] - -run_train(project_name, wandb_key) \ No newline at end of file +run_train() \ No newline at end of file From c0f5d79ae4f613c380c135796d5116bbae8d1c1a Mon Sep 17 00:00:00 2001 From: Chansung Park Date: Sat, 25 Jun 2022 17:30:05 +0000 Subject: [PATCH 4/7] update according to WANDB setup --- README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/README.md b/README.md index 22a2cff..ae29d96 100644 --- a/README.md +++ b/README.md @@ -29,10 +29,6 @@ This project shows how to realize MLOps in Git/GitHub. 
In order to achieve this $ dvc stage add -n train \ -p train.train_size,train.batch_size,train.epoch,train.lr \ -d pipeline/modeling.py -d pipeline/train.py -d data \ - --plots-no-cache dvclive/scalars/loss.tsv \ - --plots-no-cache dvclive/scalars/sparse_categorical_accuracy.tsv \ - --plots-no-cache dvclive/scalars/val_loss.tsv \ - --plots-no-cache dvclive/scalars/val_sparse_categorical_accuracy.tsv \ -o outputs/model \ python pipeline/train.py outputs/model ``` @@ -41,7 +37,6 @@ $ dvc stage add -n train \ $ dvc stage add -n evaluate \ -p evaluate.test,evaluate.batch_size \ -d pipeline/evaluate.py -d data/test -d outputs/model \ - -M outputs/metrics.json \ python pipeline/evaluate.py outputs/model ``` 11. Update `params.yaml` as you need. From 90f3097853dc616331ec43f37d2df7d345b60e09 Mon Sep 17 00:00:00 2001 From: Chansung Park Date: Sat, 25 Jun 2022 17:37:08 +0000 Subject: [PATCH 5/7] leave a note to set W&B specific GH secrets --- README.md | 53 ++++++++++++++++------------------------------------- 1 file changed, 16 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index ae29d96..7d65432 100644 --- a/README.md +++ b/README.md @@ -21,10 +21,11 @@ This project shows how to realize MLOps in Git/GitHub. In order to achieve this 4. Run `dvc add [ADDED FILE OR DIRECTORY]` to track your data with DVC 5. Run `dvc remote add -d gdrive_storage gdrive://[ID of specific folder in gdrive]` to add Google Drive as the remote data storage 6. Run `dvc push`, then URL to auth is provided. Copy and paste it to the browser, and autheticate -7. Copy the content of `.dvc/tmp/gdrive-user-credentials.json` and put it as in [GitHub Secret](https://docs.github.com/en/actions/security-guides/encrypted-secrets#creating-encrypted-secrets-for-a-repository) with the name of `GDRIVE_CREDENTIALS` -8. Run `git add . && git commit -m "initial commit" && git push origin main` to keep the initial setup -9. Write your own pipeline under `pipeline` directory. 
Codes for basic image classification in TensorFlow are provided initially. -10. Run the following `dvc stage add` for training stage +7. Copy the content of `.dvc/tmp/gdrive-user-credentials.json` and put it as in [GitHub Secret](https://docs.github.com/en/actions/security-guides/encrypted-secrets#creating-encrypted-secrets-for-a-repository) with the name of `GDRIVE_CREDENTIAL` +8. Add W&B PROJECT NAME and API KEY to GitHub Secret as `WANDB_PROJECT` and `WANDB_API_KEY` respectively. +9. Run `git add . && git commit -m "initial commit" && git push origin main` to keep the initial setup +10. Write your own pipeline under `pipeline` directory. Codes for basic image classification in TensorFlow are provided initially. +11. Run the following `dvc stage add` for training stage ```bash $ dvc stage add -n train \ -p train.train_size,train.batch_size,train.epoch,train.lr \ @@ -32,23 +33,23 @@ $ dvc stage add -n train \ -o outputs/model \ python pipeline/train.py outputs/model ``` -10. Run the following `dvc stage add` for evaluate stage +12. Run the following `dvc stage add` for evaluate stage ```bash $ dvc stage add -n evaluate \ -p evaluate.test,evaluate.batch_size \ -d pipeline/evaluate.py -d data/test -d outputs/model \ python pipeline/evaluate.py outputs/model ``` -11. Update `params.yaml` as you need. -12. Run `git add . && git commit -m "add initial pipeline setup" && git push origin main` -13. Run `dvc repro` to run the pipeline initially -14. Run `dvc add outputs/model.tar.gz` to add compressed version of model -15. Run `dvc push outputs/model.tar.gz` -16. Run `echo "/pipeline/__pycache__" >> .gitignore` to ignore unnecessary directory -17. Run `git add . && git commit -m "add initial pipeline run" && git push origin main` -18. Add access token and user email of [JarvisLabs.ai](https://jarvislabs.ai/) to GitHub Secret as `JARVISLABS_ACCESS_TOKEN` and `JARVISLABS_USER_EMAIL` -19. Add GitHub access token to GitHub Secret as `GH_ACCESS_TOKEN` -20. 
Create a PR and write `#train` as in comment (you have to be the onwer of the repo) +13. Update `params.yaml` as you need. +14. Run `git add . && git commit -m "add initial pipeline setup" && git push origin main` +15. Run `dvc repro` to run the pipeline initially +16. Run `dvc add outputs/model.tar.gz` to add compressed version of model +17. Run `dvc push outputs/model.tar.gz` +18. Run `echo "/pipeline/__pycache__" >> .gitignore` to ignore unnecessary directory +19. Run `git add . && git commit -m "add initial pipeline run" && git push origin main` +20. Add access token and user email of [JarvisLabs.ai](https://jarvislabs.ai/) to GitHub Secret as `JARVISLABS_ACCESS_TOKEN` and `JARVISLABS_USER_EMAIL` +21. Add GitHub access token to GitHub Secret as `GH_ACCESS_TOKEN` +22. Create a PR and write `#train` as in comment (you have to be the owner of the repo) ### HuggingFace Integration Setup - GitHub Action assumes your model is archieved as `model.tar.gz` under `outputs` directory - Algo GitHub Action assumes your HuggingFace Space app is written in [Gradio](https://gradio.app/) under `hf-space` directory. You need to change [`app_template.py`](https://github.com/codingpot/git-mlops/blob/main/hf-space/app_template.py) as you need(you shouldn't remove any environment variables in the file). 
-## TODO - -- [X] Write solid steps to reproduce this repo for other tasks -- [X] Deploy experimental model to [HF Space](https://huggingface.co/spaces) -- [ ] Deploy current model to [GKE](https://cloud.google.com/kubernetes-engine) with [auto TFServing deployment project](https://github.com/deep-diver/ml-deployment-k8s-tfserving) -- [ ] Add more cloud providers offering GPU VMs - - [X] [JarvisLabs.ai](https://jarvislabs.ai/) - - [ ] [DataCrunch.io](https://datacrunch.io/) - - [ ] [GCP Vertex AI Training](https://cloud.google.com/vertex-ai#section-9) -- [ ] Integrate more managed services for management - - [ ] [W&B Artifact](https://wandb.ai/site) for dataset/model versioning and experiment tracking - - [ ] [HugginfFace](https://huggingface.co) for dataset/model versioning -- [ ] Integrate more managed services for deployment - - [ ] [AKS](https://docs.microsoft.com/en-us/azure/aks/) - - [ ] [EKS](https://aws.amazon.com/ko/eks/) - - [ ] [App Engine](https://cloud.google.com/appengine/) - - [ ] [AWS Lambda](https://aws.amazon.com/ko/lambda/) -- [ ] Add more example codebase (pipeline) - - [ ] TensorFlow based Object Detection - - [ ] PyTorch based Image Classification - - [ ] HuggingFace Transformers - ## Brief description of each tools - **DVC(Data Version Control)**: Manages data in somewhere else(i.e. cloud storage) while keeping the version and remote information in metadata file in Git repository. From 1436c39b998c9f6beece9f4cc8a2c484bf176875 Mon Sep 17 00:00:00 2001 From: Chansung Park Date: Sat, 25 Jun 2022 18:10:29 +0000 Subject: [PATCH 6/7] fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7d65432..703d862 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ $ dvc stage add -n evaluate \ 2. Add username of HugginfFace to GitHub Secret as `HF_USER_ID` 3. 
Write `#deploy-hf` in comment of PR you want to deploy to HuggingFace Space - GitHub Action assumes your model is archieved as `model.tar.gz` under `outputs` directory - - Algo GitHub Action assumes your HuggingFace Space app is written in [Gradio](https://gradio.app/) under `hf-space` directory. You need to change [`app_template.py`](https://github.com/codingpot/git-mlops/blob/main/hf-space/app_template.py) as you need(you shouldn't remove any environment variables in the file). + - GitHub Action assumes your HuggingFace Space app is written in [Gradio](https://gradio.app/) under `hf-space` directory. You need to change [`app_template.py`](https://github.com/codingpot/git-mlops/blob/main/hf-space/app_template.py) as you need(you shouldn't remove any environment variables in the file). ## Brief description of each tools From 2e1519f12fc7caffba07b7cc634929e39c428f93 Mon Sep 17 00:00:00 2001 From: Chansung Park Date: Sat, 25 Jun 2022 18:12:13 +0000 Subject: [PATCH 7/7] update shell script to support W&B --- scripts/experiments.sh | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/scripts/experiments.sh b/scripts/experiments.sh index 56653e7..7590114 100644 --- a/scripts/experiments.sh +++ b/scripts/experiments.sh @@ -12,6 +12,10 @@ export GH_TOKEN='$GH_ACCESS_TOKEN' git config --global user.name "chansung" git config --global user.email "deep.diver.csp@gmail.com" +# set W&B specific keys +export WANDB_PROJECT='$WANDB_PROJECT' +export WANDB_API_KEY='$WANDB_API_KEY' + # move to the repo git clone https://github.com/codingpot/git-mlops.git @@ -29,30 +33,19 @@ echo '$GDRIVE_CREDENTIAL' > .dvc/tmp/gdrive-user-credentials.json # pull data dvc pull -exp_names=("base") -dvc exp run - -dvc exp show > exp_results.txt -exp_id_strings=`grep -oe "exp-[a-z0-9]\+" exp_results.txt` -exp_ids=($exp_id_strings) -cur_branch=$(git branch | sed -n -e 's/^\* \(.*\)/\1/p') +export WANDB_RUN_NAME=$CUR_BRANCH +dvc repro exp_result=$(dvc exp 
show --only-changed --md) -gh pr comment $CUR_PR_ID --body "$exp_result" +wandb_url="https://wandb.ai/codingpot/git-mlops" +gh pr comment $CUR_PR_ID --body "[Visit W&B Log Page for this Pull Request]($wandb_url)" git reset --hard -for idx in ${!exp_names[@]} -do - echo ${exp_ids[$idx]} - echo ${exp_names[$idx]} - dvc exp branch ${exp_ids[$idx]} ${exp_names[$idx]} - dvc add outputs/model.tar.gz - dvc push outputs/model.tar.gz - git branch -m ${exp_names[$idx]} exp-$cur_branch-${exp_names[$idx]} - git checkout exp-$cur_branch-${exp_names[$idx]} - git push origin exp-$cur_branch-${exp_names[$idx]} - git checkout $CUR_BRANCH -done + +echo ${exp_ids[$idx]} +echo ${exp_names[$idx]} +dvc add outputs/model.tar.gz +dvc push outputs/model.tar.gz VM_ID=$(tail -n 2 /home/.jarviscloud/jarvisconfig | head -n 1) python clouds/jarvislabs.py vm destroy $CLOUD_AT $CLOUD_ID $VM_ID