From d1f13762bb7cd1ae8b1fd19d714c04cef1cc5b4b Mon Sep 17 00:00:00 2001
From: svlandeg <svlandeg@github.com>
Date: Tue, 11 Jul 2023 16:16:50 +0200
Subject: [PATCH 01/14] clinical trial extraction, tutorial with LLM

---
 .../llm_clinical_trials/assets/23206895.txt   |  10 ++
 .../llm_clinical_trials/assets/27144689.txt   |  14 ++
 .../llm_clinical_trials/assets/35172054.txt   |  10 ++
 .../configs/config_openai.cfg                 |  16 +++
 tutorials/llm_clinical_trials/project.yml     |  25 ++++
 .../llm_clinical_trials/requirements.txt      |   1 +
 .../llm_clinical_trials/scripts/__init__.py   |   0
 .../scripts/input_reader.py                   |  15 ++
 .../scripts/run_pipeline.py                   |  24 ++++
 .../llm_clinical_trials/scripts/trial_task.py | 128 ++++++++++++++++++
 10 files changed, 243 insertions(+)
 create mode 100644 tutorials/llm_clinical_trials/assets/23206895.txt
 create mode 100644 tutorials/llm_clinical_trials/assets/27144689.txt
 create mode 100644 tutorials/llm_clinical_trials/assets/35172054.txt
 create mode 100644 tutorials/llm_clinical_trials/configs/config_openai.cfg
 create mode 100644 tutorials/llm_clinical_trials/project.yml
 create mode 100644 tutorials/llm_clinical_trials/requirements.txt
 create mode 100644 tutorials/llm_clinical_trials/scripts/__init__.py
 create mode 100644 tutorials/llm_clinical_trials/scripts/input_reader.py
 create mode 100644 tutorials/llm_clinical_trials/scripts/run_pipeline.py
 create mode 100644 tutorials/llm_clinical_trials/scripts/trial_task.py

diff --git a/tutorials/llm_clinical_trials/assets/23206895.txt b/tutorials/llm_clinical_trials/assets/23206895.txt
new file mode 100644
index 000000000..9c071d255
--- /dev/null
+++ b/tutorials/llm_clinical_trials/assets/23206895.txt
@@ -0,0 +1,10 @@
+Effects of fish oil supplementation on inflammatory acne
+
+Abstract
+Background: Given that acne is a rare condition in societies with higher consumption of omega-3 (n-3) relative to omega-6 (n-6) fatty acids, supplementation with n-3 may suppress inflammatory cytokine production and thereby reduce acne severity.
+
+Methods: 13 individuals with inflammatory acne were given three grams of fish oil containing 930 mg of EPA to their unchanged diet and existing acne remedies for 12 weeks. Acne was assessed using an overall severity grading scale, total inflammatory lesion counts, and colorimetry.
+
+Findings: There was no significant change in acne grading and inflammatory counts at week 12 compared to baseline. However, there was a broad range of response to the intervention on an individual basis. The results showed that acne severity improved in 8 individuals, worsened in 4, and remained unchanged in 1. Interestingly, among the individuals who showed improvement, 7 were classified as having moderate to severe acne at baseline, while 3 of the 4 whose acne deteriorated were classified as having mild acne.
+
+Conclusion: There is some evidence that fish oil supplementation is associated with an improvement in overall acne severity, especially for individuals with moderate to severe acne. Divergent responses to fish oil in our pilot study indicates that dietary and supplemental lipids are worthy of further investigation in acne.
\ No newline at end of file
diff --git a/tutorials/llm_clinical_trials/assets/27144689.txt b/tutorials/llm_clinical_trials/assets/27144689.txt
new file mode 100644
index 000000000..289219afb
--- /dev/null
+++ b/tutorials/llm_clinical_trials/assets/27144689.txt
@@ -0,0 +1,14 @@
+Hemodynamic Effects of Phenylephrine, Vasopressin, and Epinephrine in Children With Pulmonary Hypertension: A Pilot Study
+
+Abstract
+Objectives: During a pulmonary hypertensive crisis, the marked increase in pulmonary vascular resistance can result in acute right ventricular failure and death. Currently, there are no therapeutic guidelines for managing an acute crisis. This pilot study examined the hemodynamic effects of phenylephrine, arginine vasopressin, and epinephrine in pediatric patients with pulmonary hypertension.
+
+Design: In this prospective, open-label, nonrandomized pilot study, we enrolled pediatric patients previously diagnosed with pulmonary hypertension who were scheduled electively for cardiac catheterization. Primary outcome was a change in the ratio of pulmonary-to-systemic vascular resistance. Baseline hemodynamic data were collected before and after the study drug was administered.
+
+Patients: Eleven of 15 participants were women, median age was 9.2 years (range, 1.7-14.9 yr), and median weight was 26.8 kg (range, 8.5-55.2 kg). Baseline mean pulmonary artery pressure was 49 ± 19 mm Hg, and mean indexed pulmonary vascular resistance was 10 ± 5.4 Wood units. Etiology of pulmonary hypertension varied, and all were on systemic pulmonary hypertension medications.
+
+Interventions: Patients 1-5 received phenylephrine 1 μg/kg; patients 6-10 received arginine vasopressin 0.03 U/kg; and patients 11-15 received epinephrine 1 μg/kg. Hemodynamics was measured continuously for up to 10 minutes following study drug administration.
+
+Measurements and main results: After study drug administration, the ratio of pulmonary-to-systemic vascular resistance decreased in three of five patients receiving phenylephrine, five of five patients receiving arginine vasopressin, and three of five patients receiving epinephrine. Although all three medications resulted in an increase in aortic pressure, only arginine vasopressin consistently resulted in a decrease in the ratio of systolic pulmonary artery-to-aortic pressure.
+
+Conclusions: This prospective pilot study of phenylephrine, arginine vasopressin, and epinephrine in pediatric patients with pulmonary hypertension showed an increase in aortic pressure with all drugs although only vasopressin resulted in a consistent decrease in the ratio of pulmonary-to-systemic vascular resistance. Studies with more subjects are warranted to define optimal dosing strategies of these medications in an acute pulmonary hypertensive crisis.
\ No newline at end of file
diff --git a/tutorials/llm_clinical_trials/assets/35172054.txt b/tutorials/llm_clinical_trials/assets/35172054.txt
new file mode 100644
index 000000000..817e77e59
--- /dev/null
+++ b/tutorials/llm_clinical_trials/assets/35172054.txt
@@ -0,0 +1,10 @@
+Oral Nirmatrelvir for High-Risk, Nonhospitalized Adults with Covid-19
+
+Abstract
+Background: Nirmatrelvir is an orally administered severe acute respiratory syndrome coronavirus 2 main protease (Mpro) inhibitor with potent pan-human-coronavirus activity in vitro.
+
+Methods: We conducted a phase 2-3 double-blind, randomized, controlled trial in which symptomatic, unvaccinated, nonhospitalized adults at high risk for progression to severe coronavirus disease 2019 (Covid-19) were assigned in a 1:1 ratio to receive either 300 mg of nirmatrelvir plus 100 mg of ritonavir (a pharmacokinetic enhancer) or placebo every 12 hours for 5 days. Covid-19-related hospitalization or death from any cause through day 28, viral load, and safety were evaluated.
+
+Results: A total of 2246 patients underwent randomization; 1120 patients received nirmatrelvir plus ritonavir (nirmatrelvir group) and 1126 received placebo (placebo group). In the planned interim analysis of patients treated within 3 days after symptom onset (modified intention-to treat population, comprising 774 of the 1361 patients in the full analysis population), the incidence of Covid-19-related hospitalization or death by day 28 was lower in the nirmatrelvir group than in the placebo group by 6.32 percentage points (95% confidence interval [CI], -9.04 to -3.59; P<0.001; relative risk reduction, 89.1%); the incidence was 0.77% (3 of 389 patients) in the nirmatrelvir group, with 0 deaths, as compared with 7.01% (27 of 385 patients) in the placebo group, with 7 deaths. Efficacy was maintained in the final analysis involving the 1379 patients in the modified intention-to-treat population, with a difference of -5.81 percentage points (95% CI, -7.78 to -3.84; P<0.001; relative risk reduction, 88.9%). All 13 deaths occurred in the placebo group. The viral load was lower with nirmatrelvir plus ritonavir than with placebo at day 5 of treatment, with an adjusted mean difference of -0.868 log10 copies per milliliter when treatment was initiated within 3 days after the onset of symptoms. The incidence of adverse events that emerged during the treatment period was similar in the two groups (any adverse event, 22.6% with nirmatrelvir plus ritonavir vs. 23.9% with placebo; serious adverse events, 1.6% vs. 6.6%; and adverse events leading to discontinuation of the drugs or placebo, 2.1% vs. 4.2%). Dysgeusia (5.6% vs. 0.3%) and diarrhea (3.1% vs. 1.6%) occurred more frequently with nirmatrelvir plus ritonavir than with placebo.
+
+Conclusions: Treatment of symptomatic Covid-19 with nirmatrelvir plus ritonavir resulted in a risk of progression to severe Covid-19 that was 89% lower than the risk with placebo, without evident safety concerns. 
\ No newline at end of file
diff --git a/tutorials/llm_clinical_trials/configs/config_openai.cfg b/tutorials/llm_clinical_trials/configs/config_openai.cfg
new file mode 100644
index 000000000..857c31a35
--- /dev/null
+++ b/tutorials/llm_clinical_trials/configs/config_openai.cfg
@@ -0,0 +1,16 @@
+[nlp]
+lang = "en"
+pipeline = ["llm"]
+batch_size = 128
+
+[components]
+
+[components.llm]
+factory = "llm"
+
+[components.llm.model]
+@llm_models = "spacy.GPT-3-5.v1"
+name = "gpt-3.5-turbo"
+
+[components.llm.task]
+@llm_tasks = "tutorial.TrialSummary.v1"
diff --git a/tutorials/llm_clinical_trials/project.yml b/tutorials/llm_clinical_trials/project.yml
new file mode 100644
index 000000000..50f2b2366
--- /dev/null
+++ b/tutorials/llm_clinical_trials/project.yml
@@ -0,0 +1,25 @@
+title: 'Clinical trial results extraction with LLMs'
+description: "Using an LLM in a spaCy pipeline to extract patient groups, treatments and outcomes in clinical trials."
+
+vars:
+  config: "config_openai.cfg"
+  pmid: 27144689
+
+# These are the directories that the project needs.
+directories: ["assets", "configs", "scripts"]
+
+# Assets that should be available in the directory.
+assets:
+  - dest: "assets/23206895.txt"
+  - dest: "assets/27144689.txt"
+  - dest: "assets/35172054.txt"
+
+# Project commands
+commands:
+  - name: summarize
+    help: "Run the LLM-powered spaCy pipeline"
+    script:
+      - "python ./scripts/run_pipeline.py ${vars.pmid} ./configs/${vars.config}"
+    deps:
+      - "assets/${vars.pmid}.txt"
+      - "configs/${vars.config}"
diff --git a/tutorials/llm_clinical_trials/requirements.txt b/tutorials/llm_clinical_trials/requirements.txt
new file mode 100644
index 000000000..0d359ce65
--- /dev/null
+++ b/tutorials/llm_clinical_trials/requirements.txt
@@ -0,0 +1 @@
+spacy-llm==0.4.0
\ No newline at end of file
diff --git a/tutorials/llm_clinical_trials/scripts/__init__.py b/tutorials/llm_clinical_trials/scripts/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tutorials/llm_clinical_trials/scripts/input_reader.py b/tutorials/llm_clinical_trials/scripts/input_reader.py
new file mode 100644
index 000000000..bdc6bf3cd
--- /dev/null
+++ b/tutorials/llm_clinical_trials/scripts/input_reader.py
@@ -0,0 +1,15 @@
+from pathlib import Path
+
+from wasabi import msg
+
+DATA_DIR = Path(__file__).parent.parent / "assets"
+
+
+def read_trial(pmid: int, verbose: bool = False) -> str:
+    file_path = DATA_DIR / f"{pmid}.txt"
+    msg.text(f"Reading article text from {file_path}", show=verbose)
+
+    with open(file_path, "r", encoding="utf8") as file:
+        data = file.read()
+
+    return data
diff --git a/tutorials/llm_clinical_trials/scripts/run_pipeline.py b/tutorials/llm_clinical_trials/scripts/run_pipeline.py
new file mode 100644
index 000000000..ac5cbba26
--- /dev/null
+++ b/tutorials/llm_clinical_trials/scripts/run_pipeline.py
@@ -0,0 +1,24 @@
+from pathlib import Path
+
+import typer
+from input_reader import read_trial
+from spacy_llm.util import assemble
+from trial_task import make_trial_task
+from wasabi import msg
+
+
+def run_pipeline(pmid: int, config_path: Path, verbose: bool = False):
+    msg.text(f"Processing PMID {pmid}", show=verbose)
+    msg.text(f"Loading config from {config_path}", show=verbose)
+    text = read_trial(pmid, verbose=verbose)
+    nlp = assemble(config_path)
+    doc = nlp(text)
+
+    print(doc._.trial_summary)
+    print()
+    for ent in doc.ents:
+        print(ent.label_, ent.text)
+
+
+if __name__ == "__main__":
+    typer.run(run_pipeline)
diff --git a/tutorials/llm_clinical_trials/scripts/trial_task.py b/tutorials/llm_clinical_trials/scripts/trial_task.py
new file mode 100644
index 000000000..704285ca3
--- /dev/null
+++ b/tutorials/llm_clinical_trials/scripts/trial_task.py
@@ -0,0 +1,128 @@
+from typing import Iterable
+
+import spacy
+from spacy.matcher import PhraseMatcher
+from spacy.tokens import Doc
+from spacy_llm.registry import registry
+from spacy_llm.ty import LLMTask
+
+
+INSTRUCTION = """
+Summarize the trial results in a structured fashion.
+First, identify all patient groups with distinct treatments. 
+Then, for each patient group, write down the following:
+
+Patient group: <name>
+Number of patients in the group: <number>
+Treatment drug or substance: <drug>
+Treatment dose: <dose>
+Treatment frequency of administration: <frequency>
+Treatment duration: <duration>
+Outcome: <outcome>
+"""  # parse_responses() searches for these field labels, so keep the two in sync
+
+
+@registry.llm_tasks("tutorial.TrialSummary.v1")
+def make_trial_task() -> "TrialSummaryTask":
+    return TrialSummaryTask(INSTRUCTION)
+
+
+class TrialSummaryTask(LLMTask):
+    """Ask the LLM for a structured trial summary and tag its values in the doc.
+
+    The raw response is stored in ``doc._.trial_summary``; the field values parsed
+    from it are looked up in the text and set as ``doc.ents``.
+    """
+
+    def __init__(self, instruction: str):
+        self.instruction = instruction
+        # force=True: the extension is registered on Doc globally, so building a
+        # second task in the same process would otherwise raise a ValueError.
+        Doc.set_extension("trial_summary", default="", force=True)
+
+    def generate_prompts(self, docs: Iterable[Doc]) -> Iterable[str]:
+        for doc in docs:
+            yield self.generate_prompt(doc)
+
+    def generate_prompt(self, doc: Doc) -> str:
+        prompt = "Below this instruction, I will provide you with a clinical trial abstract. \n"
+        prompt += self.instruction + "\n\n" + doc.text
+        return prompt
+
+    def parse_responses_v1(
+        self, docs: Iterable[Doc], responses: Iterable[str]
+    ) -> Iterable[Doc]:
+        for doc, response in zip(docs, responses):
+            doc._.trial_summary = response
+            yield doc
+
+    # quick and dirty implementation for now
+    def parse_responses(
+        self, docs: Iterable[Doc], responses: Iterable[str]
+    ) -> Iterable[Doc]:
+        # Assumes every "Patient group:" section of the response contains all seven
+        # labels from the prompt template, in template order.
+        for doc, response in zip(docs, responses):
+            response_lower = response.lower()
+
+            patient_groups = []
+            patient_numbers = []
+            drugs = []
+            doses = []
+            frequencies = []
+            durations = []
+            outcomes = []
+
+            start = response_lower.find("patient group:")
+            while start >= 0:
+                patient_group_start = response_lower.find("patient group:", start)
+                patient_group_end = patient_group_start + len("patient group:")
+                patient_number_start = response_lower.find("number of patients in the group:", start)
+                patient_number_end = patient_number_start + len("number of patients in the group:")
+                treatment_drug_start = response_lower.find("treatment drug or substance:", start)
+                treatment_drug_end = treatment_drug_start + len("treatment drug or substance:")
+                treatment_dose_start = response_lower.find("treatment dose:", start)
+                treatment_dose_end = treatment_dose_start + len("treatment dose:")
+                treatment_frequency_start = response_lower.find("treatment frequency of administration:", start)
+                treatment_frequency_end = treatment_frequency_start + len("treatment frequency of administration:")
+                treatment_duration_start = response_lower.find("treatment duration:", start)
+                treatment_duration_end = treatment_duration_start + len("treatment duration:")
+                outcome_start = response_lower.find("outcome:", start)
+                outcome_end = outcome_start + len("outcome:")
+                # Each value runs from the end of its label to the start of the next label.
+                patient_group = response[patient_group_end:patient_number_start].strip()
+                patient_groups.append(patient_group)
+                patient_number = response[patient_number_end:treatment_drug_start].strip()
+                patient_numbers.append(patient_number)
+                treatment_drug = response[treatment_drug_end:treatment_dose_start].strip()
+                drugs.append(treatment_drug)
+                treatment_dose = response[treatment_dose_end:treatment_frequency_start].strip()
+                doses.append(treatment_dose)
+                treatment_frequency = response[treatment_frequency_end:treatment_duration_start].strip()
+                frequencies.append(treatment_frequency)
+                treatment_duration = response[treatment_duration_end:outcome_start].strip()
+                durations.append(treatment_duration)
+                # Find the next section first: it marks where this outcome ends.
+                start = response_lower.find("patient group:", outcome_end)
+                # -1 (no further section) as a slice end would cut off the last character.
+                outcome_stop = start if start >= 0 else len(response)
+                outcome = response[outcome_end:outcome_stop].strip()
+                outcomes.append(outcome)
+
+            matcher = PhraseMatcher(doc.vocab, attr="LOWER")
+            nlp = spacy.blank("en")
+            matcher.add("Patient_Group", [nlp.make_doc(text) for text in patient_groups])
+            matcher.add("Patient_Number", [nlp.make_doc(text) for text in patient_numbers])
+            matcher.add("Treatment_Drug", [nlp.make_doc(text) for text in drugs])
+            matcher.add("Dose", [nlp.make_doc(text) for text in doses])
+            matcher.add("Frequency", [nlp.make_doc(text) for text in frequencies])
+            matcher.add("Duration", [nlp.make_doc(text) for text in durations])
+            matcher.add("Outcome", [nlp.make_doc(text) for text in outcomes])
+
+            matches = matcher(doc, as_spans=True)
+            matches = spacy.util.filter_spans(matches)
+
+            # This assumes that no entities were set prior to this component
+            doc.ents = matches
+            doc._.trial_summary = response
+            yield doc

From 5db24165c301e199b7b58a45ec54ac5cc6a2c1cf Mon Sep 17 00:00:00 2001
From: svlandeg <svlandeg@github.com>
Date: Thu, 13 Jul 2023 16:31:01 +0200
Subject: [PATCH 02/14] NER and trial task

---
 .../configs/ner_openai.cfg                    | 17 +++++++++++++++
 .../{config_openai.cfg => trial_openai.cfg}   |  0
 tutorials/llm_clinical_trials/project.yml     | 15 ++++++++++---
 .../{run_pipeline.py => extract_results.py}   |  0
 .../scripts/visualise_entities.py             | 21 +++++++++++++++++++
 5 files changed, 50 insertions(+), 3 deletions(-)
 create mode 100644 tutorials/llm_clinical_trials/configs/ner_openai.cfg
 rename tutorials/llm_clinical_trials/configs/{config_openai.cfg => trial_openai.cfg} (100%)
 rename tutorials/llm_clinical_trials/scripts/{run_pipeline.py => extract_results.py} (100%)
 create mode 100644 tutorials/llm_clinical_trials/scripts/visualise_entities.py

diff --git a/tutorials/llm_clinical_trials/configs/ner_openai.cfg b/tutorials/llm_clinical_trials/configs/ner_openai.cfg
new file mode 100644
index 000000000..643e083be
--- /dev/null
+++ b/tutorials/llm_clinical_trials/configs/ner_openai.cfg
@@ -0,0 +1,17 @@
+[nlp]
+lang = "en"
+pipeline = ["llm"]
+batch_size = 128
+
+[components]
+
+[components.llm]
+factory = "llm"
+
+[components.llm.model]
+@llm_models = "spacy.GPT-3-5.v1"
+name = "gpt-3.5-turbo"
+
+[components.llm.task]
+@llm_tasks = "spacy.NER.v2"
+labels = "Drug,Dose"
diff --git a/tutorials/llm_clinical_trials/configs/config_openai.cfg b/tutorials/llm_clinical_trials/configs/trial_openai.cfg
similarity index 100%
rename from tutorials/llm_clinical_trials/configs/config_openai.cfg
rename to tutorials/llm_clinical_trials/configs/trial_openai.cfg
diff --git a/tutorials/llm_clinical_trials/project.yml b/tutorials/llm_clinical_trials/project.yml
index 50f2b2366..1bc667514 100644
--- a/tutorials/llm_clinical_trials/project.yml
+++ b/tutorials/llm_clinical_trials/project.yml
@@ -2,7 +2,8 @@ title: 'Clinical trial results extraction with LLMs'
 description: "Using an LLM in a spaCy pipeline to extract patient groups, treatments and outcomes in clinical trials."
 
 vars:
-  config: "config_openai.cfg"
+  ner_config: "ner_openai.cfg"
+  trial_config: "trial_openai.cfg"
   pmid: 27144689
 
 # These are the directories that the project needs.
@@ -16,10 +17,18 @@ assets:
 
 # Project commands
 commands:
+  - name: zero_shot_ner
+    help: "Run an LLM pipeline with zero-shot NER"
+    script:
+      - "python ./scripts/visualise_entities.py ${vars.pmid} ./configs/${vars.ner_config}"
+    deps:
+      - "assets/${vars.pmid}.txt"
+      - "configs/${vars.ner_config}"
+
   - name: summarize
     help: "Run the LLM-powered spaCy pipeline"
     script:
-      - "python ./scripts/run_pipeline.py ${vars.pmid} ./configs/${vars.config}"
+      - "python ./scripts/extract_results.py ${vars.pmid} ./configs/${vars.trial_config}"
     deps:
       - "assets/${vars.pmid}.txt"
-      - "configs/${vars.config}"
+      - "configs/${vars.trial_config}"
diff --git a/tutorials/llm_clinical_trials/scripts/run_pipeline.py b/tutorials/llm_clinical_trials/scripts/extract_results.py
similarity index 100%
rename from tutorials/llm_clinical_trials/scripts/run_pipeline.py
rename to tutorials/llm_clinical_trials/scripts/extract_results.py
diff --git a/tutorials/llm_clinical_trials/scripts/visualise_entities.py b/tutorials/llm_clinical_trials/scripts/visualise_entities.py
new file mode 100644
index 000000000..3c5041566
--- /dev/null
+++ b/tutorials/llm_clinical_trials/scripts/visualise_entities.py
@@ -0,0 +1,21 @@
+from pathlib import Path
+
+import typer
+from input_reader import read_trial
+from spacy import displacy
+from spacy_llm.util import assemble
+from wasabi import msg
+
+
+def visualise_entities(pmid: int, config_path: Path, verbose: bool = False):
+    msg.text(f"Processing PMID {pmid}", show=verbose)
+    msg.text(f"Loading config from {config_path}", show=verbose)
+    text = read_trial(pmid, verbose=verbose)
+    nlp = assemble(config_path)
+    doc = nlp(text)
+    options = {"ents": ["Drug", "Dose"], "colors": {"Drug": "pink", "Dose": "orange"}}
+    displacy.serve(doc, style="ent", options=options)
+
+
+if __name__ == "__main__":
+    typer.run(visualise_entities)

From 0d50bc9f4a6a89f41bfa70be1781d581e9d08189 Mon Sep 17 00:00:00 2001
From: svlandeg <svlandeg@github.com>
Date: Fri, 14 Jul 2023 15:30:45 +0200
Subject: [PATCH 03/14] add Falcon files

---
 .../llm_clinical_trials/configs/ner_dolly.cfg   | 17 +++++++++++++++++
 .../llm_clinical_trials/configs/ner_falcon.cfg  | 17 +++++++++++++++++
 .../llm_clinical_trials/falcon_requirements.txt |  4 ++++
 tutorials/llm_clinical_trials/project.yml       |  2 +-
 .../scripts/visualise_entities.py               |  8 ++++++--
 5 files changed, 45 insertions(+), 3 deletions(-)
 create mode 100644 tutorials/llm_clinical_trials/configs/ner_dolly.cfg
 create mode 100644 tutorials/llm_clinical_trials/configs/ner_falcon.cfg
 create mode 100644 tutorials/llm_clinical_trials/falcon_requirements.txt

diff --git a/tutorials/llm_clinical_trials/configs/ner_dolly.cfg b/tutorials/llm_clinical_trials/configs/ner_dolly.cfg
new file mode 100644
index 000000000..c8c533257
--- /dev/null
+++ b/tutorials/llm_clinical_trials/configs/ner_dolly.cfg
@@ -0,0 +1,17 @@
+[nlp]
+lang = "en"
+pipeline = ["llm"]
+batch_size = 128
+
+[components]
+
+[components.llm]
+factory = "llm"
+
+[components.llm.model]
+@llm_models = "spacy.Dolly.v1"
+name = "dolly-v2-3b"
+
+[components.llm.task]
+@llm_tasks = "spacy.NER.v2"
+labels = "Drug,Dose"
diff --git a/tutorials/llm_clinical_trials/configs/ner_falcon.cfg b/tutorials/llm_clinical_trials/configs/ner_falcon.cfg
new file mode 100644
index 000000000..89bb970b6
--- /dev/null
+++ b/tutorials/llm_clinical_trials/configs/ner_falcon.cfg
@@ -0,0 +1,17 @@
+[nlp]
+lang = "en"
+pipeline = ["llm"]
+batch_size = 128
+
+[components]
+
+[components.llm]
+factory = "llm"
+
+[components.llm.model]
+@llm_models = "spacy.Falcon.v1"
+name = "falcon-7b-instruct"
+
+[components.llm.task]
+@llm_tasks = "spacy.NER.v2"
+labels = "Drug,Dose"
diff --git a/tutorials/llm_clinical_trials/falcon_requirements.txt b/tutorials/llm_clinical_trials/falcon_requirements.txt
new file mode 100644
index 000000000..6bbb84935
--- /dev/null
+++ b/tutorials/llm_clinical_trials/falcon_requirements.txt
@@ -0,0 +1,4 @@
+cupy-cuda117
+torch==1.13.1+cu117 -f https://download.pytorch.org/whl/torch_stable.html
+transformers
+einops
\ No newline at end of file
diff --git a/tutorials/llm_clinical_trials/project.yml b/tutorials/llm_clinical_trials/project.yml
index 1bc667514..0f33a7977 100644
--- a/tutorials/llm_clinical_trials/project.yml
+++ b/tutorials/llm_clinical_trials/project.yml
@@ -2,7 +2,7 @@ title: 'Clinical trial results extraction with LLMs'
 description: "Using an LLM in a spaCy pipeline to extract patient groups, treatments and outcomes in clinical trials."
 
 vars:
-  ner_config: "ner_openai.cfg"
+  ner_config: "ner_dolly.cfg" # "ner_falcon.cfg"  # "ner_openai.cfg"
   trial_config: "trial_openai.cfg"
   pmid: 27144689
 
diff --git a/tutorials/llm_clinical_trials/scripts/visualise_entities.py b/tutorials/llm_clinical_trials/scripts/visualise_entities.py
index 3c5041566..da8c06539 100644
--- a/tutorials/llm_clinical_trials/scripts/visualise_entities.py
+++ b/tutorials/llm_clinical_trials/scripts/visualise_entities.py
@@ -13,8 +13,12 @@ def visualise_entities(pmid: int, config_path: Path, verbose: bool = False):
     text = read_trial(pmid, verbose=verbose)
     nlp = assemble(config_path)
     doc = nlp(text)
-    options = {"ents": ["Drug", "Dose"], "colors": {"Drug": "pink", "Dose": "orange"}}
-    displacy.serve(doc, style="ent", options=options)
+    # options = {"ents": ["Drug", "Dose"], "colors": {"Drug": "pink", "Dose": "orange"}}
+    ents = list(doc.ents)
+    print("ents", len(ents))
+    for ent in ents:
+        print(ent.text, ent.label_)
+    #displacy.serve(doc, style="ent", options=options)
 
 
 if __name__ == "__main__":

From 63fb645a3db3ff3482b26741f3934284742821a5 Mon Sep 17 00:00:00 2001
From: svlandeg <svlandeg@github.com>
Date: Mon, 24 Jul 2023 11:33:54 +0200
Subject: [PATCH 04/14] add debugging info

---
 tutorials/llm_clinical_trials/falcon_requirements.txt  |  5 +++--
 tutorials/llm_clinical_trials/project.yml              |  2 +-
 .../llm_clinical_trials/scripts/visualise_entities.py  | 10 ++++++++--
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/tutorials/llm_clinical_trials/falcon_requirements.txt b/tutorials/llm_clinical_trials/falcon_requirements.txt
index 6bbb84935..3be110192 100644
--- a/tutorials/llm_clinical_trials/falcon_requirements.txt
+++ b/tutorials/llm_clinical_trials/falcon_requirements.txt
@@ -1,4 +1,5 @@
 cupy-cuda117
-torch==1.13.1+cu117 -f https://download.pytorch.org/whl/torch_stable.html
+torch==2.0.1+cu117 -f https://download.pytorch.org/whl/torch_stable.html
 transformers
-einops
\ No newline at end of file
+einops
+xformers
\ No newline at end of file
diff --git a/tutorials/llm_clinical_trials/project.yml b/tutorials/llm_clinical_trials/project.yml
index 0f33a7977..550413040 100644
--- a/tutorials/llm_clinical_trials/project.yml
+++ b/tutorials/llm_clinical_trials/project.yml
@@ -2,7 +2,7 @@ title: 'Clinical trial results extraction with LLMs'
 description: "Using an LLM in a spaCy pipeline to extract patient groups, treatments and outcomes in clinical trials."
 
 vars:
-  ner_config: "ner_dolly.cfg" # "ner_falcon.cfg"  # "ner_openai.cfg"
+  ner_config: "ner_falcon.cfg" # "ner_dolly.cfg"   # "ner_openai.cfg"
   trial_config: "trial_openai.cfg"
   pmid: 27144689
 
diff --git a/tutorials/llm_clinical_trials/scripts/visualise_entities.py b/tutorials/llm_clinical_trials/scripts/visualise_entities.py
index da8c06539..12b063ca3 100644
--- a/tutorials/llm_clinical_trials/scripts/visualise_entities.py
+++ b/tutorials/llm_clinical_trials/scripts/visualise_entities.py
@@ -8,8 +8,14 @@
 
 
 def visualise_entities(pmid: int, config_path: Path, verbose: bool = False):
-    msg.text(f"Processing PMID {pmid}", show=verbose)
-    msg.text(f"Loading config from {config_path}", show=verbose)
+    import logging
+    import spacy_llm
+
+    spacy_llm.logger.addHandler(logging.StreamHandler())
+    spacy_llm.logger.setLevel(logging.DEBUG)
+
+    #msg.text(f"Processing PMID {pmid}", show=verbose)
+    #msg.text(f"Loading config from {config_path}", show=verbose)
     text = read_trial(pmid, verbose=verbose)
     nlp = assemble(config_path)
     doc = nlp(text)

From aca5e1e67bc5f90dd58838dc83b99f997b1b3ad2 Mon Sep 17 00:00:00 2001
From: svlandeg <svlandeg@github.com>
Date: Tue, 16 Jan 2024 17:10:11 +0100
Subject: [PATCH 05/14] cleanup

---
 .../scripts/visualise_entities.py             | 31 ++++++++++++-------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/tutorials/llm_clinical_trials/scripts/visualise_entities.py b/tutorials/llm_clinical_trials/scripts/visualise_entities.py
index 12b063ca3..0030c779b 100644
--- a/tutorials/llm_clinical_trials/scripts/visualise_entities.py
+++ b/tutorials/llm_clinical_trials/scripts/visualise_entities.py
@@ -1,30 +1,39 @@
+import logging
 from pathlib import Path
 
+import spacy_llm
 import typer
 from input_reader import read_trial
 from spacy import displacy
 from spacy_llm.util import assemble
 from wasabi import msg
 
+DEBUG = False
+PRINT_CONSOLE = False
+PRINT_DISPLACY = True
 
-def visualise_entities(pmid: int, config_path: Path, verbose: bool = False):
-    import logging
-    import spacy_llm
 
+def visualise_entities(pmid: int, config_path: Path, verbose: bool = False):
     spacy_llm.logger.addHandler(logging.StreamHandler())
-    spacy_llm.logger.setLevel(logging.DEBUG)
+    if DEBUG:
+        spacy_llm.logger.setLevel(logging.DEBUG)
 
-    #msg.text(f"Processing PMID {pmid}", show=verbose)
-    #msg.text(f"Loading config from {config_path}", show=verbose)
+    msg.text(f"Processing PMID {pmid}", show=verbose)
+    msg.text(f"Loading config from {config_path}", show=verbose)
     text = read_trial(pmid, verbose=verbose)
     nlp = assemble(config_path)
     doc = nlp(text)
-    # options = {"ents": ["Drug", "Dose"], "colors": {"Drug": "pink", "Dose": "orange"}}
     ents = list(doc.ents)
-    print("ents", len(ents))
-    for ent in ents:
-        print(ent.text, ent.label_)
-    #displacy.serve(doc, style="ent", options=options)
+    if PRINT_CONSOLE:
+        print("ents", len(ents))
+        for ent in ents:
+            print(ent.text, ent.label_)
+    if PRINT_DISPLACY:
+        options = {
+            "ents": ["Drug", "Dose"],
+            "colors": {"Drug": "pink", "Dose": "orange"},
+        }
+        displacy.serve(doc, style="ent", options=options)
 
 
 if __name__ == "__main__":

From a0c1d852323f00457e79e97b33d56bc4c0abc907 Mon Sep 17 00:00:00 2001
From: svlandeg <svlandeg@github.com>
Date: Fri, 19 Jan 2024 14:15:52 +0100
Subject: [PATCH 06/14] zeroshot config with GPT4

---
 .../configs/ner_zeroshot_openai.cfg              | 16 ++++++++++++++++
 tutorials/llm_clinical_trials/project.yml        |  8 ++++----
 2 files changed, 20 insertions(+), 4 deletions(-)
 create mode 100644 tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg

diff --git a/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg b/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg
new file mode 100644
index 000000000..fc152e9bb
--- /dev/null
+++ b/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg
@@ -0,0 +1,16 @@
+[nlp]
+lang = "en"
+pipeline = ["llm"]
+batch_size = 128
+
+[components]
+
+[components.llm]
+factory = "llm"
+
+[components.llm.model]
+@llm_models = "spacy.GPT-4.v2"
+
+[components.llm.task]
+@llm_tasks = "spacy.NER.v2"
+labels = "Drug,Dose"
diff --git a/tutorials/llm_clinical_trials/project.yml b/tutorials/llm_clinical_trials/project.yml
index 550413040..4448551cf 100644
--- a/tutorials/llm_clinical_trials/project.yml
+++ b/tutorials/llm_clinical_trials/project.yml
@@ -2,7 +2,7 @@ title: 'Clinical trial results extraction with LLMs'
 description: "Using an LLM in a spaCy pipeline to extract patient groups, treatments and outcomes in clinical trials."
 
 vars:
-  ner_config: "ner_falcon.cfg" # "ner_dolly.cfg"   # "ner_openai.cfg"
+  ner_config: "ner_zeroshot_openai.cfg"  # "ner_dolly.cfg"   # "ner_falcon.cfg"
   trial_config: "trial_openai.cfg"
   pmid: 27144689
 
@@ -17,15 +17,15 @@ assets:
 
 # Project commands
 commands:
-  - name: zero_shot_ner
-    help: "Run an LLM pipeline with zero-shot NER"
+  - name: ner
+    help: "Run an LLM pipeline with zero-shot NER and visualise the predicted entities"
     script:
       - "python ./scripts/visualise_entities.py ${vars.pmid} ./configs/${vars.ner_config}"
     deps:
       - "assets/${vars.pmid}.txt"
       - "configs/${vars.ner_config}"
 
-  - name: summarize
+  - name: summarization
     help: "Run the LLM-powered spaCy pipeline"
     script:
       - "python ./scripts/extract_results.py ${vars.pmid} ./configs/${vars.trial_config}"

From efbae959986d52bd5b38af0cdb76bedb1ca5b206 Mon Sep 17 00:00:00 2001
From: svlandeg <svlandeg@github.com>
Date: Fri, 19 Jan 2024 14:24:11 +0100
Subject: [PATCH 07/14] add seed and temperature to the zeroshot config

---
 tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg b/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg
index fc152e9bb..33e1bdb8e 100644
--- a/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg
+++ b/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg
@@ -10,6 +10,7 @@ factory = "llm"
 
 [components.llm.model]
 @llm_models = "spacy.GPT-4.v2"
+config = {"seed": 342, "temperature": 0.0}
 
 [components.llm.task]
 @llm_tasks = "spacy.NER.v2"

From ffb89964e7bfe4486d375532605d6737e7a988bc Mon Sep 17 00:00:00 2001
From: svlandeg <svlandeg@github.com>
Date: Fri, 19 Jan 2024 16:45:29 +0100
Subject: [PATCH 08/14] add fewshot config with NER.v3

---
 .../configs/fewshot_drugs_dose.json           | 48 +++++++++++++++++++
 .../configs/ner_fewshot_openai.cfg            | 27 +++++++++++
 .../configs/ner_openai.cfg                    | 17 -------
 .../configs/ner_zeroshot_openai.cfg           |  2 +-
 tutorials/llm_clinical_trials/project.yml     |  2 +-
 .../scripts/visualise_entities.py             | 12 ++---
 6 files changed, 83 insertions(+), 25 deletions(-)
 create mode 100644 tutorials/llm_clinical_trials/configs/fewshot_drugs_dose.json
 create mode 100644 tutorials/llm_clinical_trials/configs/ner_fewshot_openai.cfg
 delete mode 100644 tutorials/llm_clinical_trials/configs/ner_openai.cfg

diff --git a/tutorials/llm_clinical_trials/configs/fewshot_drugs_dose.json b/tutorials/llm_clinical_trials/configs/fewshot_drugs_dose.json
new file mode 100644
index 000000000..912483186
--- /dev/null
+++ b/tutorials/llm_clinical_trials/configs/fewshot_drugs_dose.json
@@ -0,0 +1,48 @@
+[
+  {
+    "text": "The patient was given 1mg of paracetamol.",
+    "spans": [
+      {
+        "text": "paracetamol",
+        "is_entity": true,
+        "label": "Drug",
+        "reason": "is a drug name, used as medication"
+      },
+      {
+        "text": "1mg",
+        "is_entity": true,
+        "label": "Dose",
+        "reason": "is the quantity or dose of the given medication"
+      },
+      {
+        "text": "patient",
+        "is_entity": false,
+        "label": "==NONE==",
+        "reason": "is a person, not a drug or dose"
+      }
+    ]
+  },
+  {
+    "text": "Throughout the treatment, they received Aspirin 1mg/kg.",
+    "spans": [
+      {
+        "text": "Aspirin",
+        "is_entity": true,
+        "label": "Drug",
+        "reason": "is a drug brand, used as medication"
+      },
+      {
+        "text": "1mg/kg",
+        "is_entity": true,
+        "label": "Dose",
+        "reason": "is the quantity or dose of the given drug"
+      },
+      {
+        "text": "Aspirin 1mg/kg",
+        "is_entity": false,
+        "label": "==NONE==",
+        "reason": "contains both the drug and the dose - these should be two entities instead"
+      }
+    ]
+  }
+]
\ No newline at end of file
diff --git a/tutorials/llm_clinical_trials/configs/ner_fewshot_openai.cfg b/tutorials/llm_clinical_trials/configs/ner_fewshot_openai.cfg
new file mode 100644
index 000000000..e8f850334
--- /dev/null
+++ b/tutorials/llm_clinical_trials/configs/ner_fewshot_openai.cfg
@@ -0,0 +1,27 @@
+[nlp]
+lang = "en"
+pipeline = ["llm"]
+batch_size = 128
+
+[components]
+
+[components.llm]
+factory = "llm"
+
+[components.llm.model]
+@llm_models = "spacy.GPT-4.v2"
+config = {"seed": 342, "temperature": 0.0}
+
+[components.llm.task]
+@llm_tasks = "spacy.NER.v3"
+labels = ["Drug", "Dose"]
+description = Entities are drugs or their doses. They can be uppercased, title-cased, or lowercased.
+    Each occurrence of an entity in the text should be extracted.
+
+[components.llm.task.label_definitions]
+Drug = "A medicine or drug given to a patient as a treatment. Can be a generic name or brand name, e.g. paracetamol, Aspirin"
+Dose = "The measured quantity (dose) of a certain medicine given to patients, e.g. 1mg. This should exclude the drug name."
+
+[components.llm.task.examples]
+@misc = "spacy.FewShotReader.v1"
+path = "configs/fewshot_drugs_dose.json"
\ No newline at end of file
diff --git a/tutorials/llm_clinical_trials/configs/ner_openai.cfg b/tutorials/llm_clinical_trials/configs/ner_openai.cfg
deleted file mode 100644
index 643e083be..000000000
--- a/tutorials/llm_clinical_trials/configs/ner_openai.cfg
+++ /dev/null
@@ -1,17 +0,0 @@
-[nlp]
-lang = "en"
-pipeline = ["llm"]
-batch_size = 128
-
-[components]
-
-[components.llm]
-factory = "llm"
-
-[components.llm.model]
-@llm_models = "spacy.GPT-3-5.v1"
-name = "gpt-3.5-turbo"
-
-[components.llm.task]
-@llm_tasks = "spacy.NER.v2"
-labels = "Drug,Dose"
diff --git a/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg b/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg
index 33e1bdb8e..98e4d1e54 100644
--- a/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg
+++ b/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg
@@ -14,4 +14,4 @@ config = {"seed": 342, "temperature": 0.0}
 
 [components.llm.task]
 @llm_tasks = "spacy.NER.v2"
-labels = "Drug,Dose"
+labels = ["Drug", "Dose"]
diff --git a/tutorials/llm_clinical_trials/project.yml b/tutorials/llm_clinical_trials/project.yml
index 4448551cf..965d8b819 100644
--- a/tutorials/llm_clinical_trials/project.yml
+++ b/tutorials/llm_clinical_trials/project.yml
@@ -2,7 +2,7 @@ title: 'Clinical trial results extraction with LLMs'
 description: "Using an LLM in a spaCy pipeline to extract patient groups, treatments and outcomes in clinical trials."
 
 vars:
-  ner_config: "ner_zeroshot_openai.cfg"  # "ner_dolly.cfg"   # "ner_falcon.cfg"
+  ner_config: "ner_fewshot_openai.cfg"   # "ner_zeroshot_openai.cfg"
   trial_config: "trial_openai.cfg"
   pmid: 27144689
 
diff --git a/tutorials/llm_clinical_trials/scripts/visualise_entities.py b/tutorials/llm_clinical_trials/scripts/visualise_entities.py
index 0030c779b..5ef3af552 100644
--- a/tutorials/llm_clinical_trials/scripts/visualise_entities.py
+++ b/tutorials/llm_clinical_trials/scripts/visualise_entities.py
@@ -9,8 +9,8 @@
 from wasabi import msg
 
 DEBUG = False
-PRINT_CONSOLE = False
-PRINT_DISPLACY = True
+PRINT_CONSOLE = True
+PRINT_DISPLACY = False
 
 
 def visualise_entities(pmid: int, config_path: Path, verbose: bool = False):
@@ -18,16 +18,16 @@ def visualise_entities(pmid: int, config_path: Path, verbose: bool = False):
     if DEBUG:
         spacy_llm.logger.setLevel(logging.DEBUG)
 
-    msg.text(f"Processing PMID {pmid}", show=verbose)
-    msg.text(f"Loading config from {config_path}", show=verbose)
+    msg.info(f"Processing PMID {pmid}", show=verbose)
+    msg.info(f"Loading config from {config_path}", show=verbose)
     text = read_trial(pmid, verbose=verbose)
     nlp = assemble(config_path)
     doc = nlp(text)
     ents = list(doc.ents)
     if PRINT_CONSOLE:
-        print("ents", len(ents))
+        msg.text(f" - Number of entities: {len(ents)}")
         for ent in ents:
-            print(ent.text, ent.label_)
+            msg.text(f"    - {ent.text} [{ent.label_}]")
     if PRINT_DISPLACY:
         options = {
             "ents": ["Drug", "Dose"],

From eabe0ed7bc4092f1f756b0f038377d3a434d3ac8 Mon Sep 17 00:00:00 2001
From: svlandeg <svlandeg@github.com>
Date: Fri, 19 Jan 2024 18:29:45 +0100
Subject: [PATCH 09/14] typo fix

---
 tutorials/llm_clinical_trials/scripts/trial_task.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tutorials/llm_clinical_trials/scripts/trial_task.py b/tutorials/llm_clinical_trials/scripts/trial_task.py
index 704285ca3..279330796 100644
--- a/tutorials/llm_clinical_trials/scripts/trial_task.py
+++ b/tutorials/llm_clinical_trials/scripts/trial_task.py
@@ -15,7 +15,7 @@
 Patient group: <name>
 Number of patients in the group: <number>
 Treatment drug or substance: <drug>
-Treatment dose: <drug>
+Treatment dose: <dose>
 Treatment frequency of administration: <frequency>
 Treatment duration: <duration>
 Outcome: <outcome>
@@ -114,9 +114,9 @@ def parse_responses(
             matcher.add("Patient_Group", [nlp.make_doc(text) for text in patient_groups])
             matcher.add("Patient_Number", [nlp.make_doc(text) for text in patient_numbers])
             matcher.add("Treatment_Drug", [nlp.make_doc(text) for text in drugs])
-            matcher.add("Dose", [nlp.make_doc(text) for text in doses])
-            matcher.add("Frequency", [nlp.make_doc(text) for text in frequencies])
-            matcher.add("Duration", [nlp.make_doc(text) for text in durations])
+            matcher.add("Treatment_Dose", [nlp.make_doc(text) for text in doses])
+            matcher.add("Treatment_Frequency", [nlp.make_doc(text) for text in frequencies])
+            matcher.add("Treatment_Duration", [nlp.make_doc(text) for text in durations])
             matcher.add("Outcome", [nlp.make_doc(text) for text in outcomes])
 
             matches = matcher(doc, as_spans=True)

From 6a141a2fbc4773299fcdb3f7689354cf83d8475a Mon Sep 17 00:00:00 2001
From: svlandeg <svlandeg@github.com>
Date: Fri, 19 Jan 2024 18:30:00 +0100
Subject: [PATCH 10/14] update trial config to use GPT 4

---
 tutorials/llm_clinical_trials/configs/trial_openai.cfg | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tutorials/llm_clinical_trials/configs/trial_openai.cfg b/tutorials/llm_clinical_trials/configs/trial_openai.cfg
index 857c31a35..b35420c98 100644
--- a/tutorials/llm_clinical_trials/configs/trial_openai.cfg
+++ b/tutorials/llm_clinical_trials/configs/trial_openai.cfg
@@ -9,8 +9,8 @@ batch_size = 128
 factory = "llm"
 
 [components.llm.model]
-@llm_models = "spacy.GPT-3-5.v1"
-name = "gpt-3.5-turbo"
+@llm_models = "spacy.GPT-4.v2"
+config = {"seed": 342, "temperature": 0.0}
 
 [components.llm.task]
 @llm_tasks = "tutorial.TrialSummary.v1"

From c8914dc952307fa0dc36d31b565a6682e1551e1d Mon Sep 17 00:00:00 2001
From: svlandeg <svlandeg@github.com>
Date: Fri, 19 Jan 2024 18:30:37 +0100
Subject: [PATCH 11/14] some additional OS configs (WIP)

---
 .../configs/ner_fewshot_mistral.cfg           | 27 +++++++++++++++++++
 .../configs/ner_zeroshot_llama.cfg            | 17 ++++++++++++
 .../configs/ner_zeroshot_mistral.cfg          | 17 ++++++++++++
 3 files changed, 61 insertions(+)
 create mode 100644 tutorials/llm_clinical_trials/configs/ner_fewshot_mistral.cfg
 create mode 100644 tutorials/llm_clinical_trials/configs/ner_zeroshot_llama.cfg
 create mode 100644 tutorials/llm_clinical_trials/configs/ner_zeroshot_mistral.cfg

diff --git a/tutorials/llm_clinical_trials/configs/ner_fewshot_mistral.cfg b/tutorials/llm_clinical_trials/configs/ner_fewshot_mistral.cfg
new file mode 100644
index 000000000..440554c67
--- /dev/null
+++ b/tutorials/llm_clinical_trials/configs/ner_fewshot_mistral.cfg
@@ -0,0 +1,27 @@
+[nlp]
+lang = "en"
+pipeline = ["llm"]
+batch_size = 128
+
+[components]
+
+[components.llm]
+factory = "llm"
+
+[components.llm.model]
+@llm_models = "spacy.Mistral.v1"
+name = "Mistral-7B-v0.1"
+
+[components.llm.task]
+@llm_tasks = "spacy.NER.v3"
+labels = ["Drug", "Dose"]
+description = Entities are drugs or their doses. They can be uppercased, title-cased, or lowercased.
+    Each occurrence of an entity in the text should be extracted.
+
+[components.llm.task.label_definitions]
+Drug = "A medicine or drug given to a patient as a treatment. Can be a generic name or brand name, e.g. paracetamol, Aspirin"
+Dose = "The measured quantity (dose) of a certain medicine given to patients, e.g. 1mg. This should exclude the drug name."
+
+[components.llm.task.examples]
+@misc = "spacy.FewShotReader.v1"
+path = "configs/fewshot_drugs_dose.json"
\ No newline at end of file
diff --git a/tutorials/llm_clinical_trials/configs/ner_zeroshot_llama.cfg b/tutorials/llm_clinical_trials/configs/ner_zeroshot_llama.cfg
new file mode 100644
index 000000000..27b4326b9
--- /dev/null
+++ b/tutorials/llm_clinical_trials/configs/ner_zeroshot_llama.cfg
@@ -0,0 +1,17 @@
+[nlp]
+lang = "en"
+pipeline = ["llm"]
+batch_size = 128
+
+[components]
+
+[components.llm]
+factory = "llm"
+
+[components.llm.model]
+@llm_models = "spacy.Llama2.v1"
+name = "Llama-2-7b-hf"
+
+[components.llm.task]
+@llm_tasks = "spacy.NER.v2"
+labels = ["Drug", "Dose"]
diff --git a/tutorials/llm_clinical_trials/configs/ner_zeroshot_mistral.cfg b/tutorials/llm_clinical_trials/configs/ner_zeroshot_mistral.cfg
new file mode 100644
index 000000000..761e393c7
--- /dev/null
+++ b/tutorials/llm_clinical_trials/configs/ner_zeroshot_mistral.cfg
@@ -0,0 +1,17 @@
+[nlp]
+lang = "en"
+pipeline = ["llm"]
+batch_size = 128
+
+[components]
+
+[components.llm]
+factory = "llm"
+
+[components.llm.model]
+@llm_models = "spacy.Mistral.v1"
+name = "Mistral-7B-v0.1"
+
+[components.llm.task]
+@llm_tasks = "spacy.NER.v2"
+labels = ["Drug", "Dose"]

From 4f301849f2fa6b65807dc587aba44b3264197c93 Mon Sep 17 00:00:00 2001
From: svlandeg <svlandeg@github.com>
Date: Mon, 22 Jan 2024 11:46:02 +0100
Subject: [PATCH 12/14] add mistral config

---
 .../llm_clinical_trials/configs/ner_mistral.cfg | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 tutorials/llm_clinical_trials/configs/ner_mistral.cfg

diff --git a/tutorials/llm_clinical_trials/configs/ner_mistral.cfg b/tutorials/llm_clinical_trials/configs/ner_mistral.cfg
new file mode 100644
index 000000000..4f7038c83
--- /dev/null
+++ b/tutorials/llm_clinical_trials/configs/ner_mistral.cfg
@@ -0,0 +1,17 @@
+[nlp]
+lang = "en"
+pipeline = ["llm"]
+batch_size = 128
+
+[components]
+
+[components.llm]
+factory = "llm"
+
+[components.llm.model]
+@llm_models = "spacy.Mistral.v1"
+name = "Mistral-7B-v0.1"
+
+[components.llm.task]
+@llm_tasks = "spacy.NER.v2"
+labels = ["PERSON", "LOCATION"]

From 138e36a85c2eeab4e5f629478e729c1b0c020cb5 Mon Sep 17 00:00:00 2001
From: svlandeg <svlandeg@github.com>
Date: Wed, 27 Mar 2024 10:18:57 +0100
Subject: [PATCH 13/14] update gpt4 model

---
 tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg b/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg
index 98e4d1e54..7860850f3 100644
--- a/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg
+++ b/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg
@@ -9,7 +9,8 @@ batch_size = 128
 factory = "llm"
 
 [components.llm.model]
-@llm_models = "spacy.GPT-4.v2"
+@llm_models = "spacy.GPT-4.v3"
+name = "gpt-4"
 config = {"seed": 342, "temperature": 0.0}
 
 [components.llm.task]

From ec07267901a880183258bd25100521b9c25810fc Mon Sep 17 00:00:00 2001
From: svlandeg <svlandeg@github.com>
Date: Wed, 27 Mar 2024 10:19:25 +0100
Subject: [PATCH 14/14] set debug to True

---
 tutorials/llm_clinical_trials/scripts/visualise_entities.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tutorials/llm_clinical_trials/scripts/visualise_entities.py b/tutorials/llm_clinical_trials/scripts/visualise_entities.py
index 5ef3af552..8329933a7 100644
--- a/tutorials/llm_clinical_trials/scripts/visualise_entities.py
+++ b/tutorials/llm_clinical_trials/scripts/visualise_entities.py
@@ -8,7 +8,7 @@
 from spacy_llm.util import assemble
 from wasabi import msg
 
-DEBUG = False
+DEBUG = True
 PRINT_CONSOLE = True
 PRINT_DISPLACY = False