diff --git a/tutorials/llm_clinical_trials/assets/23206895.txt b/tutorials/llm_clinical_trials/assets/23206895.txt new file mode 100644 index 00000000..9c071d25 --- /dev/null +++ b/tutorials/llm_clinical_trials/assets/23206895.txt @@ -0,0 +1,10 @@ +Effects of fish oil supplementation on inflammatory acne + +Abstract +Background: Given that acne is a rare condition in societies with higher consumption of omega-3 (n-3) relative to omega-6 (n-6) fatty acids, supplementation with n-3 may suppress inflammatory cytokine production and thereby reduce acne severity. + +Methods: 13 individuals with inflammatory acne were given three grams of fish oil containing 930 mg of EPA to their unchanged diet and existing acne remedies for 12 weeks. Acne was assessed using an overall severity grading scale, total inflammatory lesion counts, and colorimetry. + +Findings: There was no significant change in acne grading and inflammatory counts at week 12 compared to baseline. However, there was a broad range of response to the intervention on an individual basis. The results showed that acne severity improved in 8 individuals, worsened in 4, and remained unchanged in 1. Interestingly, among the individuals who showed improvement, 7 were classified as having moderate to severe acne at baseline, while 3 of the 4 whose acne deteriorated were classified as having mild acne. + +Conclusion: There is some evidence that fish oil supplementation is associated with an improvement in overall acne severity, especially for individuals with moderate to severe acne. Divergent responses to fish oil in our pilot study indicates that dietary and supplemental lipids are worthy of further investigation in acne. 
\ No newline at end of file diff --git a/tutorials/llm_clinical_trials/assets/27144689.txt b/tutorials/llm_clinical_trials/assets/27144689.txt new file mode 100644 index 00000000..289219af --- /dev/null +++ b/tutorials/llm_clinical_trials/assets/27144689.txt @@ -0,0 +1,14 @@ +Hemodynamic Effects of Phenylephrine, Vasopressin, and Epinephrine in Children With Pulmonary Hypertension: A Pilot Study + +Abstract +Objectives: During a pulmonary hypertensive crisis, the marked increase in pulmonary vascular resistance can result in acute right ventricular failure and death. Currently, there are no therapeutic guidelines for managing an acute crisis. This pilot study examined the hemodynamic effects of phenylephrine, arginine vasopressin, and epinephrine in pediatric patients with pulmonary hypertension. + +Design: In this prospective, open-label, nonrandomized pilot study, we enrolled pediatric patients previously diagnosed with pulmonary hypertension who were scheduled electively for cardiac catheterization. Primary outcome was a change in the ratio of pulmonary-to-systemic vascular resistance. Baseline hemodynamic data were collected before and after the study drug was administered. + +Patients: Eleven of 15 participants were women, median age was 9.2 years (range, 1.7-14.9 yr), and median weight was 26.8 kg (range, 8.5-55.2 kg). Baseline mean pulmonary artery pressure was 49 ± 19 mm Hg, and mean indexed pulmonary vascular resistance was 10 ± 5.4 Wood units. Etiology of pulmonary hypertension varied, and all were on systemic pulmonary hypertensive medications. + +Interventions: Patients 1-5 received phenylephrine 1 μg/kg; patients 6-10 received arginine vasopressin 0.03 U/kg; and patients 11-15 received epinephrine 1 μg/kg. Hemodynamics was measured continuously for up to 10 minutes following study drug administration. 
+ +Measurements and main results: After study drug administration, the ratio of pulmonary-to-systemic vascular resistance decreased in three of five patients receiving phenylephrine, five of five patients receiving arginine vasopressin, and three of five patients receiving epinephrine. Although all three medications resulted in an increase in aortic pressure, only arginine vasopressin consistently resulted in a decrease in the ratio of systolic pulmonary artery-to-aortic pressure. + +Conclusions: This prospective pilot study of phenylephrine, arginine vasopressin, and epinephrine in pediatric patients with pulmonary hypertension showed an increase in aortic pressure with all drugs although only vasopressin resulted in a consistent decrease in the ratio of pulmonary-to-systemic vascular resistance. Studies with more subjects are warranted to define optimal dosing strategies of these medications in an acute pulmonary hypertensive crisis. \ No newline at end of file diff --git a/tutorials/llm_clinical_trials/assets/35172054.txt b/tutorials/llm_clinical_trials/assets/35172054.txt new file mode 100644 index 00000000..817e77e5 --- /dev/null +++ b/tutorials/llm_clinical_trials/assets/35172054.txt @@ -0,0 +1,10 @@ +Oral Nirmatrelvir for High-Risk, Nonhospitalized Adults with Covid-19 + +Abstract +Background: Nirmatrelvir is an orally administered severe acute respiratory syndrome coronavirus 2 main protease (Mpro) inhibitor with potent pan-human-coronavirus activity in vitro. + +Methods: We conducted a phase 2-3 double-blind, randomized, controlled trial in which symptomatic, unvaccinated, nonhospitalized adults at high risk for progression to severe coronavirus disease 2019 (Covid-19) were assigned in a 1:1 ratio to receive either 300 mg of nirmatrelvir plus 100 mg of ritonavir (a pharmacokinetic enhancer) or placebo every 12 hours for 5 days. Covid-19-related hospitalization or death from any cause through day 28, viral load, and safety were evaluated. 
+ +Results: A total of 2246 patients underwent randomization; 1120 patients received nirmatrelvir plus ritonavir (nirmatrelvir group) and 1126 received placebo (placebo group). In the planned interim analysis of patients treated within 3 days after symptom onset (modified intention-to treat population, comprising 774 of the 1361 patients in the full analysis population), the incidence of Covid-19-related hospitalization or death by day 28 was lower in the nirmatrelvir group than in the placebo group by 6.32 percentage points (95% confidence interval [CI], -9.04 to -3.59; P<0.001; relative risk reduction, 89.1%); the incidence was 0.77% (3 of 389 patients) in the nirmatrelvir group, with 0 deaths, as compared with 7.01% (27 of 385 patients) in the placebo group, with 7 deaths. Efficacy was maintained in the final analysis involving the 1379 patients in the modified intention-to-treat population, with a difference of -5.81 percentage points (95% CI, -7.78 to -3.84; P<0.001; relative risk reduction, 88.9%). All 13 deaths occurred in the placebo group. The viral load was lower with nirmatrelvir plus ritonavir than with placebo at day 5 of treatment, with an adjusted mean difference of -0.868 log10 copies per milliliter when treatment was initiated within 3 days after the onset of symptoms. The incidence of adverse events that emerged during the treatment period was similar in the two groups (any adverse event, 22.6% with nirmatrelvir plus ritonavir vs. 23.9% with placebo; serious adverse events, 1.6% vs. 6.6%; and adverse events leading to discontinuation of the drugs or placebo, 2.1% vs. 4.2%). Dysgeusia (5.6% vs. 0.3%) and diarrhea (3.1% vs. 1.6%) occurred more frequently with nirmatrelvir plus ritonavir than with placebo. + +Conclusions: Treatment of symptomatic Covid-19 with nirmatrelvir plus ritonavir resulted in a risk of progression to severe Covid-19 that was 89% lower than the risk with placebo, without evident safety concerns. 
\ No newline at end of file diff --git a/tutorials/llm_clinical_trials/configs/fewshot_drugs_dose.json b/tutorials/llm_clinical_trials/configs/fewshot_drugs_dose.json new file mode 100644 index 00000000..91248318 --- /dev/null +++ b/tutorials/llm_clinical_trials/configs/fewshot_drugs_dose.json @@ -0,0 +1,48 @@ +[ + { + "text": "The patient was given 1mg of paracetamol.", + "spans": [ + { + "text": "paracetamol", + "is_entity": true, + "label": "Drug", + "reason": "is a drug name, used as medication" + }, + { + "text": "1mg", + "is_entity": true, + "label": "Dose", + "reason": "is the quantity or dose of the given medication" + }, + { + "text": "patient", + "is_entity": false, + "label": "==NONE==", + "reason": "is a person, not a drug or dose" + } + ] + }, + { + "text": "Throughout the treatment, they received Aspirin 1mg/kg.", + "spans": [ + { + "text": "Aspirin", + "is_entity": true, + "label": "Drug", + "reason": "is a drug brand, used as medication" + }, + { + "text": "1mg/kg", + "is_entity": true, + "label": "Dose", + "reason": "is the quantity or dose of the given drug" + }, + { + "text": "Aspirin 1mg/kg", + "is_entity": false, + "label": "==NONE==", + "reason": "contains both the drug and the dose - these should be two entities instead" + } + ] + } +] \ No newline at end of file diff --git a/tutorials/llm_clinical_trials/configs/ner_dolly.cfg b/tutorials/llm_clinical_trials/configs/ner_dolly.cfg new file mode 100644 index 00000000..c8c53325 --- /dev/null +++ b/tutorials/llm_clinical_trials/configs/ner_dolly.cfg @@ -0,0 +1,17 @@ +[nlp] +lang = "en" +pipeline = ["llm"] +batch_size = 128 + +[components] + +[components.llm] +factory = "llm" + +[components.llm.model] +@llm_models = "spacy.Dolly.v1" +name = "dolly-v2-3b" + +[components.llm.task] +@llm_tasks = "spacy.NER.v2" +labels = "Drug,Dose" diff --git a/tutorials/llm_clinical_trials/configs/ner_falcon.cfg b/tutorials/llm_clinical_trials/configs/ner_falcon.cfg new file mode 100644 index 00000000..89bb970b --- 
/dev/null +++ b/tutorials/llm_clinical_trials/configs/ner_falcon.cfg @@ -0,0 +1,17 @@ +[nlp] +lang = "en" +pipeline = ["llm"] +batch_size = 128 + +[components] + +[components.llm] +factory = "llm" + +[components.llm.model] +@llm_models = "spacy.Falcon.v1" +name = "falcon-7b-instruct" + +[components.llm.task] +@llm_tasks = "spacy.NER.v2" +labels = "Drug,Dose" diff --git a/tutorials/llm_clinical_trials/configs/ner_fewshot_mistral.cfg b/tutorials/llm_clinical_trials/configs/ner_fewshot_mistral.cfg new file mode 100644 index 00000000..440554c6 --- /dev/null +++ b/tutorials/llm_clinical_trials/configs/ner_fewshot_mistral.cfg @@ -0,0 +1,27 @@ +[nlp] +lang = "en" +pipeline = ["llm"] +batch_size = 128 + +[components] + +[components.llm] +factory = "llm" + +[components.llm.model] +@llm_models = "spacy.Mistral.v1" +name = "Mistral-7B-v0.1" + +[components.llm.task] +@llm_tasks = "spacy.NER.v3" +labels = ["Drug", "Dose"] +description = Entities are drugs or their doses. They can be uppercased, title-cased, or lowercased. + Each occurrence of an entity in the text should be extracted. + +[components.llm.task.label_definitions] +Drug = "A medicine or drug given to a patient as a treatment. Can be a generic name or brand name, e.g. paracetamol, Aspirin" +Dose = "The measured quantity (dose) of a certain medicine given to patients, e.g. 1mg. This should exclude the drug name." 
+ +[components.llm.task.examples] +@misc = "spacy.FewShotReader.v1" +path = "configs/fewshot_drugs_dose.json" \ No newline at end of file diff --git a/tutorials/llm_clinical_trials/configs/ner_fewshot_openai.cfg b/tutorials/llm_clinical_trials/configs/ner_fewshot_openai.cfg new file mode 100644 index 00000000..e8f85033 --- /dev/null +++ b/tutorials/llm_clinical_trials/configs/ner_fewshot_openai.cfg @@ -0,0 +1,27 @@ +[nlp] +lang = "en" +pipeline = ["llm"] +batch_size = 128 + +[components] + +[components.llm] +factory = "llm" + +[components.llm.model] +@llm_models = "spacy.GPT-4.v2" +config = {"seed": 342, "temperature": 0.0} + +[components.llm.task] +@llm_tasks = "spacy.NER.v3" +labels = ["Drug", "Dose"] +description = Entities are drugs or their doses. They can be uppercased, title-cased, or lowercased. + Each occurrence of an entity in the text should be extracted. + +[components.llm.task.label_definitions] +Drug = "A medicine or drug given to a patient as a treatment. Can be a generic name or brand name, e.g. paracetamol, Aspirin" +Dose = "The measured quantity (dose) of a certain medicine given to patients, e.g. 1mg. This should exclude the drug name." 
+ +[components.llm.task.examples] +@misc = "spacy.FewShotReader.v1" +path = "configs/fewshot_drugs_dose.json" \ No newline at end of file diff --git a/tutorials/llm_clinical_trials/configs/ner_mistral.cfg b/tutorials/llm_clinical_trials/configs/ner_mistral.cfg new file mode 100644 index 00000000..4f7038c8 --- /dev/null +++ b/tutorials/llm_clinical_trials/configs/ner_mistral.cfg @@ -0,0 +1,17 @@ +[nlp] +lang = "en" +pipeline = ["llm"] +batch_size = 128 + +[components] + +[components.llm] +factory = "llm" + +[components.llm.model] +@llm_models = "spacy.Mistral.v1" +name = "Mistral-7B-v0.1" + +[components.llm.task] +@llm_tasks = "spacy.NER.v2" +labels = ["PERSON", "LOCATION"] diff --git a/tutorials/llm_clinical_trials/configs/ner_zeroshot_llama.cfg b/tutorials/llm_clinical_trials/configs/ner_zeroshot_llama.cfg new file mode 100644 index 00000000..27b4326b --- /dev/null +++ b/tutorials/llm_clinical_trials/configs/ner_zeroshot_llama.cfg @@ -0,0 +1,17 @@ +[nlp] +lang = "en" +pipeline = ["llm"] +batch_size = 128 + +[components] + +[components.llm] +factory = "llm" + +[components.llm.model] +@llm_models = "spacy.Llama2.v1" +name = "Llama-2-7b-hf" + +[components.llm.task] +@llm_tasks = "spacy.NER.v2" +labels = ["Drug", "Dose"] diff --git a/tutorials/llm_clinical_trials/configs/ner_zeroshot_mistral.cfg b/tutorials/llm_clinical_trials/configs/ner_zeroshot_mistral.cfg new file mode 100644 index 00000000..761e393c --- /dev/null +++ b/tutorials/llm_clinical_trials/configs/ner_zeroshot_mistral.cfg @@ -0,0 +1,17 @@ +[nlp] +lang = "en" +pipeline = ["llm"] +batch_size = 128 + +[components] + +[components.llm] +factory = "llm" + +[components.llm.model] +@llm_models = "spacy.Mistral.v1" +name = "Mistral-7B-v0.1" + +[components.llm.task] +@llm_tasks = "spacy.NER.v2" +labels = ["Drug", "Dose"] diff --git a/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg b/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg new file mode 100644 index 00000000..7860850f --- 
/dev/null +++ b/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg @@ -0,0 +1,18 @@ +[nlp] +lang = "en" +pipeline = ["llm"] +batch_size = 128 + +[components] + +[components.llm] +factory = "llm" + +[components.llm.model] +@llm_models = "spacy.GPT-4.v3" +name = "gpt-4" +config = {"seed": 342, "temperature": 0.0} + +[components.llm.task] +@llm_tasks = "spacy.NER.v2" +labels = ["Drug", "Dose"] diff --git a/tutorials/llm_clinical_trials/configs/trial_openai.cfg b/tutorials/llm_clinical_trials/configs/trial_openai.cfg new file mode 100644 index 00000000..b35420c9 --- /dev/null +++ b/tutorials/llm_clinical_trials/configs/trial_openai.cfg @@ -0,0 +1,16 @@ +[nlp] +lang = "en" +pipeline = ["llm"] +batch_size = 128 + +[components] + +[components.llm] +factory = "llm" + +[components.llm.model] +@llm_models = "spacy.GPT-4.v2" +config = {"seed": 342, "temperature": 0.0} + +[components.llm.task] +@llm_tasks = "tutorial.TrialSummary.v1" diff --git a/tutorials/llm_clinical_trials/falcon_requirements.txt b/tutorials/llm_clinical_trials/falcon_requirements.txt new file mode 100644 index 00000000..3be11019 --- /dev/null +++ b/tutorials/llm_clinical_trials/falcon_requirements.txt @@ -0,0 +1,5 @@ +cupy-cuda117 +torch==2.0.1+cu117 -f https://download.pytorch.org/whl/torch_stable.html +transformers +einops +xformers \ No newline at end of file diff --git a/tutorials/llm_clinical_trials/project.yml b/tutorials/llm_clinical_trials/project.yml new file mode 100644 index 00000000..965d8b81 --- /dev/null +++ b/tutorials/llm_clinical_trials/project.yml @@ -0,0 +1,34 @@ +title: 'Clinical trial results extraction with LLMs' +description: "Using an LLM in a spaCy pipeline to extract patient groups, treatments and outcomes in clinical trials." + +vars: + ner_config: "ner_fewshot_openai.cfg" # "ner_zeroshot_openai.cfg" + trial_config: "trial_openai.cfg" + pmid: 27144689 + +# These are the directories that the project needs. 
+directories: ["assets", "configs", "scripts"]
+
+# Assets that should be available in the directory.
+assets:
+  - dest: "assets/23206895.txt"
+  - dest: "assets/27144689.txt"
+  - dest: "assets/35172054.txt"
+
+# Project commands
+commands:
+  - name: ner
+    help: "Run an LLM pipeline with NER (few-shot by default, see vars.ner_config) and visualise the predicted entities"
+    script:
+      - "python ./scripts/visualise_entities.py ${vars.pmid} ./configs/${vars.ner_config}"
+    deps:
+      - "assets/${vars.pmid}.txt"
+      - "configs/${vars.ner_config}"
+
+  - name: summarization
+    help: "Run the LLM-powered spaCy pipeline"
+    script:
+      - "python ./scripts/extract_results.py ${vars.pmid} ./configs/${vars.trial_config}"
+    deps:
+      - "assets/${vars.pmid}.txt"
+      - "configs/${vars.trial_config}"
diff --git a/tutorials/llm_clinical_trials/requirements.txt b/tutorials/llm_clinical_trials/requirements.txt
new file mode 100644
index 00000000..0d359ce6
--- /dev/null
+++ b/tutorials/llm_clinical_trials/requirements.txt
@@ -0,0 +1 @@
+spacy-llm==0.4.0
\ No newline at end of file
diff --git a/tutorials/llm_clinical_trials/scripts/__init__.py b/tutorials/llm_clinical_trials/scripts/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tutorials/llm_clinical_trials/scripts/extract_results.py b/tutorials/llm_clinical_trials/scripts/extract_results.py
new file mode 100644
index 00000000..ac5cbba2
--- /dev/null
+++ b/tutorials/llm_clinical_trials/scripts/extract_results.py
@@ -0,0 +1,25 @@
+from pathlib import Path
+
+import typer
+from input_reader import read_trial
+from spacy_llm.util import assemble
+from trial_task import make_trial_task  # noqa: F401 - importing registers the task
+from wasabi import msg
+
+
+def run_pipeline(pmid: int, config_path: Path, verbose: bool = False):
+    """Run the pipeline from `config_path` on one abstract and print the results."""
+    msg.text(f"Processing PMID {pmid}", show=verbose)
+    msg.text(f"Loading config from {config_path}", show=verbose)
+    text = read_trial(pmid, verbose=verbose)
+    nlp = assemble(config_path)
+    doc = nlp(text)
+
+    print(doc._.trial_summary)
+    print()
+    for ent in doc.ents:
+        print(ent.label_, ent.text)
+
+
+if __name__ == "__main__":
+    typer.run(run_pipeline)
diff --git a/tutorials/llm_clinical_trials/scripts/input_reader.py b/tutorials/llm_clinical_trials/scripts/input_reader.py
new file mode 100644
index 00000000..bdc6bf3c
--- /dev/null
+++ b/tutorials/llm_clinical_trials/scripts/input_reader.py
@@ -0,0 +1,16 @@
+from pathlib import Path
+
+from wasabi import msg
+
+DATA_DIR = Path(__file__).parent.parent / "assets"
+
+
+def read_trial(pmid: int, verbose: bool = False) -> str:
+    """Return the text of `<DATA_DIR>/<pmid>.txt`, read as UTF-8."""
+    file_path = DATA_DIR / f"{pmid}.txt"
+    msg.text(f"Reading article text from {file_path}", show=verbose)
+
+    with open(file_path, "r", encoding="utf8") as file:
+        data = file.read()
+
+    return data
diff --git a/tutorials/llm_clinical_trials/scripts/trial_task.py b/tutorials/llm_clinical_trials/scripts/trial_task.py
new file mode 100644
index 00000000..27933079
--- /dev/null
+++ b/tutorials/llm_clinical_trials/scripts/trial_task.py
@@ -0,0 +1,140 @@
+from typing import Dict, Iterable, List
+
+import spacy
+from spacy.matcher import PhraseMatcher
+from spacy.tokens import Doc
+from spacy_llm.registry import registry
+from spacy_llm.ty import LLMTask
+
+
+INSTRUCTION = """
+Summarize the trial results in a structured fashion.
+First, identify all patient groups with distinct treatments.
+Then, for each patient group, write down the following:
+
+Patient group:
+Number of patients in the group:
+Treatment drug or substance:
+Treatment dose:
+Treatment frequency of administration:
+Treatment duration:
+Outcome:
+"""
+
+# Lower-cased field headers requested in INSTRUCTION, in the order they are
+# expected within one patient group, paired with the entity label given to
+# document text that matches the field's value.
+_FIELDS = (
+    ("patient group:", "Patient_Group"),
+    ("number of patients in the group:", "Patient_Number"),
+    ("treatment drug or substance:", "Treatment_Drug"),
+    ("treatment dose:", "Treatment_Dose"),
+    ("treatment frequency of administration:", "Treatment_Frequency"),
+    ("treatment duration:", "Treatment_Duration"),
+    ("outcome:", "Outcome"),
+)
+
+
+@registry.llm_tasks("tutorial.TrialSummary.v1")
+def make_trial_task() -> "TrialSummaryTask":
+    """Create the task registered as "tutorial.TrialSummary.v1"."""
+    return TrialSummaryTask(INSTRUCTION)
+
+
+def _parse_summary(response: str) -> Dict[str, List[str]]:
+    """Collect the field values of every patient group in an LLM response.
+
+    Headers are matched case-insensitively. A group runs from one
+    "Patient group:" header to the next one (or to the end of the response),
+    and a value runs from its header to the next header found in that group.
+    Headers missing from a group are skipped.
+
+    Returns the non-empty, stripped values per entity label.
+    """
+    values: Dict[str, List[str]] = {label: [] for _, label in _FIELDS}
+    # NOTE: offsets are computed on the lower-cased copy and applied to the
+    # original text; this assumes lower-casing does not change the length.
+    lowered = response.lower()
+    group_header = _FIELDS[0][0]
+
+    group_start = lowered.find(group_header)
+    while group_start >= 0:
+        next_group = lowered.find(group_header, group_start + len(group_header))
+        # str.find returns -1 when there is no further group; used directly as
+        # a slice end, -1 would cut off the last character of the response.
+        group_end = next_group if next_group >= 0 else len(response)
+
+        # Locate the headers that are present in this group, in order.
+        found = []
+        cursor = group_start
+        for header, label in _FIELDS:
+            position = lowered.find(header, cursor, group_end)
+            if position >= 0:
+                cursor = position + len(header)
+                found.append((position, cursor, label))
+
+        # A value ends where the next found header starts, or with the group.
+        value_ends = [position for position, _, _ in found[1:]] + [group_end]
+        for (_, value_start, label), value_end in zip(found, value_ends):
+            value = response[value_start:value_end].strip()
+            if value:
+                values[label].append(value)
+
+        group_start = next_group
+    return values
+
+
+class TrialSummaryTask(LLMTask):
+    """Task that prompts for a structured trial summary and parses the answer."""
+
+    def __init__(self, instruction: str):
+        self.instruction = instruction
+        # Setting an extension that already exists raises, so only register it
+        # the first time a task is created in this process.
+        if not Doc.has_extension("trial_summary"):
+            Doc.set_extension("trial_summary", default="")
+
+    def generate_prompts(self, docs: Iterable[Doc]) -> Iterable[str]:
+        """Yield one prompt per doc."""
+        for doc in docs:
+            yield self.generate_prompt(doc)
+
+    def generate_prompt(self, doc: Doc) -> str:
+        """Build the prompt: a lead-in, the instruction, then the doc text."""
+        prompt = "Below this instruction, I will provide you with a clinical trial abstract. \n"
+        prompt += self.instruction + "\n\n" + doc.text
+        return prompt
+
+    def parse_responses_v1(
+        self, docs: Iterable[Doc], responses: Iterable[str]
+    ) -> Iterable[Doc]:
+        """Store each raw response on its doc without setting any entities."""
+        for doc, response in zip(docs, responses):
+            doc._.trial_summary = response
+            yield doc
+
+    # quick and dirty implementation for now
+    def parse_responses(
+        self, docs: Iterable[Doc], responses: Iterable[str]
+    ) -> Iterable[Doc]:
+        """Store each response and set entities from the values it reports.
+
+        The values parsed from the response are matched against the doc on
+        lower-cased token text; the matches are passed through
+        `spacy.util.filter_spans` and assigned to `doc.ents`.
+        """
+        # Only used to tokenize the patterns, so one instance serves all docs.
+        nlp = spacy.blank("en")
+        for doc, response in zip(docs, responses):
+            matcher = PhraseMatcher(doc.vocab, attr="LOWER")
+            for label, values in _parse_summary(response).items():
+                if values:
+                    matcher.add(label, [nlp.make_doc(value) for value in values])
+
+            matches = matcher(doc, as_spans=True)
+            matches = spacy.util.filter_spans(matches)
+
+            # This assumes that no entities were set prior to this component
+            doc.ents = matches
+            doc._.trial_summary = response
+            yield doc
diff --git a/tutorials/llm_clinical_trials/scripts/visualise_entities.py b/tutorials/llm_clinical_trials/scripts/visualise_entities.py
new file mode 100644
index 00000000..8329933a
--- /dev/null
+++ b/tutorials/llm_clinical_trials/scripts/visualise_entities.py
@@ -0,0 +1,41 @@
+import logging
+from pathlib import Path
+
+import spacy_llm
+import typer
+from input_reader import read_trial
+from spacy import displacy
+from spacy_llm.util import assemble
+from wasabi import msg
+
+DEBUG = True
+PRINT_CONSOLE = True
+PRINT_DISPLACY = False
+
+
+def visualise_entities(pmid: int, config_path: Path, verbose: bool = False):
+    """Run the pipeline from `config_path` on one abstract and report its entities."""
+    spacy_llm.logger.addHandler(logging.StreamHandler())
+    if DEBUG:
+        spacy_llm.logger.setLevel(logging.DEBUG)
+
+    msg.info(f"Processing PMID {pmid}", show=verbose)
+    msg.info(f"Loading config from {config_path}", show=verbose)
+    text = read_trial(pmid, verbose=verbose)
+    nlp = assemble(config_path)
+    doc = nlp(text)
+    ents = list(doc.ents)
+    if PRINT_CONSOLE:
+        msg.text(f" - Number of entities: {len(ents)}")
+        for ent in ents:
+            msg.text(f" - {ent.text} [{ent.label_}]")
+    if PRINT_DISPLACY:
+        options = {
+            "ents": ["Drug", "Dose"],
+            "colors": {"Drug": "pink", "Dose": "orange"},
+        }
+        displacy.serve(doc, style="ent", options=options)
+
+
+if __name__ == "__main__":
+    typer.run(visualise_entities)