From ebeaf99bfddf7292689218dc3ed32990ced075f3 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Mon, 21 Sep 2020 16:43:15 +0200 Subject: [PATCH 1/4] Set up CI with Azure Pipelines [skip ci] --- azure-pipelines.yml | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 azure-pipelines.yml diff --git a/azure-pipelines.yml b/azure-pipelines.yml new file mode 100644 index 000000000..a99aa08ff --- /dev/null +++ b/azure-pipelines.yml @@ -0,0 +1,36 @@ +trigger: + batch: true + branches: + include: + - 'v3' + +variables: + # Make sure we're exiting training as early as possible + SPACY_CONFIG_OVERRIDES: '--training.max_epochs=1 --training.max_steps=1' + WASABI_LOG_FRIENDLY: 1 + +jobs: + - job: 'Test' + strategy: + matrix: + Python37Linux: + imageName: 'ubuntu-16.04' + python.version: '3.7' + Python37Windows: + imageName: 'vs2017-win2016' + python.version: '3.7' + maxParallel: 4 + pool: + vmImage: $(imageName) + + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '$(python.version)' + architecture: 'x64' + + - script: pip install "spacy-nightly>=3.0.0a19" + displayName: 'Install dependencies' + + - script: python -m pytest -s benchmarks experimental integrations pipelines tutorials + displayName: 'Run tests' From b6051bf38fc3e98f3968691db145d5a400e95aea Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Tue, 20 Apr 2021 19:42:10 +0200 Subject: [PATCH 2/4] Filter NEL duplicates by task instead of text (#53) * Filter NEL duplicates by task instead of text * restrict to spacy v2 * remove CI for spacy v2 projects --- azure-pipelines.yml | 36 -------------------------------- nel-emerson/scripts/el_recipe.py | 2 +- 2 files changed, 1 insertion(+), 37 deletions(-) delete mode 100644 azure-pipelines.yml diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index a99aa08ff..000000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,36 +0,0 @@ -trigger: - batch: true - branches: - include: - - 'v3' - -variables: - # Make sure we're exiting training as early as possible - SPACY_CONFIG_OVERRIDES: '--training.max_epochs=1 --training.max_steps=1' - WASABI_LOG_FRIENDLY: 1 - -jobs: - - job: 'Test' - strategy: - matrix: - Python37Linux: - imageName: 'ubuntu-16.04' - python.version: '3.7' - Python37Windows: - imageName: 'vs2017-win2016' - python.version: '3.7' - maxParallel: 4 - pool: - vmImage: $(imageName) - - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '$(python.version)' - architecture: 'x64' - - - script: pip install "spacy-nightly>=3.0.0a19" - displayName: 'Install dependencies' - - - script: python -m pytest -s benchmarks experimental integrations pipelines tutorials - displayName: 'Run tests' diff --git a/nel-emerson/scripts/el_recipe.py b/nel-emerson/scripts/el_recipe.py index 7d7b5f2a0..2a2aaa4c6 100644 --- a/nel-emerson/scripts/el_recipe.py +++ b/nel-emerson/scripts/el_recipe.py @@ -45,7 +45,7 @@ def entity_linker_manual(dataset, source, nlp_dir, kb_loc, entity_loc): # For each NER mention, add the candidates from the KB to the annotation task stream = _add_options(stream, kb, id_dict) - stream = filter_duplicates(stream, by_input=True, by_task=False) + stream = filter_duplicates(stream, by_input=False, by_task=True) return { "dataset": dataset, From 8ab0b3b873f6c2393e9f1d9b9b30238ef1dddd8a Mon Sep 17 00:00:00 2001 From: Ned Booker <63317915+epb378@users.noreply.github.com> Date: Thu, 29 Apr 2021 09:22:03 -0400 Subject: [PATCH 3/4] Update notebook_video.ipynb (#55) --- nel-emerson/scripts/notebook_video.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nel-emerson/scripts/notebook_video.ipynb b/nel-emerson/scripts/notebook_video.ipynb index 3ce1a0a1f..90f44915e 100644 --- a/nel-emerson/scripts/notebook_video.ipynb +++ b/nel-emerson/scripts/notebook_video.ipynb @@ -314,7 +314,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "If you are watching [the video](https://www.youtube.com/watch?v=8u57WSXVpmw), it will explain how to obtain annotated data with Prodigy. The final result will be a JSONL file that is distributed alongside this notebook. We'll now use this JSONL file to train our entity linker. If you want to skip the annotation part in the video, you can fast forward to [this secion](https://www.youtube.com/watch?v=8u57WSXVpmw&t=19m19s)." + "If you are watching [the video](https://www.youtube.com/watch?v=8u57WSXVpmw), it will explain how to obtain annotated data with Prodigy. The final result will be a JSONL file that is distributed alongside this notebook. We'll now use this JSONL file to train our entity linker. If you want to skip the annotation part in the video, you can fast forward to [this section](https://www.youtube.com/watch?v=8u57WSXVpmw&t=19m19s)." ] }, { From a6678cc7d2a76a4a99b524e6179a2ce4b64fbf4e Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Wed, 27 Mar 2024 11:10:47 +0100 Subject: [PATCH 4/4] Refer to v3 version (#206) --- nel-wikipedia/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nel-wikipedia/README.md b/nel-wikipedia/README.md index 4e628a19f..a1e089267 100644 --- a/nel-wikipedia/README.md +++ b/nel-wikipedia/README.md @@ -3,6 +3,8 @@ These scripts are provided on an as-is basis. Questions around usage of these scripts are best asked at StackOverflow. Note that processing the whole of Wikipedia will require a lot of resources and can be overkill for your specific use-case. +Note: this project was created for spaCy v2.x. If you want to run this functionality with spaCy v3.x, check out [wikid](https://github.com/explosion/wikid) and https://github.com/explosion/projects/tree/v3/benchmarks/nel. + ### Step 1: Create a Knowledge Base (KB) and training data Run `wikidata_pretrain_kb.py`