diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index f7452fd2..7e20bc46 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -9,11 +9,11 @@ jobs:
max-parallel: 5
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Set up Python 3.10
- uses: actions/setup-python@v2
+ uses: actions/setup-python@v5
with:
- python-version: '3.10'
+ python-version: '3.9'
- name: Add conda to system path
run: |
# $CONDA is an environment variable pointing to the root of the miniconda directory
diff --git a/.gitignore b/.gitignore
index aab7ea06..d82ba130 100755
--- a/.gitignore
+++ b/.gitignore
@@ -108,3 +108,5 @@ dmypy.json
.pyre/
# End of https://www.gitignore.io/api/python
+
+scripts/clean-unused-shm-objects
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..3a66b077
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,2 @@
+include rnaseq_pipeline/credentials.json
+graft rnaseq_pipeline/webviewer/templates
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..ebc6aeb7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,43 @@
+DESTDIR :=
+PREFIX := /usr
+
+CONDA := conda
+PIP := pip
+
+all: scripts contrib/RSEM
+
+contrib/RSEM:
+ $(MAKE) -C $@
+
+scripts:
+ $(MAKE) -C $@
+
+install: install-python install-systemd-units install-RSEM install-scripts install-conda-env install-fish-completion
+
+install-fish-completion:
+ mkdir -p "${DESTDIR}/etc/fish/completions"
+	install data/fish/completions/luigi-wrapper.fish "${DESTDIR}/etc/fish/completions/"
+
+install-scripts:
+ $(MAKE) -C scripts install
+
+install-python:
+ $(PIP) install --prefix="${DESTDIR}${PREFIX}" .
+
+install-systemd-units:
+ mkdir -p "${DESTDIR}/etc/systemd/system/"
+	install data/systemd/*.service data/systemd/*.timer data/systemd/*.target "${DESTDIR}/etc/systemd/system/"
+	@echo "Remember to run 'systemctl edit rnaseq-pipeline-viewer' and 'systemctl edit rnaseq-pipeline-worker@' and set CONDA_BIN, CONDA_ENV, GEMMA_USERNAME and GEMMA_PASSWORD environment variables."
+
+install-RSEM:
+ $(MAKE) -C contrib/RSEM install prefix="${DESTDIR}${PREFIX}"
+
+install-conda-env: environment.yml
+	mkdir -p "${DESTDIR}${PREFIX}/share/rnaseq-pipeline/"
+ $(CONDA) env create -p "${DESTDIR}${PREFIX}/share/rnaseq-pipeline/conda-env" -f environment.yml
+
+clean:
+ $(MAKE) -C contrib/RSEM clean
+ $(MAKE) -C scripts clean
+
+.PHONY: all scripts contrib/RSEM install install-python install-systemd-units install-RSEM install-scripts install-conda-env install-fish-completion clean
diff --git a/README.md b/README.md
index 2d9de162..8f541f98 100755
--- a/README.md
+++ b/README.md
@@ -2,7 +2,12 @@
[](https://github.com/PavlidisLab/rnaseq-pipeline/actions/workflows/build.yml)
-This documentation is principally written to support the Pavlidis Lab, and we're still updating it. But this pipeline should be fairly easy to configure on any Linux servers using these instructions. External users interested in using this pipeline for RNASeq quantification should contact [@mbelmadani](https://github.com/mbelmadani) - manuel.belmadani@msl.ubc.ca if troubleshooting assistance is needed.
+This documentation is principally written to support the Pavlidis Lab, and
+we're still updating it. But this pipeline should be fairly easy to configure
+on any Linux servers using these instructions. External users interested in
+using this pipeline for RNA-Seq quantification should contact our
+[helpdesk](mailto:MSL-PAVLAB-SUPPORT@LISTS.UBC.CA) if troubleshooting
+assistance is needed.
## Features
@@ -31,29 +36,29 @@ Create and activate a Conda environment with all the required software
dependencies:
```bash
-conda env setup -f environment.yml
+conda env create -f environment.yml
conda activate rnaseq-pipeline
```
Build the shared memory cleanup tool:
-```
+```bash
make -C scripts
```
-**Note:** We remove unused shared memory objects allocated by STAR in Slurm task prolog and
+**Note:** We remove unused shared memory objects allocated by STAR in Slurm task prolog and
epilog scripts.
Build RSEM:
-```
+```bash
make -C contrib/RSEM
```
Install the pipeline Python package in the Conda environment:
```bash
-python setup.py install # use develop instead of install of you want to edit the pipeline
+pip install . # use -e if you want to edit the pipeline
```
Create a copy of `the example.luigi.cfg` file to `luigi.cfg`. It should work
@@ -70,7 +75,7 @@ For convenience, we provide a `luigi-wrapper` script that sets the `--module`
flag to `rnaseq_pipeline.tasks` for you.
```bash
-./luigi-wrapper
+luigi-wrapper
```
## Setting up a genomic reference
@@ -90,7 +95,7 @@ following files under `pipeline-output/genomes/mm10_ensembl98`:
The top-level task you will likely want to use is `rnaseq_pipeline.tasks.GenerateReportForExperiment`.
```bash
-./luigi-wrapper rnaseq_pipeline.tasks.GenerateReportForExperiment --source geo --taxon mouse --reference mm10_ensembl98 --experiment-id GSE80745
+luigi-wrapper rnaseq_pipeline.tasks.GenerateReportForExperiment --source geo --taxon mouse --reference mm10_ensembl98 --experiment-id GSE80745
```
The output is organized as follow:
@@ -106,8 +111,13 @@ pipeline-output/
report/// # MultiQC reports for reads and alignments
```
-You can adjust the pipeline output directory by setting `rnaseq_pipeline.core.pipeline_output`
-in the configuration.
+You can adjust the pipeline output directory by setting `OUTPUT_DIR` under
+`[rnaseq_pipeline]` in the configuration.
+
+```ini
+[rnaseq_pipeline]
+OUTPUT_DIR=/scratch/rnaseq-pipeline
+```
## Setting up distributed computation
@@ -126,7 +136,41 @@ scheduler_extra_args=[]
The pipeline comes with a Web viewer that provides convenient endpoints for
consulting QC reports.
+When installing, add the `webviewer` extra require which will include [Flask](https://flask.palletsprojects.com/) and [gunicorn](https://gunicorn.org/):
+
+```bash
+pip install .[webviewer]
+```
+
```bash
gunicorn rnaseq_pipeline.viewer:app
```
+## Gemma integration
+
+The RNA-Seq pipeline is capable of communicating with Gemma using its [RESTful API](https://gemma.msl.ubc.ca/resources/restapidocs/).
+
+## External spreadsheet via Google Sheets API
+
+The RNA-Seq pipeline can pull experiment IDs from a collaborative spreadsheet
+through the Google Sheets API. This feature requires extra dependencies that
+are supplied by the `gsheet` extra require:
+
+```bash
+pip install .[gsheet]
+```
+
+The `rnaseq_pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma` task
+becomes available. We also provide a convenience script:
+
+```bash
+submit-experiments-from-gsheet --spreadsheet-id <spreadsheet-id> --sheet-name <sheet-name>
+```
+
+The remote spreadsheet must be structured to have the following columns:
+
+ - `experiment_id`, the Gemma experiment short name
+ - `priority`, the Luigi task priority, an integer
+ - `data`, the status of the data, allowed values: `ok`, `resubmit` (forces a rerun), `needs attention`, all other values are ignored
+
+Only experiments with strictly positive priority are scheduled.
diff --git a/contrib/RSEM b/contrib/RSEM
index eb61aa85..d4e9a049 160000
--- a/contrib/RSEM
+++ b/contrib/RSEM
@@ -1 +1 @@
-Subproject commit eb61aa850aef6b19407b2f5e561ddce1aadd181c
+Subproject commit d4e9a049094dfa7d75420d4a39eede17f32dae84
diff --git a/data/fish/completions/luigi-wrapper.fish b/data/fish/completions/luigi-wrapper.fish
new file mode 100644
index 00000000..3c6583d0
--- /dev/null
+++ b/data/fish/completions/luigi-wrapper.fish
@@ -0,0 +1,729 @@
+complete -c luigi-wrapper -e
+complete -c luigi-wrapper -f
+complete -c luigi-wrapper -f -l TestNotificationsTask-raise-in-complete -d 'If true, fail in complete() instead of run()'
+complete -c luigi-wrapper -f -l email-force-send -d 'Send e-mail even from a tty'
+complete -c luigi-wrapper -f -l email-format -r -d 'Format type for sent e-mails Choices: {html, none, plain}'
+complete -c luigi-wrapper -f -l email-method -r -d 'Method for sending e-mail Choices: {ses, sns, sendgrid, smtp}'
+complete -c luigi-wrapper -f -l email-prefix -r -d 'Prefix for subject lines of all e-mails'
+complete -c luigi-wrapper -f -l email-receiver -r -d 'Address to send error e-mails to'
+complete -c luigi-wrapper -f -l email-traceback-max-length -r -d 'Max length for error traceback'
+complete -c luigi-wrapper -f -l email-sender -r -d 'Address to send e-mails from'
+complete -c luigi-wrapper -f -l smtp-host -r -d 'Hostname of smtp server'
+complete -c luigi-wrapper -f -l smtp-local-hostname -r -d 'If specified, local_hostname is used as the FQDN of the local host in the HELO/EHLO command'
+complete -c luigi-wrapper -f -l smtp-no-tls -d 'Do not use TLS in SMTP connections'
+complete -c luigi-wrapper -f -l smtp-password -r -d 'Password for the SMTP server login'
+complete -c luigi-wrapper -f -l smtp-port -r -d 'Port number for smtp server'
+complete -c luigi-wrapper -f -l smtp-ssl -d 'Use SSL for the SMTP connection.'
+complete -c luigi-wrapper -f -l smtp-timeout -r -d 'Number of seconds before timing out the smtp connection'
+complete -c luigi-wrapper -f -l smtp-username -r -d 'Username used to log in to the SMTP host'
+complete -c luigi-wrapper -f -l sendgrid-apikey -r -d 'API key for SendGrid login'
+complete -c luigi-wrapper -f -l batch-email-email-interval -r -d 'Number of minutes between e-mail sends (default: 60)'
+complete -c luigi-wrapper -f -l batch-email-batch-mode -r -d 'Method used for batching failures in e-mail. If "family" all failures for tasks with the same family will be batched. If "unbatched_params", all failures for tasks with the same family and non-batched parameters will be batched. If "all", tasks will only be batched if they have identical names. Choices: {family, all, unbatched_params}'
+complete -c luigi-wrapper -f -l batch-email-error-lines -r -d 'Number of lines to show from each error message. 0 means show all'
+complete -c luigi-wrapper -f -l batch-email-error-messages -r -d 'Number of error messages to show for each group'
+complete -c luigi-wrapper -f -l batch-email-group-by-error-messages -d 'Group items with the same error messages together'
+complete -c luigi-wrapper -f -l scheduler-retry-delay -r
+complete -c luigi-wrapper -f -l scheduler-remove-delay -r
+complete -c luigi-wrapper -f -l scheduler-worker-disconnect-delay -r
+complete -c luigi-wrapper -f -l scheduler-state-path -r
+complete -c luigi-wrapper -f -l scheduler-batch-emails -d 'Send e-mails in batches rather than immediately'
+complete -c luigi-wrapper -f -l scheduler-disable-window -r
+complete -c luigi-wrapper -f -l scheduler-retry-count -r
+complete -c luigi-wrapper -f -l scheduler-disable-hard-timeout -r
+complete -c luigi-wrapper -f -l scheduler-disable-persist -r
+complete -c luigi-wrapper -f -l scheduler-max-shown-tasks -r
+complete -c luigi-wrapper -f -l scheduler-max-graph-nodes -r
+complete -c luigi-wrapper -f -l scheduler-record-task-history
+complete -c luigi-wrapper -f -l scheduler-prune-on-get-work
+complete -c luigi-wrapper -f -l scheduler-pause-enabled
+complete -c luigi-wrapper -f -l scheduler-send-messages
+complete -c luigi-wrapper -f -l scheduler-metrics-collector -r
+complete -c luigi-wrapper -f -l scheduler-metrics-custom-import -r
+complete -c luigi-wrapper -f -l scheduler-stable-done-cooldown-secs -r -d 'Sets cooldown period to avoid running the same task twice'
+complete -c luigi-wrapper -f -l worker-id -r -d 'Override the auto-generated worker_id'
+complete -c luigi-wrapper -f -l worker-ping-interval -r
+complete -c luigi-wrapper -f -l worker-keep-alive
+complete -c luigi-wrapper -f -l worker-count-uniques -d 'worker-count-uniques means that we will keep a worker alive only if it has a unique pending task, as well as having keep-alive true'
+complete -c luigi-wrapper -f -l worker-count-last-scheduled -d 'Keep a worker alive only if there are pending tasks which it was the last to schedule.'
+complete -c luigi-wrapper -f -l worker-wait-interval -r
+complete -c luigi-wrapper -f -l worker-wait-jitter -r
+complete -c luigi-wrapper -f -l worker-max-keep-alive-idle-duration -r
+complete -c luigi-wrapper -f -l worker-max-reschedules -r
+complete -c luigi-wrapper -f -l worker-timeout -r
+complete -c luigi-wrapper -f -l worker-task-limit -r
+complete -c luigi-wrapper -f -l worker-retry-external-tasks -d 'If true, incomplete external tasks will be retested for completion while Luigi is running.'
+complete -c luigi-wrapper -f -l worker-send-failure-email -d 'If true, send e-mails directly from the workeron failure'
+complete -c luigi-wrapper -f -l worker-no-install-shutdown-handler -d 'If true, the SIGUSR1 shutdown handler willNOT be install on the worker'
+complete -c luigi-wrapper -f -l worker-check-unfulfilled-deps -d 'If true, check for completeness of dependencies before running a task'
+complete -c luigi-wrapper -f -l worker-check-complete-on-run -d 'If true, only mark tasks as done after running if they are complete. Regardless of this setting, the worker will always check if external tasks are complete before marking them as done.'
+complete -c luigi-wrapper -f -l worker-force-multiprocessing -d 'If true, use multiprocessing also when running with 1 worker'
+complete -c luigi-wrapper -f -l worker-task-process-context -r -d 'If set to a fully qualified class name, the class will be instantiated with a TaskProcess as its constructor parameter and applied as a context manager around its run() call, so this can be used for obtaining high level customizable monitoring or logging of each individual Task run.'
+complete -c luigi-wrapper -f -l worker-cache-task-completion -d 'If true, cache the response of successful completion checks of tasks assigned to a worker. This can especially speed up tasks with dynamic dependencies but assumes that the completion status does not change after it was true the first time.'
+complete -c luigi-wrapper -f -l execution-summary-summary-length -r
+complete -c luigi-wrapper -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -f -l scheduler-host -r -d 'Hostname of machine running remote scheduler'
+complete -c luigi-wrapper -f -l scheduler-port -r -d 'Port of remote scheduler api process'
+complete -c luigi-wrapper -f -l scheduler-url -r -d 'Full path to remote scheduler'
+complete -c luigi-wrapper -f -l lock-size -r -d 'Maximum number of workers running the same command'
+complete -c luigi-wrapper -f -l no-lock -d 'Ignore if similar process is already running'
+complete -c luigi-wrapper -f -l lock-pid-dir -r -d 'Directory to store the pid file'
+complete -c luigi-wrapper -f -l take-lock -d 'Signal other processes to stop getting work if already running'
+complete -c luigi-wrapper -f -l workers -r -d 'Maximum number of parallel tasks to run'
+complete -c luigi-wrapper -f -l logging-conf-file -r -d 'Configuration file for logging'
+complete -c luigi-wrapper -f -l log-level -r -d 'Default log level to use when logging_conf_file is not set Choices: {WARNING, DEBUG, INFO, ERROR, CRITICAL, NOTSET}'
+complete -c luigi-wrapper -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -f -l parallel-scheduling -d 'Use multiprocessing to do scheduling in parallel.'
+complete -c luigi-wrapper -f -l parallel-scheduling-processes -r -d 'The number of processes to use for scheduling in parallel. By default the number of available CPUs will be used'
+complete -c luigi-wrapper -f -l assistant -d 'Run any task from the scheduler.'
+complete -c luigi-wrapper -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -f -l RangeBase-of -r -d 'task name to be completed. The task must take a single datetime parameter'
+complete -c luigi-wrapper -f -l RangeBase-of-params -r -d 'Arguments to be provided to the '"'"'of'"'"' class when instantiating'
+complete -c luigi-wrapper -f -l RangeBase-start -r
+complete -c luigi-wrapper -f -l RangeBase-stop -r
+complete -c luigi-wrapper -f -l RangeBase-reverse -d 'specifies the preferred order for catching up. False - work from the oldest missing outputs onward; True - from the newest backward'
+complete -c luigi-wrapper -f -l RangeBase-task-limit -r -d 'how many of '"'"'of'"'"' tasks to require. Guards against scheduling insane amounts of tasks in one go'
+complete -c luigi-wrapper -f -l RangeBase-now -r -d 'set to override current time. In seconds since epoch'
+complete -c luigi-wrapper -f -l RangeBase-param-name -r -d 'parameter name used to pass in parameterized value. Defaults to None, meaning use first positional parameter'
+complete -c luigi-wrapper -f -l RangeDailyBase-of -r -d 'task name to be completed. The task must take a single datetime parameter'
+complete -c luigi-wrapper -f -l RangeDailyBase-of-params -r -d 'Arguments to be provided to the '"'"'of'"'"' class when instantiating'
+complete -c luigi-wrapper -f -l RangeDailyBase-reverse -d 'specifies the preferred order for catching up. False - work from the oldest missing outputs onward; True - from the newest backward'
+complete -c luigi-wrapper -f -l RangeDailyBase-task-limit -r -d 'how many of '"'"'of'"'"' tasks to require. Guards against scheduling insane amounts of tasks in one go'
+complete -c luigi-wrapper -f -l RangeDailyBase-now -r -d 'set to override current time. In seconds since epoch'
+complete -c luigi-wrapper -f -l RangeDailyBase-param-name -r -d 'parameter name used to pass in parameterized value. Defaults to None, meaning use first positional parameter'
+complete -c luigi-wrapper -f -l RangeDailyBase-start -r -d 'beginning date, inclusive. Default: None - work backward forever (requires reverse=True)'
+complete -c luigi-wrapper -f -l RangeDailyBase-stop -r -d 'ending date, exclusive. Default: None - work forward forever'
+complete -c luigi-wrapper -f -l RangeDailyBase-days-back -r -d 'extent to which contiguousness is to be assured into past, in days from current time. Prevents infinite loop when start is none. If the dataset has limited retention (i.e. old outputs get removed), this should be set shorter to that, too, to prevent the oldest outputs flapping. Increase freely if you intend to process old dates - worker'"'"'s memory is the limit'
+complete -c luigi-wrapper -f -l RangeDailyBase-days-forward -r -d 'extent to which contiguousness is to be assured into future, in days from current time. Prevents infinite loop when stop is none'
+complete -c luigi-wrapper -f -l RangeHourlyBase-of -r -d 'task name to be completed. The task must take a single datetime parameter'
+complete -c luigi-wrapper -f -l RangeHourlyBase-of-params -r -d 'Arguments to be provided to the '"'"'of'"'"' class when instantiating'
+complete -c luigi-wrapper -f -l RangeHourlyBase-reverse -d 'specifies the preferred order for catching up. False - work from the oldest missing outputs onward; True - from the newest backward'
+complete -c luigi-wrapper -f -l RangeHourlyBase-task-limit -r -d 'how many of '"'"'of'"'"' tasks to require. Guards against scheduling insane amounts of tasks in one go'
+complete -c luigi-wrapper -f -l RangeHourlyBase-now -r -d 'set to override current time. In seconds since epoch'
+complete -c luigi-wrapper -f -l RangeHourlyBase-param-name -r -d 'parameter name used to pass in parameterized value. Defaults to None, meaning use first positional parameter'
+complete -c luigi-wrapper -f -l RangeHourlyBase-start -r -d 'beginning datehour, inclusive. Default: None - work backward forever (requires reverse=True)'
+complete -c luigi-wrapper -f -l RangeHourlyBase-stop -r -d 'ending datehour, exclusive. Default: None - work forward forever'
+complete -c luigi-wrapper -f -l RangeHourlyBase-hours-back -r -d 'extent to which contiguousness is to be assured into past, in hours from current time. Prevents infinite loop when start is none. If the dataset has limited retention (i.e. old outputs get removed), this should be set shorter to that, too, to prevent the oldest outputs flapping. Increase freely if you intend to process old dates - worker'"'"'s memory is the limit'
+complete -c luigi-wrapper -f -l RangeHourlyBase-hours-forward -r -d 'extent to which contiguousness is to be assured into future, in hours from current time. Prevents infinite loop when stop is none'
+complete -c luigi-wrapper -f -l RangeByMinutesBase-of -r -d 'task name to be completed. The task must take a single datetime parameter'
+complete -c luigi-wrapper -f -l RangeByMinutesBase-of-params -r -d 'Arguments to be provided to the '"'"'of'"'"' class when instantiating'
+complete -c luigi-wrapper -f -l RangeByMinutesBase-reverse -d 'specifies the preferred order for catching up. False - work from the oldest missing outputs onward; True - from the newest backward'
+complete -c luigi-wrapper -f -l RangeByMinutesBase-task-limit -r -d 'how many of '"'"'of'"'"' tasks to require. Guards against scheduling insane amounts of tasks in one go'
+complete -c luigi-wrapper -f -l RangeByMinutesBase-now -r -d 'set to override current time. In seconds since epoch'
+complete -c luigi-wrapper -f -l RangeByMinutesBase-param-name -r -d 'parameter name used to pass in parameterized value. Defaults to None, meaning use first positional parameter'
+complete -c luigi-wrapper -f -l RangeByMinutesBase-start -r -d 'beginning date-hour-minute, inclusive. Default: None - work backward forever (requires reverse=True)'
+complete -c luigi-wrapper -f -l RangeByMinutesBase-stop -r -d 'ending date-hour-minute, exclusive. Default: None - work forward forever'
+complete -c luigi-wrapper -f -l RangeByMinutesBase-minutes-back -r -d 'extent to which contiguousness is to be assured into past, in minutes from current time. Prevents infinite loop when start is none. If the dataset has limited retention (i.e. old outputs get removed), this should be set shorter to that, too, to prevent the oldest outputs flapping. Increase freely if you intend to process old dates - worker'"'"'s memory is the limit'
+complete -c luigi-wrapper -f -l RangeByMinutesBase-minutes-forward -r -d 'extent to which contiguousness is to be assured into future, in minutes from current time. Prevents infinite loop when stop is none'
+complete -c luigi-wrapper -f -l RangeByMinutesBase-minutes-interval -r -d 'separation between events in minutes. It must evenly divide 60'
+complete -c luigi-wrapper -f -l RangeMonthly-of -r -d 'task name to be completed. The task must take a single datetime parameter'
+complete -c luigi-wrapper -f -l RangeMonthly-of-params -r -d 'Arguments to be provided to the '"'"'of'"'"' class when instantiating'
+complete -c luigi-wrapper -f -l RangeMonthly-reverse -d 'specifies the preferred order for catching up. False - work from the oldest missing outputs onward; True - from the newest backward'
+complete -c luigi-wrapper -f -l RangeMonthly-task-limit -r -d 'how many of '"'"'of'"'"' tasks to require. Guards against scheduling insane amounts of tasks in one go'
+complete -c luigi-wrapper -f -l RangeMonthly-now -r -d 'set to override current time. In seconds since epoch'
+complete -c luigi-wrapper -f -l RangeMonthly-param-name -r -d 'parameter name used to pass in parameterized value. Defaults to None, meaning use first positional parameter'
+complete -c luigi-wrapper -f -l RangeMonthly-start -r -d 'beginning month, inclusive. Default: None - work backward forever (requires reverse=True)'
+complete -c luigi-wrapper -f -l RangeMonthly-stop -r -d 'ending month, exclusive. Default: None - work forward forever'
+complete -c luigi-wrapper -f -l RangeMonthly-months-back -r -d 'extent to which contiguousness is to be assured into past, in months from current time. Prevents infinite loop when start is none. If the dataset has limited retention (i.e. old outputs get removed), this should be set shorter to that, too, to prevent the oldest outputs flapping. Increase freely if you intend to process old dates - worker'"'"'s memory is the limit'
+complete -c luigi-wrapper -f -l RangeMonthly-months-forward -r -d 'extent to which contiguousness is to be assured into future, in months from current time. Prevents infinite loop when stop is none'
+complete -c luigi-wrapper -f -l RangeDaily-of -r -d 'task name to be completed. The task must take a single datetime parameter'
+complete -c luigi-wrapper -f -l RangeDaily-of-params -r -d 'Arguments to be provided to the '"'"'of'"'"' class when instantiating'
+complete -c luigi-wrapper -f -l RangeDaily-reverse -d 'specifies the preferred order for catching up. False - work from the oldest missing outputs onward; True - from the newest backward'
+complete -c luigi-wrapper -f -l RangeDaily-task-limit -r -d 'how many of '"'"'of'"'"' tasks to require. Guards against scheduling insane amounts of tasks in one go'
+complete -c luigi-wrapper -f -l RangeDaily-now -r -d 'set to override current time. In seconds since epoch'
+complete -c luigi-wrapper -f -l RangeDaily-param-name -r -d 'parameter name used to pass in parameterized value. Defaults to None, meaning use first positional parameter'
+complete -c luigi-wrapper -f -l RangeDaily-start -r -d 'beginning date, inclusive. Default: None - work backward forever (requires reverse=True)'
+complete -c luigi-wrapper -f -l RangeDaily-stop -r -d 'ending date, exclusive. Default: None - work forward forever'
+complete -c luigi-wrapper -f -l RangeDaily-days-back -r -d 'extent to which contiguousness is to be assured into past, in days from current time. Prevents infinite loop when start is none. If the dataset has limited retention (i.e. old outputs get removed), this should be set shorter to that, too, to prevent the oldest outputs flapping. Increase freely if you intend to process old dates - worker'"'"'s memory is the limit'
+complete -c luigi-wrapper -f -l RangeDaily-days-forward -r -d 'extent to which contiguousness is to be assured into future, in days from current time. Prevents infinite loop when stop is none'
+complete -c luigi-wrapper -f -l RangeHourly-of -r -d 'task name to be completed. The task must take a single datetime parameter'
+complete -c luigi-wrapper -f -l RangeHourly-of-params -r -d 'Arguments to be provided to the '"'"'of'"'"' class when instantiating'
+complete -c luigi-wrapper -f -l RangeHourly-reverse -d 'specifies the preferred order for catching up. False - work from the oldest missing outputs onward; True - from the newest backward'
+complete -c luigi-wrapper -f -l RangeHourly-task-limit -r -d 'how many of '"'"'of'"'"' tasks to require. Guards against scheduling insane amounts of tasks in one go'
+complete -c luigi-wrapper -f -l RangeHourly-now -r -d 'set to override current time. In seconds since epoch'
+complete -c luigi-wrapper -f -l RangeHourly-param-name -r -d 'parameter name used to pass in parameterized value. Defaults to None, meaning use first positional parameter'
+complete -c luigi-wrapper -f -l RangeHourly-start -r -d 'beginning datehour, inclusive. Default: None - work backward forever (requires reverse=True)'
+complete -c luigi-wrapper -f -l RangeHourly-stop -r -d 'ending datehour, exclusive. Default: None - work forward forever'
+complete -c luigi-wrapper -f -l RangeHourly-hours-back -r -d 'extent to which contiguousness is to be assured into past, in hours from current time. Prevents infinite loop when start is none. If the dataset has limited retention (i.e. old outputs get removed), this should be set shorter to that, too, to prevent the oldest outputs flapping. Increase freely if you intend to process old dates - worker'"'"'s memory is the limit'
+complete -c luigi-wrapper -f -l RangeHourly-hours-forward -r -d 'extent to which contiguousness is to be assured into future, in hours from current time. Prevents infinite loop when stop is none'
+complete -c luigi-wrapper -f -l RangeByMinutes-of -r -d 'task name to be completed. The task must take a single datetime parameter'
+complete -c luigi-wrapper -f -l RangeByMinutes-of-params -r -d 'Arguments to be provided to the '"'"'of'"'"' class when instantiating'
+complete -c luigi-wrapper -f -l RangeByMinutes-reverse -d 'specifies the preferred order for catching up. False - work from the oldest missing outputs onward; True - from the newest backward'
+complete -c luigi-wrapper -f -l RangeByMinutes-task-limit -r -d 'how many of '"'"'of'"'"' tasks to require. Guards against scheduling insane amounts of tasks in one go'
+complete -c luigi-wrapper -f -l RangeByMinutes-now -r -d 'set to override current time. In seconds since epoch'
+complete -c luigi-wrapper -f -l RangeByMinutes-param-name -r -d 'parameter name used to pass in parameterized value. Defaults to None, meaning use first positional parameter'
+complete -c luigi-wrapper -f -l RangeByMinutes-start -r -d 'beginning date-hour-minute, inclusive. Default: None - work backward forever (requires reverse=True)'
+complete -c luigi-wrapper -f -l RangeByMinutes-stop -r -d 'ending date-hour-minute, exclusive. Default: None - work forward forever'
+complete -c luigi-wrapper -f -l RangeByMinutes-minutes-back -r -d 'extent to which contiguousness is to be assured into past, in minutes from current time. Prevents infinite loop when start is none. If the dataset has limited retention (i.e. old outputs get removed), this should be set shorter to that, too, to prevent the oldest outputs flapping. Increase freely if you intend to process old dates - worker'"'"'s memory is the limit'
+complete -c luigi-wrapper -f -l RangeByMinutes-minutes-forward -r -d 'extent to which contiguousness is to be assured into future, in minutes from current time. Prevents infinite loop when stop is none'
+complete -c luigi-wrapper -f -l RangeByMinutes-minutes-interval -r -d 'separation between events in minutes. It must evenly divide 60'
+complete -c luigi-wrapper -f -l retcode-unhandled-exception -r -d 'For internal luigi errors.'
+complete -c luigi-wrapper -f -l retcode-missing-data -r -d 'For when there are incomplete ExternalTask dependencies.'
+complete -c luigi-wrapper -f -l retcode-task-failed -r -d 'For when a task'"'"'s run() method fails.'
+complete -c luigi-wrapper -f -l retcode-already-running -r -d 'For both local --lock and luigid "lock"'
+complete -c luigi-wrapper -f -l retcode-scheduling-error -r -d 'For when a task'"'"'s complete() or requires() fails, or task-limit reached'
+complete -c luigi-wrapper -f -l retcode-not-run -r -d 'For when a task is not granted run permission by the scheduler.'
+complete -c luigi-wrapper -f -l ExternalProgramTask-capture-output
+complete -c luigi-wrapper -f -l ExternalProgramTask-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -f -l ExternalProgramTask-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -f -l ExternalPythonProgramTask-capture-output
+complete -c luigi-wrapper -f -l ExternalPythonProgramTask-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -f -l ExternalPythonProgramTask-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -f -l ExternalPythonProgramTask-virtualenv -r -d 'path to the virtualenv directory to use. It should point to the directory containing the ``bin/activate`` file used for enabling the virtualenv.'
+complete -c luigi-wrapper -f -l ExternalPythonProgramTask-extra-pythonpath -r -d 'extend the search path for modules by prepending this value to the ``PYTHONPATH`` environment variable.'
+complete -c luigi-wrapper -f -l bioluigi-scheduler -r -d 'Default scheduler to use in ScheduledExternalProgram'
+complete -c luigi-wrapper -f -l bioluigi-scheduler-partition -r -d 'Node partition to use for scheduling jobs if supported'
+complete -c luigi-wrapper -f -l bioluigi-scheduler-extra-args -r -d 'List of extra arguments to pass to the scheduler'
+complete -c luigi-wrapper -f -l bioluigi-prefetch-bin -r
+complete -c luigi-wrapper -f -l bioluigi-fastqdump-bin -r
+complete -c luigi-wrapper -f -l bioluigi-cutadapt-bin -r
+complete -c luigi-wrapper -f -l bioluigi-fastqc-bin -r
+complete -c luigi-wrapper -f -l bioluigi-star-bin -r
+complete -c luigi-wrapper -f -l bioluigi-rsem-dir -r
+complete -c luigi-wrapper -f -l bioluigi-bcftools-bin -r
+complete -c luigi-wrapper -f -l bioluigi-vep-bin -r
+complete -c luigi-wrapper -f -l bioluigi-vep-dir -r
+complete -c luigi-wrapper -f -l bioluigi-multiqc-bin -r
+complete -c luigi-wrapper -f -l ScheduledExternalProgramTask-capture-output
+complete -c luigi-wrapper -f -l ScheduledExternalProgramTask-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -f -l ScheduledExternalProgramTask-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -f -l ScheduledExternalProgramTask-scheduler -r -d 'Scheduler to use for running the task Choices: {slurm, local}'
+complete -c luigi-wrapper -f -l ScheduledExternalProgramTask-scheduler-partition -r -d 'Scheduler partition (or queue) to use if supported'
+complete -c luigi-wrapper -f -l ScheduledExternalProgramTask-scheduler-extra-args -r -d 'Extra arguments to pass to the scheduler'
+complete -c luigi-wrapper -f -l ScheduledExternalProgramTask-walltime -r -d 'Amount of time to allocate for the task, default value of zero implies unlimited time'
+complete -c luigi-wrapper -f -l ScheduledExternalProgramTask-cpus -r -d 'Number of CPUs to allocate for the task'
+complete -c luigi-wrapper -f -l ScheduledExternalProgramTask-memory -r -d 'Amount of memory (in gigabyte) to allocate for the task'
+complete -c luigi-wrapper -f -l fastqc.GenerateReport-capture-output
+complete -c luigi-wrapper -f -l fastqc.GenerateReport-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -f -l fastqc.GenerateReport-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -f -l fastqc.GenerateReport-scheduler -r -d 'Scheduler to use for running the task Choices: {slurm, local}'
+complete -c luigi-wrapper -f -l fastqc.GenerateReport-scheduler-partition -r -d 'Scheduler partition (or queue) to use if supported'
+complete -c luigi-wrapper -f -l fastqc.GenerateReport-scheduler-extra-args -r -d 'Extra arguments to pass to the scheduler'
+complete -c luigi-wrapper -f -l fastqc.GenerateReport-walltime -r -d 'Amount of time to allocate for the task, default value of zero implies unlimited time'
+complete -c luigi-wrapper -f -l fastqc.GenerateReport-cpus -r -d 'Number of CPUs to allocate for the task'
+complete -c luigi-wrapper -f -l fastqc.GenerateReport-memory -r -d 'Amount of memory (in gigabyte) to allocate for the task'
+complete -c luigi-wrapper -f -l fastqc.GenerateReport-input-file -r
+complete -c luigi-wrapper -f -l fastqc.GenerateReport-output-dir -r
+complete -c luigi-wrapper -f -l multiqc.GenerateReport-capture-output
+complete -c luigi-wrapper -f -l multiqc.GenerateReport-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -f -l multiqc.GenerateReport-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -f -l multiqc.GenerateReport-scheduler -r -d 'Scheduler to use for running the task Choices: {slurm, local}'
+complete -c luigi-wrapper -f -l multiqc.GenerateReport-scheduler-partition -r -d 'Scheduler partition (or queue) to use if supported'
+complete -c luigi-wrapper -f -l multiqc.GenerateReport-scheduler-extra-args -r -d 'Extra arguments to pass to the scheduler'
+complete -c luigi-wrapper -f -l multiqc.GenerateReport-walltime -r -d 'Amount of time to allocate for the task, default value of zero implies unlimited time'
+complete -c luigi-wrapper -f -l multiqc.GenerateReport-cpus -r -d 'Number of CPUs to allocate for the task'
+complete -c luigi-wrapper -f -l multiqc.GenerateReport-memory -r -d 'Amount of memory (in gigabyte) to allocate for the task'
+complete -c luigi-wrapper -f -l multiqc.GenerateReport-input-dirs -r
+complete -c luigi-wrapper -f -l multiqc.GenerateReport-output-dir -r
+complete -c luigi-wrapper -f -l multiqc.GenerateReport-sample-names -r
+complete -c luigi-wrapper -f -l multiqc.GenerateReport-replace-names -r
+complete -c luigi-wrapper -f -l multiqc.GenerateReport-title -r
+complete -c luigi-wrapper -f -l multiqc.GenerateReport-comment -r
+complete -c luigi-wrapper -f -l multiqc.GenerateReport-force
+complete -c luigi-wrapper -f -l rnaseq-pipeline-GENOMES -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline-OUTPUT-DIR -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline-REFERENCES -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline-METADATA -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline-DATA -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline-DATAQCDIR -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline-ALIGNDIR -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline-ALIGNQCDIR -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline-QUANTDIR -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline-BATCHINFODIR -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline-RSEM-DIR -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline-SLACK-WEBHOOK-URL -r
+complete -c luigi-wrapper -f -l cutadapt.CutadaptTask-capture-output
+complete -c luigi-wrapper -f -l cutadapt.CutadaptTask-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -f -l cutadapt.CutadaptTask-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -f -l cutadapt.CutadaptTask-scheduler -r -d 'Scheduler to use for running the task Choices: {slurm, local}'
+complete -c luigi-wrapper -f -l cutadapt.CutadaptTask-scheduler-partition -r -d 'Scheduler partition (or queue) to use if supported'
+complete -c luigi-wrapper -f -l cutadapt.CutadaptTask-scheduler-extra-args -r -d 'Extra arguments to pass to the scheduler'
+complete -c luigi-wrapper -f -l cutadapt.CutadaptTask-walltime -r -d 'Amount of time to allocate for the task, default value of zero implies unlimited time'
+complete -c luigi-wrapper -f -l cutadapt.CutadaptTask-cpus -r -d 'Number of CPUs to allocate for the task'
+complete -c luigi-wrapper -f -l cutadapt.CutadaptTask-memory -r -d 'Amount of memory (in gigabyte) to allocate for the task'
+complete -c luigi-wrapper -f -l cutadapt.CutadaptTask-adapter-3prime -r
+complete -c luigi-wrapper -f -l cutadapt.CutadaptTask-adapter-5prime -r
+complete -c luigi-wrapper -f -l cutadapt.CutadaptTask-cut -r
+complete -c luigi-wrapper -f -l cutadapt.CutadaptTask-trim-n
+complete -c luigi-wrapper -f -l cutadapt.CutadaptTask-minimum-length -r
+complete -c luigi-wrapper -f -l cutadapt.CutadaptTask-report-file -r -d 'Destination for the JSON report'
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-capture-output
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-scheduler -r -d 'Scheduler to use for running the task Choices: {slurm, local}'
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-scheduler-partition -r -d 'Scheduler partition (or queue) to use if supported'
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-scheduler-extra-args -r -d 'Extra arguments to pass to the scheduler'
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-walltime -r -d 'Amount of time to allocate for the task, default value of zero implies unlimited time'
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-cpus -r -d 'Number of CPUs to allocate for the task'
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-memory -r -d 'Amount of memory (in gigabyte) to allocate for the task'
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-adapter-3prime -r
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-adapter-5prime -r
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-cut -r
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-trim-n
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-minimum-length -r
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-report-file -r -d 'Destination for the JSON report'
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-input-file -r
+complete -c luigi-wrapper -f -l cutadapt.TrimReads-output-file -r
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-capture-output
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-scheduler -r -d 'Scheduler to use for running the task Choices: {slurm, local}'
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-scheduler-partition -r -d 'Scheduler partition (or queue) to use if supported'
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-scheduler-extra-args -r -d 'Extra arguments to pass to the scheduler'
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-walltime -r -d 'Amount of time to allocate for the task, default value of zero implies unlimited time'
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-cpus -r -d 'Number of CPUs to allocate for the task'
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-memory -r -d 'Amount of memory (in gigabyte) to allocate for the task'
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-adapter-3prime -r
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-adapter-5prime -r
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-cut -r
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-trim-n
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-minimum-length -r
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-report-file -r -d 'Destination for the JSON report'
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-input-file -r
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-input2-file -r
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-output-file -r
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-output2-file -r
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-reverse-adapter-3prime -r
+complete -c luigi-wrapper -f -l cutadapt.TrimPairedReads-reverse-adapter-5prime -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.arrayexpress.DownloadArrayExpressFastq-sample-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.arrayexpress.DownloadArrayExpressFastq-fastq-url -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.arrayexpress.DownloadArrayExpressSample-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.arrayexpress.DownloadArrayExpressSample-sample-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.arrayexpress.DownloadArrayExpressSample-fastq-urls -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.arrayexpress.DownloadArrayExpressExperiment-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.gemma-baseurl -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.gemma-appdata-dir -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.gemma-cli-bin -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.gemma-cli-JAVA-HOME -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.gemma-cli-JAVA-OPTS -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.gemma-human-reference-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.gemma-mouse-reference-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.gemma-rat-reference-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.gemma.GemmaCliTask-capture-output
+complete -c luigi-wrapper -f -l rnaseq-pipeline.gemma.GemmaCliTask-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.gemma.GemmaCliTask-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.gemma.GemmaCliTask-experiment-id -r
+complete -c luigi-wrapper -f -l sratoolkit.Prefetch-capture-output
+complete -c luigi-wrapper -f -l sratoolkit.Prefetch-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -f -l sratoolkit.Prefetch-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -f -l sratoolkit.Prefetch-scheduler -r -d 'Scheduler to use for running the task Choices: {slurm, local}'
+complete -c luigi-wrapper -f -l sratoolkit.Prefetch-scheduler-partition -r -d 'Scheduler partition (or queue) to use if supported'
+complete -c luigi-wrapper -f -l sratoolkit.Prefetch-scheduler-extra-args -r -d 'Extra arguments to pass to the scheduler'
+complete -c luigi-wrapper -f -l sratoolkit.Prefetch-walltime -r -d 'Amount of time to allocate for the task, default value of zero implies unlimited time'
+complete -c luigi-wrapper -f -l sratoolkit.Prefetch-cpus -r -d 'Number of CPUs to allocate for the task'
+complete -c luigi-wrapper -f -l sratoolkit.Prefetch-memory -r -d 'Amount of memory (in gigabyte) to allocate for the task'
+complete -c luigi-wrapper -f -l sratoolkit.Prefetch-metadata -r
+complete -c luigi-wrapper -f -l sratoolkit.Prefetch-srr-accession -r
+complete -c luigi-wrapper -f -l sratoolkit.Prefetch-output-file -r
+complete -c luigi-wrapper -f -l sratoolkit.Prefetch-max-size -r -d 'Maximum download size in gigabytes'
+complete -c luigi-wrapper -f -l sratoolkit.Prefetch-extra-args -r -d 'Extra arguments to pass to prefetch which can be used to setup Aspera'
+complete -c luigi-wrapper -f -l sratoolkit.FastqDump-capture-output
+complete -c luigi-wrapper -f -l sratoolkit.FastqDump-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -f -l sratoolkit.FastqDump-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -f -l sratoolkit.FastqDump-scheduler -r -d 'Scheduler to use for running the task Choices: {slurm, local}'
+complete -c luigi-wrapper -f -l sratoolkit.FastqDump-scheduler-partition -r -d 'Scheduler partition (or queue) to use if supported'
+complete -c luigi-wrapper -f -l sratoolkit.FastqDump-scheduler-extra-args -r -d 'Extra arguments to pass to the scheduler'
+complete -c luigi-wrapper -f -l sratoolkit.FastqDump-walltime -r -d 'Amount of time to allocate for the task, default value of zero implies unlimited time'
+complete -c luigi-wrapper -f -l sratoolkit.FastqDump-cpus -r -d 'Number of CPUs to allocate for the task'
+complete -c luigi-wrapper -f -l sratoolkit.FastqDump-memory -r -d 'Amount of memory (in gigabyte) to allocate for the task'
+complete -c luigi-wrapper -f -l sratoolkit.FastqDump-metadata -r
+complete -c luigi-wrapper -f -l sratoolkit.FastqDump-input-file -r -d 'A file path or a SRA archive, or a SRA run accession'
+complete -c luigi-wrapper -f -l sratoolkit.FastqDump-output-dir -r -d 'Destination directory for the extracted FASTQs'
+complete -c luigi-wrapper -f -l sratoolkit.FastqDump-minimum-read-length -r -d 'Minimum read length to be extracted from the archive'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.PrefetchSraRun-metadata -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.PrefetchSraRun-srr -r -d 'SRA run identifier'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DumpSraRun-metadata -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DumpSraRun-srr -r -d 'SRA run identifier'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DumpSraRun-srx -r -d 'SRA experiment identifier'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DumpSraRun-paired-reads -d 'Indicate whether reads have paired or single mates'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DownloadSraExperimentRunInfo-metadata -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DownloadSraExperimentRunInfo-rerun
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DownloadSraExperimentRunInfo-srx -r -d 'SRX accession to use'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DownloadSraExperiment-metadata -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DownloadSraExperiment-rerun
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DownloadSraExperiment-srx -r -d 'SRX accession to use'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DownloadSraExperiment-srr -r -d 'Specific SRA run accession to use (defaults to latest)'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DownloadSraExperiment-force-single-end -d 'Force the library layout to be single-end'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DownloadSraExperiment-force-paired-reads -d 'Force the library layout to be paired'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DownloadSraProjectRunInfo-metadata -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DownloadSraProjectRunInfo-rerun
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DownloadSraProjectRunInfo-srp -r -d 'SRA project identifier'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DownloadSraProject-metadata -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DownloadSraProject-rerun
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DownloadSraProject-srp -r -d 'SRA project identifier'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.DownloadSraProject-ignored-samples -r -d 'Ignored SRX identifiers'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.ExtractSraProjectBatchInfo-metadata -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.ExtractSraProjectBatchInfo-rerun
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.ExtractSraProjectBatchInfo-srp -r -d 'SRA project identifier'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.sra.ExtractSraProjectBatchInfo-ignored-samples -r -d 'Ignored SRX identifiers'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.DownloadGeoSampleMetadata-metadata -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.DownloadGeoSampleMetadata-rerun
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.DownloadGeoSampleMetadata-gsm -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.DownloadGeoSample-metadata -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.DownloadGeoSample-rerun
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.DownloadGeoSample-gsm -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.DownloadGeoSeriesMetadata-metadata -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.DownloadGeoSeriesMetadata-rerun
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.DownloadGeoSeriesMetadata-gse -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.DownloadGeoSeries-metadata -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.DownloadGeoSeries-rerun
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.DownloadGeoSeries-gse -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.DownloadGeoSeries-ignored-samples -r -d 'Ignored GSM identifiers'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.ExtractGeoSeriesBatchInfo-metadata -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.ExtractGeoSeriesBatchInfo-rerun
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.ExtractGeoSeriesBatchInfo-gse -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.geo.ExtractGeoSeriesBatchInfo-ignored-samples -r -d 'Ignored GSM identifiers'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.gemma.DownloadGemmaExperiment-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.gemma.ExtractGemmaExperimentBatchInfo-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.local.DownloadLocalSample-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.local.DownloadLocalSample-sample-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.sources.local.DownloadLocalExperiment-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.DownloadSample-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.DownloadSample-sample-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.DownloadSample-source -r -d 'Choices: {arrayexpress, local, gemma, sra, geo}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.DownloadExperiment-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.DownloadExperiment-source -r -d 'Choices: {arrayexpress, local, gemma, sra, geo}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.TrimSample-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.TrimSample-sample-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.TrimSample-source -r -d 'Choices: {arrayexpress, local, gemma, sra, geo}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.TrimSample-ignore-mate -r -d 'Choices: {forward, reverse, neither}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.TrimSample-minimum-length -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.TrimExperiment-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.TrimExperiment-source -r -d 'Choices: {arrayexpress, local, gemma, sra, geo}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.QualityControlSample-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.QualityControlSample-sample-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.QualityControlSample-source -r -d 'Choices: {arrayexpress, local, gemma, sra, geo}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.QualityControlSample-ignore-mate -r -d 'Choices: {forward, reverse, neither}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.QualityControlSample-minimum-length -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.QualityControlExperiment-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.QualityControlExperiment-source -r -d 'Choices: {arrayexpress, local, gemma, sra, geo}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.PrepareReference-capture-output
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.PrepareReference-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.PrepareReference-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.PrepareReference-scheduler -r -d 'Scheduler to use for running the task Choices: {slurm, local}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.PrepareReference-scheduler-partition -r -d 'Scheduler partition (or queue) to use if supported'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.PrepareReference-scheduler-extra-args -r -d 'Extra arguments to pass to the scheduler'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.PrepareReference-walltime -r -d 'Amount of time to allocate for the task, default value of zero implies unlimited time'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.PrepareReference-taxon -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.PrepareReference-reference-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignSample-capture-output
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignSample-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignSample-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignSample-scheduler -r -d 'Scheduler to use for running the task Choices: {slurm, local}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignSample-scheduler-partition -r -d 'Scheduler partition (or queue) to use if supported'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignSample-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignSample-sample-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignSample-source -r -d 'Choices: {arrayexpress, local, gemma, sra, geo}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignSample-ignore-mate -r -d 'Choices: {forward, reverse, neither}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignSample-minimum-length -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignSample-taxon -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignSample-reference-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignSample-strand-specific
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignSample-scope -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignExperiment-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignExperiment-source -r -d 'Choices: {arrayexpress, local, gemma, sra, geo}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignExperiment-taxon -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignExperiment-reference-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.AlignExperiment-scope -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.GenerateReportForExperiment-rerun
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.GenerateReportForExperiment-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.GenerateReportForExperiment-source -r -d 'Choices: {arrayexpress, local, gemma, sra, geo}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.GenerateReportForExperiment-taxon -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.GenerateReportForExperiment-reference-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.GenerateReportForExperiment-scope -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.CountExperiment-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.CountExperiment-source -r -d 'Choices: {arrayexpress, local, gemma, sra, geo}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.CountExperiment-taxon -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.CountExperiment-reference-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.CountExperiment-scope -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperiment-rerun
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperiment-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperiment-source -r -d 'Choices: {arrayexpress, local, gemma, sra, geo}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperiment-taxon -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperiment-reference-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperiment-scope -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperiment-priority -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentBatchInfoToGemma-capture-output
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentBatchInfoToGemma-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentBatchInfoToGemma-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentBatchInfoToGemma-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentBatchInfoToGemma-rerun
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentBatchInfoToGemma-ignored-samples -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentDataToGemma-capture-output
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentDataToGemma-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentDataToGemma-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentDataToGemma-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentDataToGemma-rerun
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentReportToGemma-rerun
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentReportToGemma-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentReportToGemma-priority -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentToGemma-capture-output
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentToGemma-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentToGemma-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentToGemma-experiment-id -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentToGemma-rerun
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentToGemma-ignored-samples -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentToGemma-priority -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentsFromFileToGemma-ignore-priority -d 'Ignore the priority column and inherit the priority of this task. Rows with zero priority are nonetheless ignored.'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentsFromFileToGemma-input-file -r
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma-ignore-priority -d 'Ignore the priority column and inherit the priority of this task. Rows with zero priority are nonetheless ignored.'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma-spreadsheet-id -r -d 'Spreadsheet ID in Google Sheets (lookup {spreadsheetId} in https://docs.google.com/spreadsheets/d/{spreadsheetId}/edit)'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma-sheet-name -r -d 'Name of the spreadsheet in the document'
+complete -c luigi-wrapper -f -l rnaseq-pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma-revision-id -r -d 'Revision ID of the spreadsheet (not yet supported, but will default to the latest)'
+complete -c luigi-wrapper -n 'not __fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignExperiment rnaseq_pipeline.tasks.AlignSample rnaseq_pipeline.tasks.CountExperiment rnaseq_pipeline.tasks.DownloadExperiment rnaseq_pipeline.tasks.DownloadSample rnaseq_pipeline.tasks.GenerateReportForExperiment rnaseq_pipeline.tasks.PrepareReference rnaseq_pipeline.tasks.QualityControlExperiment rnaseq_pipeline.tasks.QualityControlSample rnaseq_pipeline.tasks.SubmitExperiment rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma rnaseq_pipeline.tasks.SubmitExperimentDataToGemma rnaseq_pipeline.tasks.SubmitExperimentReportToGemma rnaseq_pipeline.tasks.SubmitExperimentToGemma rnaseq_pipeline.tasks.SubmitExperimentsFromFileToGemma rnaseq_pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma rnaseq_pipeline.tasks.TrimExperiment rnaseq_pipeline.tasks.TrimSample' -a 'rnaseq_pipeline.tasks.AlignExperiment rnaseq_pipeline.tasks.AlignSample rnaseq_pipeline.tasks.CountExperiment rnaseq_pipeline.tasks.DownloadExperiment rnaseq_pipeline.tasks.DownloadSample rnaseq_pipeline.tasks.GenerateReportForExperiment rnaseq_pipeline.tasks.PrepareReference rnaseq_pipeline.tasks.QualityControlExperiment rnaseq_pipeline.tasks.QualityControlSample rnaseq_pipeline.tasks.SubmitExperiment rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma rnaseq_pipeline.tasks.SubmitExperimentDataToGemma rnaseq_pipeline.tasks.SubmitExperimentReportToGemma rnaseq_pipeline.tasks.SubmitExperimentToGemma rnaseq_pipeline.tasks.SubmitExperimentsFromFileToGemma rnaseq_pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma rnaseq_pipeline.tasks.TrimExperiment rnaseq_pipeline.tasks.TrimSample'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignExperiment' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignExperiment' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignExperiment' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignExperiment' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignExperiment' -f -l rnaseq-pipeline.tasks.AlignExperiment-experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignExperiment' -f -l experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignExperiment' -f -l rnaseq-pipeline.tasks.AlignExperiment-source -r -d 'Choices: {gemma, local, arrayexpress, geo, sra}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignExperiment' -f -l source -r -d 'Choices: {gemma, local, arrayexpress, geo, sra}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignExperiment' -f -l rnaseq-pipeline.tasks.AlignExperiment-taxon -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignExperiment' -f -l taxon -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignExperiment' -f -l rnaseq-pipeline.tasks.AlignExperiment-reference-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignExperiment' -f -l reference-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignExperiment' -f -l rnaseq-pipeline.tasks.AlignExperiment-scope -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignExperiment' -f -l scope -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l rnaseq-pipeline.tasks.AlignSample-capture-output
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l capture-output
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l rnaseq-pipeline.tasks.AlignSample-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {none, stdout, stderr}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {none, stdout, stderr}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l rnaseq-pipeline.tasks.AlignSample-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l rnaseq-pipeline.tasks.AlignSample-scheduler -r -d 'Scheduler to use for running the task Choices: {slurm, local}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l scheduler -r -d 'Scheduler to use for running the task Choices: {slurm, local}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l rnaseq-pipeline.tasks.AlignSample-scheduler-partition -r -d 'Scheduler partition (or queue) to use if supported'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l scheduler-partition -r -d 'Scheduler partition (or queue) to use if supported'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l rnaseq-pipeline.tasks.AlignSample-experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l rnaseq-pipeline.tasks.AlignSample-sample-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l sample-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l rnaseq-pipeline.tasks.AlignSample-source -r -d 'Choices: {gemma, local, arrayexpress, geo, sra}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l source -r -d 'Choices: {gemma, local, arrayexpress, geo, sra}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l rnaseq-pipeline.tasks.AlignSample-ignore-mate -r -d 'Choices: {reverse, neither, forward}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l ignore-mate -r -d 'Choices: {reverse, neither, forward}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l rnaseq-pipeline.tasks.AlignSample-minimum-length -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l minimum-length -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l rnaseq-pipeline.tasks.AlignSample-taxon -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l taxon -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l rnaseq-pipeline.tasks.AlignSample-reference-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l reference-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l rnaseq-pipeline.tasks.AlignSample-strand-specific
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l strand-specific
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l rnaseq-pipeline.tasks.AlignSample-scope -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.AlignSample' -f -l scope -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.CountExperiment' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.CountExperiment' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.CountExperiment' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.CountExperiment' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.CountExperiment' -f -l rnaseq-pipeline.tasks.CountExperiment-experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.CountExperiment' -f -l experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.CountExperiment' -f -l rnaseq-pipeline.tasks.CountExperiment-source -r -d 'Choices: {arrayexpress, local, gemma, geo, sra}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.CountExperiment' -f -l source -r -d 'Choices: {arrayexpress, local, gemma, geo, sra}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.CountExperiment' -f -l rnaseq-pipeline.tasks.CountExperiment-taxon -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.CountExperiment' -f -l taxon -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.CountExperiment' -f -l rnaseq-pipeline.tasks.CountExperiment-reference-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.CountExperiment' -f -l reference-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.CountExperiment' -f -l rnaseq-pipeline.tasks.CountExperiment-scope -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.CountExperiment' -f -l scope -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadExperiment' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadExperiment' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadExperiment' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadExperiment' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadExperiment' -f -l rnaseq-pipeline.tasks.DownloadExperiment-experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadExperiment' -f -l experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadExperiment' -f -l rnaseq-pipeline.tasks.DownloadExperiment-source -r -d 'Choices: {local, sra, arrayexpress, geo, gemma}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadExperiment' -f -l source -r -d 'Choices: {local, sra, arrayexpress, geo, gemma}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadSample' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadSample' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadSample' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadSample' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadSample' -f -l rnaseq-pipeline.tasks.DownloadSample-experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadSample' -f -l experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadSample' -f -l rnaseq-pipeline.tasks.DownloadSample-sample-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadSample' -f -l sample-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadSample' -f -l rnaseq-pipeline.tasks.DownloadSample-source -r -d 'Choices: {sra, geo, arrayexpress, gemma, local}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.DownloadSample' -f -l source -r -d 'Choices: {sra, geo, arrayexpress, gemma, local}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.GenerateReportForExperiment' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.GenerateReportForExperiment' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.GenerateReportForExperiment' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.GenerateReportForExperiment' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.GenerateReportForExperiment' -f -l rnaseq-pipeline.tasks.GenerateReportForExperiment-rerun
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.GenerateReportForExperiment' -f -l rerun
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.GenerateReportForExperiment' -f -l rnaseq-pipeline.tasks.GenerateReportForExperiment-experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.GenerateReportForExperiment' -f -l experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.GenerateReportForExperiment' -f -l rnaseq-pipeline.tasks.GenerateReportForExperiment-source -r -d 'Choices: {local, arrayexpress, gemma, sra, geo}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.GenerateReportForExperiment' -f -l source -r -d 'Choices: {local, arrayexpress, gemma, sra, geo}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.GenerateReportForExperiment' -f -l rnaseq-pipeline.tasks.GenerateReportForExperiment-taxon -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.GenerateReportForExperiment' -f -l taxon -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.GenerateReportForExperiment' -f -l rnaseq-pipeline.tasks.GenerateReportForExperiment-reference-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.GenerateReportForExperiment' -f -l reference-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.GenerateReportForExperiment' -f -l rnaseq-pipeline.tasks.GenerateReportForExperiment-scope -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.GenerateReportForExperiment' -f -l scope -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l rnaseq-pipeline.tasks.PrepareReference-capture-output
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l capture-output
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l rnaseq-pipeline.tasks.PrepareReference-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stdout, stderr, none}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stdout, stderr, none}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l rnaseq-pipeline.tasks.PrepareReference-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l rnaseq-pipeline.tasks.PrepareReference-scheduler -r -d 'Scheduler to use for running the task Choices: {slurm, local}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l scheduler -r -d 'Scheduler to use for running the task Choices: {slurm, local}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l rnaseq-pipeline.tasks.PrepareReference-scheduler-partition -r -d 'Scheduler partition (or queue) to use if supported'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l scheduler-partition -r -d 'Scheduler partition (or queue) to use if supported'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l rnaseq-pipeline.tasks.PrepareReference-scheduler-extra-args -r -d 'Extra arguments to pass to the scheduler'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l scheduler-extra-args -r -d 'Extra arguments to pass to the scheduler'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l rnaseq-pipeline.tasks.PrepareReference-walltime -r -d 'Amount of time to allocate for the task, default value of zero implies unlimited time'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l walltime -r -d 'Amount of time to allocate for the task, default value of zero implies unlimited time'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l rnaseq-pipeline.tasks.PrepareReference-taxon -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l taxon -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l rnaseq-pipeline.tasks.PrepareReference-reference-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.PrepareReference' -f -l reference-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlExperiment' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlExperiment' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlExperiment' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlExperiment' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlExperiment' -f -l rnaseq-pipeline.tasks.QualityControlExperiment-experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlExperiment' -f -l experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlExperiment' -f -l rnaseq-pipeline.tasks.QualityControlExperiment-source -r -d 'Choices: {geo, gemma, local, sra, arrayexpress}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlExperiment' -f -l source -r -d 'Choices: {geo, gemma, local, sra, arrayexpress}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlSample' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlSample' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlSample' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlSample' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlSample' -f -l rnaseq-pipeline.tasks.QualityControlSample-experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlSample' -f -l experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlSample' -f -l rnaseq-pipeline.tasks.QualityControlSample-sample-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlSample' -f -l sample-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlSample' -f -l rnaseq-pipeline.tasks.QualityControlSample-source -r -d 'Choices: {geo, gemma, arrayexpress, local, sra}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlSample' -f -l source -r -d 'Choices: {geo, gemma, arrayexpress, local, sra}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlSample' -f -l rnaseq-pipeline.tasks.QualityControlSample-ignore-mate -r -d 'Choices: {forward, neither, reverse}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlSample' -f -l ignore-mate -r -d 'Choices: {forward, neither, reverse}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlSample' -f -l rnaseq-pipeline.tasks.QualityControlSample-minimum-length -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.QualityControlSample' -f -l minimum-length -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l rnaseq-pipeline.tasks.SubmitExperiment-rerun
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l rerun
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l rnaseq-pipeline.tasks.SubmitExperiment-experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l rnaseq-pipeline.tasks.SubmitExperiment-source -r -d 'Choices: {geo, sra, gemma, arrayexpress, local}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l source -r -d 'Choices: {geo, sra, gemma, arrayexpress, local}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l rnaseq-pipeline.tasks.SubmitExperiment-taxon -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l taxon -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l rnaseq-pipeline.tasks.SubmitExperiment-reference-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l reference-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l rnaseq-pipeline.tasks.SubmitExperiment-scope -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l scope -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l rnaseq-pipeline.tasks.SubmitExperiment-priority -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperiment' -f -l priority -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentBatchInfoToGemma-capture-output
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma' -f -l capture-output
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentBatchInfoToGemma-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {none, stdout, stderr}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma' -f -l stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {none, stdout, stderr}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentBatchInfoToGemma-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma' -f -l tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentBatchInfoToGemma-experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma' -f -l experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentBatchInfoToGemma-rerun
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma' -f -l rerun
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentBatchInfoToGemma-ignored-samples -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentBatchInfoToGemma' -f -l ignored-samples -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentDataToGemma' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentDataToGemma' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentDataToGemma' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentDataToGemma' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentDataToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentDataToGemma-capture-output
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentDataToGemma' -f -l capture-output
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentDataToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentDataToGemma-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stdout, none, stderr}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentDataToGemma' -f -l stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stdout, none, stderr}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentDataToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentDataToGemma-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentDataToGemma' -f -l tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentDataToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentDataToGemma-experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentDataToGemma' -f -l experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentDataToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentDataToGemma-rerun
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentDataToGemma' -f -l rerun
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentReportToGemma' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentReportToGemma' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentReportToGemma' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentReportToGemma' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentReportToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentReportToGemma-rerun
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentReportToGemma' -f -l rerun
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentReportToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentReportToGemma-experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentReportToGemma' -f -l experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentReportToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentReportToGemma-priority -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentReportToGemma' -f -l priority -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentToGemma-capture-output
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l capture-output
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentToGemma-stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l stream-for-searching-tracking-url -r -d 'Stream for searching tracking URL Choices: {stderr, none, stdout}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentToGemma-tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l tracking-url-pattern -r -d 'Regex pattern used for searching URL in the logs of the external program'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentToGemma-experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentToGemma-rerun
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l rerun
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentToGemma-ignored-samples -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l ignored-samples -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentToGemma-priority -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentToGemma' -f -l priority -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromFileToGemma' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromFileToGemma' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromFileToGemma' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromFileToGemma' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromFileToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentsFromFileToGemma-ignore-priority -d 'Ignore the priority column and inherit the priority of this task. Rows with zero priority are nonetheless ignored.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromFileToGemma' -f -l ignore-priority -d 'Ignore the priority column and inherit the priority of this task. Rows with zero priority are nonetheless ignored.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromFileToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentsFromFileToGemma-input-file -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromFileToGemma' -f -l input-file -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma-ignore-priority -d 'Ignore the priority column and inherit the priority of this task. Rows with zero priority are nonetheless ignored.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma' -f -l ignore-priority -d 'Ignore the priority column and inherit the priority of this task. Rows with zero priority are nonetheless ignored.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma-spreadsheet-id -r -d 'Spreadsheet ID in Google Sheets (lookup {spreadsheetId} in https://docs.google.com/spreadsheets/d/{spreadsheetId}/edit)'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma' -f -l spreadsheet-id -r -d 'Spreadsheet ID in Google Sheets (lookup {spreadsheetId} in https://docs.google.com/spreadsheets/d/{spreadsheetId}/edit)'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma-sheet-name -r -d 'Name of the spreadsheet in the document'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma' -f -l sheet-name -r -d 'Name of the spreadsheet in the document'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma' -f -l rnaseq-pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma-revision-id -r -d 'Revision ID of the spreadsheet (not yet supported, but will default to the latest)'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.SubmitExperimentsFromGoogleSpreadsheetToGemma' -f -l revision-id -r -d 'Revision ID of the spreadsheet (not yet supported, but will default to the latest)'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimExperiment' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimExperiment' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimExperiment' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimExperiment' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimExperiment' -f -l rnaseq-pipeline.tasks.TrimExperiment-experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimExperiment' -f -l experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimExperiment' -f -l rnaseq-pipeline.tasks.TrimExperiment-source -r -d 'Choices: {gemma, arrayexpress, local, geo, sra}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimExperiment' -f -l source -r -d 'Choices: {gemma, arrayexpress, local, geo, sra}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimSample' -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimSample' -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimSample' -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimSample' -f -l help-all -d 'Show all command line flags'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimSample' -f -l rnaseq-pipeline.tasks.TrimSample-experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimSample' -f -l experiment-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimSample' -f -l rnaseq-pipeline.tasks.TrimSample-sample-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimSample' -f -l sample-id -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimSample' -f -l rnaseq-pipeline.tasks.TrimSample-source -r -d 'Choices: {local, geo, sra, arrayexpress, gemma}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimSample' -f -l source -r -d 'Choices: {local, geo, sra, arrayexpress, gemma}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimSample' -f -l rnaseq-pipeline.tasks.TrimSample-ignore-mate -r -d 'Choices: {neither, forward, reverse}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimSample' -f -l ignore-mate -r -d 'Choices: {neither, forward, reverse}'
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimSample' -f -l rnaseq-pipeline.tasks.TrimSample-minimum-length -r
+complete -c luigi-wrapper -n '__fish_seen_subcommand_from rnaseq_pipeline.tasks.TrimSample' -f -l minimum-length -r
diff --git a/data/fish/completions/luigi.fish b/data/fish/completions/luigi.fish
new file mode 100644
index 00000000..876a85fd
--- /dev/null
+++ b/data/fish/completions/luigi.fish
@@ -0,0 +1,165 @@
+complete -c luigi -e
+complete -c luigi -f
+complete -c luigi -f -l TestNotificationsTask-raise-in-complete -d 'If true, fail in complete() instead of run()'
+complete -c luigi -f -l email-force-send -d 'Send e-mail even from a tty'
+complete -c luigi -f -l email-format -r -d 'Format type for sent e-mails Choices: {plain, html, none}'
+complete -c luigi -f -l email-method -r -d 'Method for sending e-mail Choices: {smtp, ses, sendgrid, sns}'
+complete -c luigi -f -l email-prefix -r -d 'Prefix for subject lines of all e-mails'
+complete -c luigi -f -l email-receiver -r -d 'Address to send error e-mails to'
+complete -c luigi -f -l email-traceback-max-length -r -d 'Max length for error traceback'
+complete -c luigi -f -l email-sender -r -d 'Address to send e-mails from'
+complete -c luigi -f -l smtp-host -r -d 'Hostname of smtp server'
+complete -c luigi -f -l smtp-local-hostname -r -d 'If specified, local_hostname is used as the FQDN of the local host in the HELO/EHLO command'
+complete -c luigi -f -l smtp-no-tls -d 'Do not use TLS in SMTP connections'
+complete -c luigi -f -l smtp-password -r -d 'Password for the SMTP server login'
+complete -c luigi -f -l smtp-port -r -d 'Port number for smtp server'
+complete -c luigi -f -l smtp-ssl -d 'Use SSL for the SMTP connection.'
+complete -c luigi -f -l smtp-timeout -r -d 'Number of seconds before timing out the smtp connection'
+complete -c luigi -f -l smtp-username -r -d 'Username used to log in to the SMTP host'
+complete -c luigi -f -l sendgrid-apikey -r -d 'API key for SendGrid login'
+complete -c luigi -f -l batch-email-email-interval -r -d 'Number of minutes between e-mail sends (default: 60)'
+complete -c luigi -f -l batch-email-batch-mode -r -d 'Method used for batching failures in e-mail. If "family" all failures for tasks with the same family will be batched. If "unbatched_params", all failures for tasks with the same family and non-batched parameters will be batched. If "all", tasks will only be batched if they have identical names. Choices: {family, all, unbatched_params}'
+complete -c luigi -f -l batch-email-error-lines -r -d 'Number of lines to show from each error message. 0 means show all'
+complete -c luigi -f -l batch-email-error-messages -r -d 'Number of error messages to show for each group'
+complete -c luigi -f -l batch-email-group-by-error-messages -d 'Group items with the same error messages together'
+complete -c luigi -f -l scheduler-retry-delay -r
+complete -c luigi -f -l scheduler-remove-delay -r
+complete -c luigi -f -l scheduler-worker-disconnect-delay -r
+complete -c luigi -f -l scheduler-state-path -r
+complete -c luigi -f -l scheduler-batch-emails -d 'Send e-mails in batches rather than immediately'
+complete -c luigi -f -l scheduler-disable-window -r
+complete -c luigi -f -l scheduler-retry-count -r
+complete -c luigi -f -l scheduler-disable-hard-timeout -r
+complete -c luigi -f -l scheduler-disable-persist -r
+complete -c luigi -f -l scheduler-max-shown-tasks -r
+complete -c luigi -f -l scheduler-max-graph-nodes -r
+complete -c luigi -f -l scheduler-record-task-history
+complete -c luigi -f -l scheduler-prune-on-get-work
+complete -c luigi -f -l scheduler-pause-enabled
+complete -c luigi -f -l scheduler-send-messages
+complete -c luigi -f -l scheduler-metrics-collector -r
+complete -c luigi -f -l scheduler-metrics-custom-import -r
+complete -c luigi -f -l scheduler-stable-done-cooldown-secs -r -d 'Sets cooldown period to avoid running the same task twice'
+complete -c luigi -f -l worker-id -r -d 'Override the auto-generated worker_id'
+complete -c luigi -f -l worker-ping-interval -r
+complete -c luigi -f -l worker-keep-alive
+complete -c luigi -f -l worker-count-uniques -d 'worker-count-uniques means that we will keep a worker alive only if it has a unique pending task, as well as having keep-alive true'
+complete -c luigi -f -l worker-count-last-scheduled -d 'Keep a worker alive only if there are pending tasks which it was the last to schedule.'
+complete -c luigi -f -l worker-wait-interval -r
+complete -c luigi -f -l worker-wait-jitter -r
+complete -c luigi -f -l worker-max-keep-alive-idle-duration -r
+complete -c luigi -f -l worker-max-reschedules -r
+complete -c luigi -f -l worker-timeout -r
+complete -c luigi -f -l worker-task-limit -r
+complete -c luigi -f -l worker-retry-external-tasks -d 'If true, incomplete external tasks will be retested for completion while Luigi is running.'
+complete -c luigi -f -l worker-send-failure-email -d 'If true, send e-mails directly from the worker on failure'
+complete -c luigi -f -l worker-no-install-shutdown-handler -d 'If true, the SIGUSR1 shutdown handler will NOT be installed on the worker'
+complete -c luigi -f -l worker-check-unfulfilled-deps -d 'If true, check for completeness of dependencies before running a task'
+complete -c luigi -f -l worker-check-complete-on-run -d 'If true, only mark tasks as done after running if they are complete. Regardless of this setting, the worker will always check if external tasks are complete before marking them as done.'
+complete -c luigi -f -l worker-force-multiprocessing -d 'If true, use multiprocessing also when running with 1 worker'
+complete -c luigi -f -l worker-task-process-context -r -d 'If set to a fully qualified class name, the class will be instantiated with a TaskProcess as its constructor parameter and applied as a context manager around its run() call, so this can be used for obtaining high level customizable monitoring or logging of each individual Task run.'
+complete -c luigi -f -l worker-cache-task-completion -d 'If true, cache the response of successful completion checks of tasks assigned to a worker. This can especially speed up tasks with dynamic dependencies but assumes that the completion status does not change after it was true the first time.'
+complete -c luigi -f -l execution-summary-summary-length -r
+complete -c luigi -f -l local-scheduler -d 'Use an in-memory central scheduler. Useful for testing.'
+complete -c luigi -f -l scheduler-host -r -d 'Hostname of machine running remote scheduler'
+complete -c luigi -f -l scheduler-port -r -d 'Port of remote scheduler api process'
+complete -c luigi -f -l scheduler-url -r -d 'Full path to remote scheduler'
+complete -c luigi -f -l lock-size -r -d 'Maximum number of workers running the same command'
+complete -c luigi -f -l no-lock -d 'Ignore if similar process is already running'
+complete -c luigi -f -l lock-pid-dir -r -d 'Directory to store the pid file'
+complete -c luigi -f -l take-lock -d 'Signal other processes to stop getting work if already running'
+complete -c luigi -f -l workers -r -d 'Maximum number of parallel tasks to run'
+complete -c luigi -f -l logging-conf-file -r -d 'Configuration file for logging'
+complete -c luigi -f -l log-level -r -d 'Default log level to use when logging_conf_file is not set Choices: {NOTSET, WARNING, ERROR, CRITICAL, INFO, DEBUG}'
+complete -c luigi -f -l module -r -d 'Used for dynamic loading of modules'
+complete -c luigi -f -l parallel-scheduling -d 'Use multiprocessing to do scheduling in parallel.'
+complete -c luigi -f -l parallel-scheduling-processes -r -d 'The number of processes to use for scheduling in parallel. By default the number of available CPUs will be used'
+complete -c luigi -f -l assistant -d 'Run any task from the scheduler.'
+complete -c luigi -f -l help -d 'Show most common flags and all task-specific flags'
+complete -c luigi -f -l help-all -d 'Show all command line flags'
+complete -c luigi -f -l RangeBase-of -r -d 'task name to be completed. The task must take a single datetime parameter'
+complete -c luigi -f -l RangeBase-of-params -r -d 'Arguments to be provided to the '"'"'of'"'"' class when instantiating'
+complete -c luigi -f -l RangeBase-start -r
+complete -c luigi -f -l RangeBase-stop -r
+complete -c luigi -f -l RangeBase-reverse -d 'specifies the preferred order for catching up. False - work from the oldest missing outputs onward; True - from the newest backward'
+complete -c luigi -f -l RangeBase-task-limit -r -d 'how many of '"'"'of'"'"' tasks to require. Guards against scheduling insane amounts of tasks in one go'
+complete -c luigi -f -l RangeBase-now -r -d 'set to override current time. In seconds since epoch'
+complete -c luigi -f -l RangeBase-param-name -r -d 'parameter name used to pass in parameterized value. Defaults to None, meaning use first positional parameter'
+complete -c luigi -f -l RangeDailyBase-of -r -d 'task name to be completed. The task must take a single datetime parameter'
+complete -c luigi -f -l RangeDailyBase-of-params -r -d 'Arguments to be provided to the '"'"'of'"'"' class when instantiating'
+complete -c luigi -f -l RangeDailyBase-reverse -d 'specifies the preferred order for catching up. False - work from the oldest missing outputs onward; True - from the newest backward'
+complete -c luigi -f -l RangeDailyBase-task-limit -r -d 'how many of '"'"'of'"'"' tasks to require. Guards against scheduling insane amounts of tasks in one go'
+complete -c luigi -f -l RangeDailyBase-now -r -d 'set to override current time. In seconds since epoch'
+complete -c luigi -f -l RangeDailyBase-param-name -r -d 'parameter name used to pass in parameterized value. Defaults to None, meaning use first positional parameter'
+complete -c luigi -f -l RangeDailyBase-start -r -d 'beginning date, inclusive. Default: None - work backward forever (requires reverse=True)'
+complete -c luigi -f -l RangeDailyBase-stop -r -d 'ending date, exclusive. Default: None - work forward forever'
+complete -c luigi -f -l RangeDailyBase-days-back -r -d 'extent to which contiguousness is to be assured into past, in days from current time. Prevents infinite loop when start is none. If the dataset has limited retention (i.e. old outputs get removed), this should be set shorter to that, too, to prevent the oldest outputs flapping. Increase freely if you intend to process old dates - worker'"'"'s memory is the limit'
+complete -c luigi -f -l RangeDailyBase-days-forward -r -d 'extent to which contiguousness is to be assured into future, in days from current time. Prevents infinite loop when stop is none'
+complete -c luigi -f -l RangeHourlyBase-of -r -d 'task name to be completed. The task must take a single datetime parameter'
+complete -c luigi -f -l RangeHourlyBase-of-params -r -d 'Arguments to be provided to the '"'"'of'"'"' class when instantiating'
+complete -c luigi -f -l RangeHourlyBase-reverse -d 'specifies the preferred order for catching up. False - work from the oldest missing outputs onward; True - from the newest backward'
+complete -c luigi -f -l RangeHourlyBase-task-limit -r -d 'how many of '"'"'of'"'"' tasks to require. Guards against scheduling insane amounts of tasks in one go'
+complete -c luigi -f -l RangeHourlyBase-now -r -d 'set to override current time. In seconds since epoch'
+complete -c luigi -f -l RangeHourlyBase-param-name -r -d 'parameter name used to pass in parameterized value. Defaults to None, meaning use first positional parameter'
+complete -c luigi -f -l RangeHourlyBase-start -r -d 'beginning datehour, inclusive. Default: None - work backward forever (requires reverse=True)'
+complete -c luigi -f -l RangeHourlyBase-stop -r -d 'ending datehour, exclusive. Default: None - work forward forever'
+complete -c luigi -f -l RangeHourlyBase-hours-back -r -d 'extent to which contiguousness is to be assured into past, in hours from current time. Prevents infinite loop when start is none. If the dataset has limited retention (i.e. old outputs get removed), this should be set shorter to that, too, to prevent the oldest outputs flapping. Increase freely if you intend to process old dates - worker'"'"'s memory is the limit'
+complete -c luigi -f -l RangeHourlyBase-hours-forward -r -d 'extent to which contiguousness is to be assured into future, in hours from current time. Prevents infinite loop when stop is none'
+complete -c luigi -f -l RangeByMinutesBase-of -r -d 'task name to be completed. The task must take a single datetime parameter'
+complete -c luigi -f -l RangeByMinutesBase-of-params -r -d 'Arguments to be provided to the '"'"'of'"'"' class when instantiating'
+complete -c luigi -f -l RangeByMinutesBase-reverse -d 'specifies the preferred order for catching up. False - work from the oldest missing outputs onward; True - from the newest backward'
+complete -c luigi -f -l RangeByMinutesBase-task-limit -r -d 'how many of '"'"'of'"'"' tasks to require. Guards against scheduling insane amounts of tasks in one go'
+complete -c luigi -f -l RangeByMinutesBase-now -r -d 'set to override current time. In seconds since epoch'
+complete -c luigi -f -l RangeByMinutesBase-param-name -r -d 'parameter name used to pass in parameterized value. Defaults to None, meaning use first positional parameter'
+complete -c luigi -f -l RangeByMinutesBase-start -r -d 'beginning date-hour-minute, inclusive. Default: None - work backward forever (requires reverse=True)'
+complete -c luigi -f -l RangeByMinutesBase-stop -r -d 'ending date-hour-minute, exclusive. Default: None - work forward forever'
+complete -c luigi -f -l RangeByMinutesBase-minutes-back -r -d 'extent to which contiguousness is to be assured into past, in minutes from current time. Prevents infinite loop when start is none. If the dataset has limited retention (i.e. old outputs get removed), this should be set shorter to that, too, to prevent the oldest outputs flapping. Increase freely if you intend to process old dates - worker'"'"'s memory is the limit'
+complete -c luigi -f -l RangeByMinutesBase-minutes-forward -r -d 'extent to which contiguousness is to be assured into future, in minutes from current time. Prevents infinite loop when stop is none'
+complete -c luigi -f -l RangeByMinutesBase-minutes-interval -r -d 'separation between events in minutes. It must evenly divide 60'
+complete -c luigi -f -l RangeMonthly-of -r -d 'task name to be completed. The task must take a single datetime parameter'
+complete -c luigi -f -l RangeMonthly-of-params -r -d 'Arguments to be provided to the '"'"'of'"'"' class when instantiating'
+complete -c luigi -f -l RangeMonthly-reverse -d 'specifies the preferred order for catching up. False - work from the oldest missing outputs onward; True - from the newest backward'
+complete -c luigi -f -l RangeMonthly-task-limit -r -d 'how many of '"'"'of'"'"' tasks to require. Guards against scheduling insane amounts of tasks in one go'
+complete -c luigi -f -l RangeMonthly-now -r -d 'set to override current time. In seconds since epoch'
+complete -c luigi -f -l RangeMonthly-param-name -r -d 'parameter name used to pass in parameterized value. Defaults to None, meaning use first positional parameter'
+complete -c luigi -f -l RangeMonthly-start -r -d 'beginning month, inclusive. Default: None - work backward forever (requires reverse=True)'
+complete -c luigi -f -l RangeMonthly-stop -r -d 'ending month, exclusive. Default: None - work forward forever'
+complete -c luigi -f -l RangeMonthly-months-back -r -d 'extent to which contiguousness is to be assured into past, in months from current time. Prevents infinite loop when start is none. If the dataset has limited retention (i.e. old outputs get removed), this should be set shorter to that, too, to prevent the oldest outputs flapping. Increase freely if you intend to process old dates - worker'"'"'s memory is the limit'
+complete -c luigi -f -l RangeMonthly-months-forward -r -d 'extent to which contiguousness is to be assured into future, in months from current time. Prevents infinite loop when stop is none'
+complete -c luigi -f -l RangeDaily-of -r -d 'task name to be completed. The task must take a single datetime parameter'
+complete -c luigi -f -l RangeDaily-of-params -r -d 'Arguments to be provided to the '"'"'of'"'"' class when instantiating'
+complete -c luigi -f -l RangeDaily-reverse -d 'specifies the preferred order for catching up. False - work from the oldest missing outputs onward; True - from the newest backward'
+complete -c luigi -f -l RangeDaily-task-limit -r -d 'how many of '"'"'of'"'"' tasks to require. Guards against scheduling insane amounts of tasks in one go'
+complete -c luigi -f -l RangeDaily-now -r -d 'set to override current time. In seconds since epoch'
+complete -c luigi -f -l RangeDaily-param-name -r -d 'parameter name used to pass in parameterized value. Defaults to None, meaning use first positional parameter'
+complete -c luigi -f -l RangeDaily-start -r -d 'beginning date, inclusive. Default: None - work backward forever (requires reverse=True)'
+complete -c luigi -f -l RangeDaily-stop -r -d 'ending date, exclusive. Default: None - work forward forever'
+complete -c luigi -f -l RangeDaily-days-back -r -d 'extent to which contiguousness is to be assured into past, in days from current time. Prevents infinite loop when start is none. If the dataset has limited retention (i.e. old outputs get removed), this should be set shorter to that, too, to prevent the oldest outputs flapping. Increase freely if you intend to process old dates - worker'"'"'s memory is the limit'
+complete -c luigi -f -l RangeDaily-days-forward -r -d 'extent to which contiguousness is to be assured into future, in days from current time. Prevents infinite loop when stop is none'
+complete -c luigi -f -l RangeHourly-of -r -d 'task name to be completed. The task must take a single datetime parameter'
+complete -c luigi -f -l RangeHourly-of-params -r -d 'Arguments to be provided to the '"'"'of'"'"' class when instantiating'
+complete -c luigi -f -l RangeHourly-reverse -d 'specifies the preferred order for catching up. False - work from the oldest missing outputs onward; True - from the newest backward'
+complete -c luigi -f -l RangeHourly-task-limit -r -d 'how many of '"'"'of'"'"' tasks to require. Guards against scheduling insane amounts of tasks in one go'
+complete -c luigi -f -l RangeHourly-now -r -d 'set to override current time. In seconds since epoch'
+complete -c luigi -f -l RangeHourly-param-name -r -d 'parameter name used to pass in parameterized value. Defaults to None, meaning use first positional parameter'
+complete -c luigi -f -l RangeHourly-start -r -d 'beginning datehour, inclusive. Default: None - work backward forever (requires reverse=True)'
+complete -c luigi -f -l RangeHourly-stop -r -d 'ending datehour, exclusive. Default: None - work forward forever'
+complete -c luigi -f -l RangeHourly-hours-back -r -d 'extent to which contiguousness is to be assured into past, in hours from current time. Prevents infinite loop when start is none. If the dataset has limited retention (i.e. old outputs get removed), this should be set shorter to that, too, to prevent the oldest outputs flapping. Increase freely if you intend to process old dates - worker'"'"'s memory is the limit'
+complete -c luigi -f -l RangeHourly-hours-forward -r -d 'extent to which contiguousness is to be assured into future, in hours from current time. Prevents infinite loop when stop is none'
+complete -c luigi -f -l RangeByMinutes-of -r -d 'task name to be completed. The task must take a single datetime parameter'
+complete -c luigi -f -l RangeByMinutes-of-params -r -d 'Arguments to be provided to the '"'"'of'"'"' class when instantiating'
+complete -c luigi -f -l RangeByMinutes-reverse -d 'specifies the preferred order for catching up. False - work from the oldest missing outputs onward; True - from the newest backward'
+complete -c luigi -f -l RangeByMinutes-task-limit -r -d 'how many of '"'"'of'"'"' tasks to require. Guards against scheduling insane amounts of tasks in one go'
+complete -c luigi -f -l RangeByMinutes-now -r -d 'set to override current time. In seconds since epoch'
+complete -c luigi -f -l RangeByMinutes-param-name -r -d 'parameter name used to pass in parameterized value. Defaults to None, meaning use first positional parameter'
+complete -c luigi -f -l RangeByMinutes-start -r -d 'beginning date-hour-minute, inclusive. Default: None - work backward forever (requires reverse=True)'
+complete -c luigi -f -l RangeByMinutes-stop -r -d 'ending date-hour-minute, exclusive. Default: None - work forward forever'
+complete -c luigi -f -l RangeByMinutes-minutes-back -r -d 'extent to which contiguousness is to be assured into past, in minutes from current time. Prevents infinite loop when start is none. If the dataset has limited retention (i.e. old outputs get removed), this should be set shorter to that, too, to prevent the oldest outputs flapping. Increase freely if you intend to process old dates - worker'"'"'s memory is the limit'
+complete -c luigi -f -l RangeByMinutes-minutes-forward -r -d 'extent to which contiguousness is to be assured into future, in minutes from current time. Prevents infinite loop when stop is none'
+complete -c luigi -f -l RangeByMinutes-minutes-interval -r -d 'separation between events in minutes. It must evenly divide 60'
+complete -c luigi -f -l retcode-unhandled-exception -r -d 'For internal luigi errors.'
+complete -c luigi -f -l retcode-missing-data -r -d 'For when there are incomplete ExternalTask dependencies.'
+complete -c luigi -f -l retcode-task-failed -r -d 'For when a task'"'"'s run() method fails.'
+complete -c luigi -f -l retcode-already-running -r -d 'For both local --lock and luigid "lock"'
+complete -c luigi -f -l retcode-scheduling-error -r -d 'For when a task'"'"'s complete() or requires() fails, or task-limit reached'
+complete -c luigi -f -l retcode-not-run -r -d 'For when a task is not granted run permission by the scheduler.'
diff --git a/data/systemd/README.md b/data/systemd/README.md
new file mode 100644
index 00000000..9bdfd137
--- /dev/null
+++ b/data/systemd/README.md
@@ -0,0 +1,43 @@
+This directory contains systemd units and timers to launch the RNA-Seq
+pipeline and its webviewer.
+
+To edit the configuration, issue: `systemctl edit rnaseq-pipeline-worker@.service` and `systemctl edit rnaseq-pipeline-viewer.service`
+to add the following environment variables:
+
+```
+[Service]
+WorkingDirectory=/location/of/the/pipeline/configuration
+Environment="CONDA_ENV=/path/to/conda/env"
+Environment="GEMMA_USERNAME={username}"
+Environment="GEMMA_PASSWORD={password}"
+```
+
+# Launch workers
+
+We run two workers, one is generally idle and awaits new datasets to be
+processed in a collaborative spreadsheet by our curators, the other is
+generally busy. Those are configured to restart 20 minutes after exiting.
+
+```
+systemctl start rnaseq-pipeline-worker@1.service rnaseq-pipeline-worker@2.service
+```
+
+# Launch the viewer
+
+```
+systemctl start rnaseq-pipeline-viewer.service
+```
+
+# Enable the cleanup unit
+
+Edit the configuration to set the working directory in which the cleanup
+script can be found.
+
+```
+[Service]
+WorkingDirectory=/location/of/the/pipeline/configuration
+```
+
+```
+systemctl start rnaseq-pipeline-cleanup.timer
+```
diff --git a/data/systemd/rnaseq-pipeline-cleanup.service b/data/systemd/rnaseq-pipeline-cleanup.service
new file mode 100644
index 00000000..dffe71f9
--- /dev/null
+++ b/data/systemd/rnaseq-pipeline-cleanup.service
@@ -0,0 +1,6 @@
+[Unit]
+Description=PavLab RNA-Seq Pipeline Cleanup
+
+[Service]
+Type=simple
+ExecStart=/bin/bash -c "./scripts/remove-old-data"
diff --git a/data/systemd/rnaseq-pipeline-cleanup.timer b/data/systemd/rnaseq-pipeline-cleanup.timer
new file mode 100644
index 00000000..95c66b57
--- /dev/null
+++ b/data/systemd/rnaseq-pipeline-cleanup.timer
@@ -0,0 +1,8 @@
+[Unit]
+Description=Cleanup the RNA-Seq Pipeline hourly
+
+[Timer]
+OnCalendar=hourly
+
+[Install]
+WantedBy=timers.target
diff --git a/data/systemd/rnaseq-pipeline-viewer.service b/data/systemd/rnaseq-pipeline-viewer.service
new file mode 100644
index 00000000..89c0cd4b
--- /dev/null
+++ b/data/systemd/rnaseq-pipeline-viewer.service
@@ -0,0 +1,11 @@
+[Unit]
+Description=PavLab RNA-Seq Pipeline Viewer
+Requires=luigid.service
+After=network.target
+
+[Service]
+Type=simple
+ExecStart=/bin/bash -c "conda run --no-capture-output -p $CONDA_ENV gunicorn -e SCRIPT_NAME=/rnaseq-pipeline -b 0.0.0.0:8000 rnaseq_pipeline.webviewer:app --access-logfile pipeline-output/webviewer-logs/access.log --error-logfile pipeline-output/webviewer-logs/error.log"
+
+[Install]
+WantedBy=multi-user.target
diff --git a/data/systemd/rnaseq-pipeline-worker@.service b/data/systemd/rnaseq-pipeline-worker@.service
new file mode 100644
index 00000000..aa883859
--- /dev/null
+++ b/data/systemd/rnaseq-pipeline-worker@.service
@@ -0,0 +1,8 @@
+[Unit]
+Description=PavLab RNA-Seq Pipeline Worker #%i
+Requires=luigid.service
+After=network.target
+
+[Service]
+Type=simple
+ExecStart=bash -c "conda run --no-capture-output -p $CONDA_ENV luigi-wrapper --module rnaseq_pipeline.tasks --assistant --workers 200"
diff --git a/environment.yml b/environment.yml
index ef4833da..0dbbd7c2 100644
--- a/environment.yml
+++ b/environment.yml
@@ -2,13 +2,15 @@ name: rnaseq-pipeline
channels:
- conda-forge
- bioconda
+- nodefaults
dependencies:
-- python=3
+- python=3.10
- pip
-- cutadapt<4.0
-- multiqc==1.12
+- cutadapt==4.8
+- multiqc==1.29
+- polars-lts-cpu # for our older servers that lack support for AVX2
- sra-tools
-- fastqc
+- fastqc==0.12.1
- star==2.7.3a
- entrez-direct
-- gunicorn
+- perl # rsem expects this
diff --git a/example.luigi.cfg b/example.luigi.cfg
index d1a78a48..715e0940 100644
--- a/example.luigi.cfg
+++ b/example.luigi.cfg
@@ -7,6 +7,9 @@
# execution.
#
+[core]
+autoload_range=true
+
[resources]
# in number of available CPUs
cpus=16
@@ -45,18 +48,18 @@ ALIGNQCDIR=aligned-qc
QUANTDIR=quantified
BATCHINFODIR=batch-info
-# STAR
-STAR_CLEANUP_SCRIPT=scripts/clean-unused-shm-objects
-
# RSEM
RSEM_DIR=contrib/RSEM
-# Gemma integrations
-JAVA_HOME=
-JAVA_OPTS=
-GEMMACLI=gemma-cli
-
SLACK_WEBHOOK_URL=
-[rnaseq_pipeline.sources.sra]
-paired_read_experiments=[]
+[rnaseq_pipeline.gemma]
+cli_bin=gemma-cli
+# values for $JAVA_HOME and $JAVA_OPTS environment variables
+cli_JAVA_HOME=
+cli_JAVA_OPTS=
+baseurl=https://gemma.msl.ubc.ca
+appdata_dir=/space/gemmaData
+human_reference_id=hg38_ncbi
+mouse_reference_id=mm10_ncbi
+rat_reference_id=rn7_ncbi
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..fed528d4
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
diff --git a/rnaseq_pipeline/config.py b/rnaseq_pipeline/config.py
index 9c59498b..3081e4ca 100644
--- a/rnaseq_pipeline/config.py
+++ b/rnaseq_pipeline/config.py
@@ -16,15 +16,6 @@ class rnaseq_pipeline(luigi.Config):
QUANTDIR = luigi.Parameter()
BATCHINFODIR = luigi.Parameter()
- STAR_CLEANUP_SCRIPT = luigi.Parameter()
-
RSEM_DIR = luigi.Parameter()
- GEMMACLI = luigi.Parameter()
- JAVA_HOME = luigi.Parameter()
- JAVA_OPTS = luigi.Parameter()
-
SLACK_WEBHOOK_URL = luigi.OptionalParameter(default=None)
-
- def asenv(self, attrs):
- return {attr: getattr(self, attr) for attr in attrs}
diff --git a/credentials.json b/rnaseq_pipeline/credentials.json
similarity index 100%
rename from credentials.json
rename to rnaseq_pipeline/credentials.json
diff --git a/rnaseq_pipeline/gemma.py b/rnaseq_pipeline/gemma.py
index c1ee9cf9..13047a7e 100644
--- a/rnaseq_pipeline/gemma.py
+++ b/rnaseq_pipeline/gemma.py
@@ -1,53 +1,62 @@
-from getpass import getpass
import os
-from os.path import join
import subprocess
+from getpass import getpass
+from os.path import join
import luigi
-from luigi.contrib.external_program import ExternalProgramTask
import requests
+from luigi.contrib.external_program import ExternalProgramTask
from requests.auth import HTTPBasicAuth
-from .config import rnaseq_pipeline
+class gemma(luigi.Config):
+ task_namespace = 'rnaseq_pipeline'
+ baseurl = luigi.Parameter()
+ appdata_dir = luigi.Parameter()
+ cli_bin = luigi.Parameter()
+ cli_JAVA_HOME = luigi.Parameter()
+ cli_JAVA_OPTS = luigi.Parameter()
+ human_reference_id = luigi.Parameter()
+ mouse_reference_id = luigi.Parameter()
+ rat_reference_id = luigi.Parameter()
-cfg = rnaseq_pipeline()
+cfg = gemma()
class GemmaApi:
def __init__(self):
self._session = requests.Session()
- self._session.auth = HTTPBasicAuth(os.getenv('GEMMA_USERNAME'), self._get_password()) if os.getenv('GEMMA_USERNAME') else None
+ self._session.auth = HTTPBasicAuth(os.getenv('GEMMA_USERNAME'), self._get_password()) if os.getenv(
+ 'GEMMA_USERNAME') else None
def _get_password(self):
if 'GEMMA_PASSWORD' in os.environ:
return os.environ['GEMMA_PASSWORD']
elif 'GEMMA_PASSWORD_CMD' in os.environ:
- proc = subprocess.run(os.environ['GEMMA_PASSWORD_CMD'], shell=True, check=True, text=True, stdout=subprocess.PIPE)
- return proc.stdout
+ proc = subprocess.run(os.environ['GEMMA_PASSWORD_CMD'], shell=True, check=True, text=True,
+ stdout=subprocess.PIPE)
+ return proc.stdout.splitlines()[0]
else:
return getpass()
def _query_api(self, endpoint):
- res = self._session.get(join('https://gemma.msl.ubc.ca/rest/v2', endpoint))
+ res = self._session.get(join(cfg.baseurl, 'rest/v2', endpoint))
res.raise_for_status()
return res.json()['data']
def datasets(self, experiment_id):
return self._query_api(join('datasets', experiment_id))
+ def dataset_has_batch(self, experiment_id):
+ return self._query_api(join('datasets', experiment_id, 'hasbatch'))
+
def samples(self, experiment_id):
return self._query_api(join('datasets', experiment_id, 'samples'))
def platforms(self, experiment_id):
return self._query_api(join('datasets', experiment_id, 'platforms'))
-class GemmaTask(ExternalProgramTask):
- """
- Base class for tasks that wraps Gemma CLI.
- """
+class GemmaTaskMixin:
experiment_id = luigi.Parameter()
- subcommand = None
-
def __init__(self, *kwargs, **kwds):
super().__init__(*kwargs, **kwds)
self._gemma_api = GemmaApi()
@@ -79,12 +88,13 @@ def external_uri(self):
@property
def taxon(self):
- return self.dataset_info['taxon']
+ return self.dataset_info['taxon']['commonName']
@property
def reference_id(self):
try:
- return {'human': 'hg38_ncbi', 'mouse': 'mm10_ncbi', 'rat': 'm6_ncbi'}[self.taxon]
+ return {'human': cfg.human_reference_id, 'mouse': cfg.mouse_reference_id, 'rat': cfg.rat_reference_id}[
+ self.taxon]
except KeyError:
raise ValueError('Unsupported Gemma taxon {}.'.format(self.taxon))
@@ -92,13 +102,20 @@ def reference_id(self):
def platform_short_name(self):
return f'Generic_{self.taxon}_ncbiIds'
+class GemmaCliTask(GemmaTaskMixin, ExternalProgramTask):
+ """
+ Base class for tasks that wraps Gemma CLI.
+ """
+ subcommand = None
+
def program_environment(self):
env = super().program_environment()
- env.update(cfg.asenv(['JAVA_HOME', 'JAVA_OPTS']))
+ env['JAVA_HOME'] = cfg.cli_JAVA_HOME
+ env['JAVA_OPTS'] = cfg.cli_JAVA_OPTS
return env
def program_args(self):
- args = [cfg.GEMMACLI,
+ args = [cfg.cli_bin,
self.subcommand,
'-e', self.experiment_id]
args.extend(self.subcommand_args())
@@ -106,4 +123,3 @@ def program_args(self):
def subcommand_args(self):
return []
-
diff --git a/rnaseq_pipeline/gsheet.py b/rnaseq_pipeline/gsheet.py
index d139f24d..43c60d84 100644
--- a/rnaseq_pipeline/gsheet.py
+++ b/rnaseq_pipeline/gsheet.py
@@ -1,19 +1,19 @@
-import argparse
+import logging
import logging
import os
import os.path
import pickle
-import sys
-from os.path import dirname, expanduser, join
+from os.path import join
-from googleapiclient.discovery import build
-from google_auth_oauthlib.flow import InstalledAppFlow
-from google.auth.transport.requests import Request
-import luigi
import pandas as pd
import xdg.BaseDirectory
+from google.auth.transport.requests import Request
+from google_auth_oauthlib.flow import InstalledAppFlow
+from googleapiclient.discovery import build
+from pkg_resources import resource_filename
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']
+CREDENTIALS_FILE = resource_filename('rnaseq_pipeline', 'credentials.json')
logger = logging.getLogger('luigi-interface')
@@ -33,8 +33,8 @@ def _authenticate():
creds.refresh(Request())
else:
flow = InstalledAppFlow.from_client_secrets_file(
- 'credentials.json', SCOPES)
- creds = flow.run_console()
+ CREDENTIALS_FILE, SCOPES)
+ creds = flow.run_local_server(port=0)
# Save the credentials for the next run
with open(token_path, 'wb') as token:
pickle.dump(creds, token)
@@ -45,7 +45,8 @@ def retrieve_spreadsheet(spreadsheet_id, sheet_name):
service = build('sheets', 'v4', credentials=_authenticate(), cache_discovery=None)
# Retrieve the documents contents from the Docs service.
- rnaseq_pipeline_queue = service.spreadsheets().values().get(spreadsheetId=spreadsheet_id, range=sheet_name).execute()
+ rnaseq_pipeline_queue = service.spreadsheets().values().get(spreadsheetId=spreadsheet_id,
+ range=sheet_name).execute()
# this will fail if people add new columns
df = pd.DataFrame(rnaseq_pipeline_queue['values'][1:], columns=rnaseq_pipeline_queue['values'][0])
diff --git a/rnaseq_pipeline/miniml_utils.py b/rnaseq_pipeline/miniml_utils.py
index 1fe754a5..fe0ccbb8 100644
--- a/rnaseq_pipeline/miniml_utils.py
+++ b/rnaseq_pipeline/miniml_utils.py
@@ -26,13 +26,20 @@ def collect_geo_samples(f):
for x in root.findall('miniml:Sample', ns):
gsm_id = x.find("miniml:Accession[@database='GEO']", ns)
- library_strategy = x.find('miniml:Library-Strategy', ns)
platform_id = x.find('miniml:Platform-Ref', ns)
sra_relation = x.find("miniml:Relation[@type='SRA']", ns)
- if gsm_id is None or platform_id is None or library_strategy is None or sra_relation is None:
+ if gsm_id is None or platform_id is None or sra_relation is None:
continue
- if library_strategy.text == 'RNA-Seq':
- gsm_identifiers.add(gsm_id.text)
+ # this has to match the logic in Gemma for bulk RNA-Seq, see GeoConverterImpl.java
+ sample_type = x.find('miniml:Type', ns)
+ if sample_type is None:
+ continue
+ if sample_type.text == 'SRA':
+ library_source = x.find('miniml:Library-Source', ns)
+ if library_source is not None and library_source.text == 'transcriptomic':
+ library_strategy = x.find('miniml:Library-Strategy', ns)
+ if library_strategy is not None and library_strategy.text in ['RNA-Seq', 'ssRNA-seq', 'OTHER']:
+ gsm_identifiers.add(gsm_id.text)
return gsm_identifiers
diff --git a/rnaseq_pipeline/platforms.py b/rnaseq_pipeline/platforms.py
index b3099c3b..47d57338 100644
--- a/rnaseq_pipeline/platforms.py
+++ b/rnaseq_pipeline/platforms.py
@@ -13,7 +13,7 @@ def get_trim_single_end_reads_task(r1, dest, **kwargs):
pass
@abstractmethod
- def get_trim_paired_reads_task(r1,r2, r1_dest, r2_dest, **kwargs):
+ def get_trim_paired_reads_task(r1, r2, r1_dest, r2_dest, **kwargs):
pass
class BgiPlatform(Platform):
@@ -33,18 +33,18 @@ def __init__(self, instrument):
def get_trim_single_end_reads_task(self, r1, dest, **kwargs):
return cutadapt.TrimReads(
- r1,
- dest,
- adapter_3prime=BgiPlatform.FORWARD_FILTER,
- **kwargs)
+ r1,
+ dest,
+ adapter_3prime=BgiPlatform.FORWARD_FILTER,
+ **kwargs)
def get_trim_paired_reads_task(self, r1, r2, r1_dest, r2_dest, **kwargs):
return cutadapt.TrimPairedReads(
- r1, r2,
- r1_dest, r2_dest,
- adapter_3prime=BgiPlatform.FORWARD_FILTER,
- reverse_adapter_3prime=BgiPlatform.REVERSE_FILTER,
- **kwargs)
+ r1, r2,
+ r1_dest, r2_dest,
+ adapter_3prime=BgiPlatform.FORWARD_FILTER,
+ reverse_adapter_3prime=BgiPlatform.REVERSE_FILTER,
+ **kwargs)
class IlluminaPlatform(Platform):
"""
@@ -59,18 +59,18 @@ def __init__(self, instrument):
def get_trim_single_end_reads_task(self, r1, dest, **kwargs):
return cutadapt.TrimReads(
- r1,
- dest,
- adapter_3prime=IlluminaPlatform.UNIVERSAL_ADAPTER,
- **kwargs)
+ r1,
+ dest,
+ adapter_3prime=IlluminaPlatform.UNIVERSAL_ADAPTER,
+ **kwargs)
def get_trim_paired_reads_task(self, r1, r2, r1_dest, r2_dest, **kwargs):
return cutadapt.TrimPairedReads(
- r1, r2,
- r1_dest, r2_dest,
- adapter_3prime=IlluminaPlatform.UNIVERSAL_ADAPTER,
- reverse_adapter_3prime=IlluminaPlatform.UNIVERSAL_ADAPTER,
- **kwargs)
+ r1, r2,
+ r1_dest, r2_dest,
+ adapter_3prime=IlluminaPlatform.UNIVERSAL_ADAPTER,
+ reverse_adapter_3prime=IlluminaPlatform.UNIVERSAL_ADAPTER,
+ **kwargs)
class IlluminaNexteraPlatform(Platform):
"""
@@ -85,11 +85,11 @@ def __init__(self, instrument):
def get_trim_single_end_reads_task(self, r1, dest, **kwargs):
return cutadapt.TrimReads(
- r1,
- dest,
- cut=12,
- adapter_3prime=IlluminaNexteraPlatform.NEXTERA_ADAPTER,
- **kwargs)
+ r1,
+ dest,
+ cut=12,
+ adapter_3prime=IlluminaNexteraPlatform.NEXTERA_ADAPTER,
+ **kwargs)
def get_trim_paired_reads_task(self, r1, r2, r1_dest, r2_dest, **kwargs):
raise NotImplementedError
diff --git a/rnaseq_pipeline/sources/arrayexpress.py b/rnaseq_pipeline/sources/arrayexpress.py
index 1f2b0e44..edb68dfd 100644
--- a/rnaseq_pipeline/sources/arrayexpress.py
+++ b/rnaseq_pipeline/sources/arrayexpress.py
@@ -1,11 +1,11 @@
-from urllib.request import urlretrieve
import os
from os.path import join
+from urllib.request import urlretrieve
import luigi
-from luigi.task import WrapperTask
import pandas as pd
from bioluigi.tasks.utils import TaskWithOutputMixin
+from luigi.task import WrapperTask
from ..config import rnaseq_pipeline
from ..platforms import IlluminaPlatform
@@ -21,11 +21,13 @@ class DownloadArrayExpressFastq(luigi.Task):
def run(self):
with self.output().temporary_path() as dest_filename:
urlretrieve(self.fastq_url,
- reporthook=lambda numblocks, blocksize, totalsize: self.set_progress_percentage(100.0 * numblocks * blocksize / totalsize),
+ reporthook=lambda numblocks, blocksize, totalsize: self.set_progress_percentage(
+ 100.0 * numblocks * blocksize / totalsize),
filename=dest_filename)
def output(self):
- return luigi.LocalTarget(join(cfg.OUTPUT_DIR, cfg.DATA, 'arrayexpress', self.sample_id, os.path.basename(self.fastq_url)))
+ return luigi.LocalTarget(
+ join(cfg.OUTPUT_DIR, cfg.DATA, 'arrayexpress', self.sample_id, os.path.basename(self.fastq_url)))
class DownloadArrayExpressSample(TaskWithOutputMixin, WrapperTask):
experiment_id = luigi.Parameter()
@@ -47,8 +49,10 @@ class DownloadArrayExpressExperiment(TaskWithOutputMixin, WrapperTask):
def run(self):
# store metadata locally under metadata/arrayexpress/.sdrf.txt
- ae_df = pd.read_csv('http://www.ebi.ac.uk/arrayexpress/files/{0}/{0}.sdrf.txt'.format(self.experiment_id), sep='\t')
+ ae_df = pd.read_csv('http://www.ebi.ac.uk/arrayexpress/files/{0}/{0}.sdrf.txt'.format(self.experiment_id),
+ sep='\t')
ae_df = ae_df[ae_df['Comment[LIBRARY_STRATEGY]'] == 'RNA-Seq']
# FIXME: properly handle the order of paired FASTQs
- yield [DownloadArrayExpressSample(experiment_id=self.experiment_id, sample_id=sample_id, fastq_urls=s['Comment[FASTQ_URI]'].sort_values().tolist())
+ yield [DownloadArrayExpressSample(experiment_id=self.experiment_id, sample_id=sample_id,
+ fastq_urls=s['Comment[FASTQ_URI]'].sort_values().tolist())
for sample_id, s in ae_df.groupby('Comment[ENA_SAMPLE]')]
diff --git a/rnaseq_pipeline/sources/gemma.py b/rnaseq_pipeline/sources/gemma.py
index 7edb98e7..47ebcfae 100644
--- a/rnaseq_pipeline/sources/gemma.py
+++ b/rnaseq_pipeline/sources/gemma.py
@@ -3,8 +3,8 @@
import os
from os.path import join
-from bioluigi.tasks.utils import DynamicTaskWithOutputMixin, DynamicWrapperTask
import luigi
+from bioluigi.tasks.utils import DynamicTaskWithOutputMixin, DynamicWrapperTask
from luigi.util import requires
from .geo import DownloadGeoSample
@@ -33,9 +33,12 @@ def run(self):
accession = sample['accession']['accession']
external_database = sample['accession']['externalDatabase']['name']
if external_database == 'GEO':
- download_sample_tasks.append(DownloadGeoSample(accession))
+ download_sample_tasks.append(
+ DownloadGeoSample(accession, metadata=dict(experiment_id=self.experiment_id, sample_id=accession)))
elif external_database == 'SRA':
- download_sample_tasks.append(DownloadSraExperiment(accession))
+ download_sample_tasks.append(DownloadSraExperiment(accession,
+ metadata=dict(experiment_id=self.experiment_id,
+ sample_id=accession)))
else:
logger.warning('Downloading %s from %s is not supported.', accession, external_database)
continue
@@ -51,7 +54,9 @@ def run(self):
continue
if len(sample.output()) == 0:
- logger.warning('GEO sample %s has no associated FASTQs from which batch information can be extracted.', sample.sample_id)
+ logger.warning(
+ 'GEO sample %s has no associated FASTQs from which batch information can be extracted.',
+ sample.sample_id)
continue
# TODO: find a cleaner way to obtain the SRA run accession
diff --git a/rnaseq_pipeline/sources/geo.py b/rnaseq_pipeline/sources/geo.py
index 018db16c..aab6758d 100644
--- a/rnaseq_pipeline/sources/geo.py
+++ b/rnaseq_pipeline/sources/geo.py
@@ -4,25 +4,28 @@
import gzip
import logging
-from subprocess import Popen
import os
+import re
+import tarfile
+import tempfile
+from datetime import timedelta
+from functools import lru_cache
from os.path import join
from urllib.parse import urlparse, parse_qs
-from functools import lru_cache
-import re
-import requests
-import xml.etree.ElementTree
+from xml.etree import ElementTree
-from bioluigi.tasks.utils import DynamicTaskWithOutputMixin, DynamicWrapperTask
import luigi
-from luigi.util import requires
+import luigi.format
import requests
+from bioluigi.tasks.utils import DynamicTaskWithOutputMixin, DynamicWrapperTask, TaskWithMetadataMixin
+from luigi.util import requires
+from .sra import DownloadSraExperiment
from ..config import rnaseq_pipeline
from ..miniml_utils import collect_geo_samples, collect_geo_samples_info
-from ..platforms import Platform, BgiPlatform, IlluminaPlatform
+from ..platforms import BgiPlatform, IlluminaPlatform
+from ..targets import ExpirableLocalTarget
from ..utils import RerunnableTaskMixin
-from .sra import DownloadSraExperiment
cfg = rnaseq_pipeline()
@@ -35,7 +38,7 @@ def retrieve_geo_platform_miniml(geo_platform):
"""Retrieve a GEO platform MINiML metadata"""
res = requests.get('https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi', params=dict(acc=geo_platform, form='xml'))
res.raise_for_status()
- return xml.etree.ElementTree.fromstring(res.text).find('miniml:Platform', ns)
+ return ElementTree.fromstring(res.text).find('miniml:Platform', ns)
def match_geo_platform(geo_platform):
"""Infer the type of platform given a GEO platform"""
@@ -47,7 +50,8 @@ def match_geo_platform(geo_platform):
return BgiPlatform(geo_platform_title.split(' ')[0])
# Illumina HiSeq X and NextSeq 550 platforms are not prefixed with Illumina
- illumina_regex = [r'Illumina (.+) \(.+\)', r'(HiSeq X .+) \(.+\)', r'(NextSeq 550) \(.+\)']
+ illumina_regex = [r'Illumina (.+) \(.+\)', r'(HiSeq X .+) \(.+\)', r'(NextSeq 550) \(.+\)',
+ r'(NextSeq 2000) \(.+\)']
for r in illumina_regex:
illumina_match = re.match(r, geo_platform_title)
@@ -56,7 +60,7 @@ def match_geo_platform(geo_platform):
raise NotImplementedError(f'Unsupported GEO platform: {geo_platform_title} ({geo_platform}).')
-class DownloadGeoSampleMetadata(RerunnableTaskMixin, luigi.Task):
+class DownloadGeoSampleMetadata(TaskWithMetadataMixin, RerunnableTaskMixin, luigi.Task):
"""
Download the MiNiML metadata for a given GEO Sample.
"""
@@ -64,14 +68,23 @@ class DownloadGeoSampleMetadata(RerunnableTaskMixin, luigi.Task):
resources = {'geo_http_connections': 1}
+ retry_count = 3
+
def run(self):
+ if self.output().is_stale():
+ logger.info('%s is stale, redownloading...', self.output())
res = requests.get('https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi', params=dict(acc=self.gsm, form='xml'))
res.raise_for_status()
+ try:
+ ElementTree.fromstring(res.text)
+ except ElementTree.ParseError as e:
+ raise Exception('Failed to parse XML from GEO sample metadata of ' + self.gsm) from e
with self.output().open('w') as f:
f.write(res.text)
def output(self):
- return luigi.LocalTarget(join(cfg.OUTPUT_DIR, cfg.METADATA, 'geo', '{}.xml'.format(self.gsm)))
+ return ExpirableLocalTarget(join(cfg.OUTPUT_DIR, cfg.METADATA, 'geo', '{}.xml'.format(self.gsm)),
+ ttl=timedelta(days=14))
@requires(DownloadGeoSampleMetadata)
class DownloadGeoSample(DynamicTaskWithOutputMixin, DynamicWrapperTask):
@@ -97,9 +110,13 @@ def run(self):
raise RuntimeError('{} GEO record is not linked to SRA.'.format(self.gsm))
platform, srx_url = samples_info[self.gsm]
srx = parse_qs(urlparse(srx_url).query)['term'][0]
- yield DownloadSraExperiment(srx)
+ metadata = dict(self.metadata)
+ # do not override the sample_id when invoked from DownloadGemmaExperiment
+ if 'sample_id' not in metadata:
+ metadata['sample_id'] = self.sample_id
+ yield DownloadSraExperiment(srx, metadata=metadata)
-class DownloadGeoSeriesMetadata(RerunnableTaskMixin, luigi.Task):
+class DownloadGeoSeriesMetadata(TaskWithMetadataMixin, RerunnableTaskMixin, luigi.Task):
"""
Download a GEO Series metadata containg information about related GEO
Samples.
@@ -108,15 +125,29 @@ class DownloadGeoSeriesMetadata(RerunnableTaskMixin, luigi.Task):
resources = {'geo_http_connections': 1}
+ retry_count = 3
+
def run(self):
- res = requests.get('https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi', params=dict(acc=self.gse, form='xml', targ='gsm'))
+ if self.output().is_stale():
+ logger.info('%s is stale, redownloading...', self.output())
+ res = requests.get('https://ftp.ncbi.nlm.nih.gov/geo/series/' + self.gse[
+ :-3] + 'nnn/' + self.gse + '/miniml/' + self.gse + '_family.xml.tgz',
+ stream=True)
res.raise_for_status()
- with self.output().open('w') as f:
- f.write(res.text)
+ # we need to use a temporary file because Response.raw does not allow seeking
+ with tempfile.TemporaryFile() as tmp:
+ for chunk in res.iter_content(chunk_size=1024):
+ tmp.write(chunk)
+ tmp.seek(0)
+ with tarfile.open(fileobj=tmp, mode='r:gz') as fin, self.output().open('w') as f:
+ reader = fin.extractfile(self.gse + '_family.xml')
+ while chunk := reader.read(1024):
+ f.write(chunk)
def output(self):
# TODO: remove the _family suffix
- return luigi.LocalTarget(join(cfg.OUTPUT_DIR, cfg.METADATA, 'geo', '{}_family.xml'.format(self.gse)))
+ return ExpirableLocalTarget(join(cfg.OUTPUT_DIR, cfg.METADATA, 'geo', '{}_family.xml'.format(self.gse)),
+ ttl=timedelta(days=14), format=luigi.format.Nop)
@requires(DownloadGeoSeriesMetadata)
class DownloadGeoSeries(DynamicTaskWithOutputMixin, DynamicWrapperTask):
@@ -124,11 +155,14 @@ class DownloadGeoSeries(DynamicTaskWithOutputMixin, DynamicWrapperTask):
Download all GEO Samples related to a GEO Series.
"""
+ ignored_samples = luigi.ListParameter(default=[], description='Ignored GSM identifiers')
+
def run(self):
gsms = collect_geo_samples(self.input().path)
+ gsms = [gsm for gsm in gsms if gsm not in self.ignored_samples]
if not gsms:
raise ValueError('{} has no related GEO samples with RNA-Seq data.'.format(self.gse))
- yield [DownloadGeoSample(gsm) for gsm in gsms]
+ yield [DownloadGeoSample(gsm, metadata=self.metadata) for gsm in gsms]
@requires(DownloadGeoSeriesMetadata, DownloadGeoSeries)
class ExtractGeoSeriesBatchInfo(luigi.Task):
@@ -144,7 +178,9 @@ def run(self):
with self.output().open('w') as info_out:
for sample in samples:
if len(sample.output()) == 0:
- logger.warning('GEO sample %s has no associated FASTQs from which batch information can be extracted.', sample.sample_id)
+ logger.warning(
+ 'GEO sample %s has no associated FASTQs from which batch information can be extracted.',
+ sample.sample_id)
continue
# TODO: find a cleaner way to obtain the SRA run accession
diff --git a/rnaseq_pipeline/sources/local.py b/rnaseq_pipeline/sources/local.py
index 45192196..1588ac3b 100644
--- a/rnaseq_pipeline/sources/local.py
+++ b/rnaseq_pipeline/sources/local.py
@@ -1,9 +1,9 @@
-from glob import glob
import os
+from glob import glob
from os.path import join
-from bioluigi.tasks.utils import DynamicTaskWithOutputMixin, DynamicWrapperTask
import luigi
+from bioluigi.tasks.utils import DynamicTaskWithOutputMixin, DynamicWrapperTask
from ..config import rnaseq_pipeline
@@ -23,11 +23,12 @@ def platform(self):
def output(self):
# we sort to make sure that pair ends are in correct order
- return [luigi.LocalTarget(f) for f in sorted(glob(join(cfg.OUTPUT_DIR, cfg.DATA, 'local', self.experiment_id, self.sample_id, '*.fastq.gz')))]
+ return [luigi.LocalTarget(f) for f in
+ sorted(glob(join(cfg.OUTPUT_DIR, cfg.DATA, 'local', self.experiment_id, self.sample_id, '*.fastq.gz')))]
class DownloadLocalExperiment(DynamicTaskWithOutputMixin, DynamicWrapperTask):
experiment_id = luigi.Parameter()
def run(self):
yield [DownloadLocalSample(self.experiment_id, os.path.basename(f))
- for f in glob(join(cfg.OUTPUT_DIR, cfg.DATA, 'local', self.experiment_id, '*'))]
+ for f in glob(join(cfg.OUTPUT_DIR, cfg.DATA, 'local', self.experiment_id, '*'))]
diff --git a/rnaseq_pipeline/sources/sra.py b/rnaseq_pipeline/sources/sra.py
index 7c4e3a21..e0e4e639 100644
--- a/rnaseq_pipeline/sources/sra.py
+++ b/rnaseq_pipeline/sources/sra.py
@@ -1,36 +1,42 @@
import gzip
import logging
import os
-from os.path import join
-import shlex
import subprocess
-from subprocess import Popen, check_output, PIPE
import xml.etree.ElementTree as ET
+from datetime import timedelta
+from os.path import join
+from subprocess import Popen, check_output, PIPE
-from bioluigi.tasks import sratoolkit
-from bioluigi.tasks.utils import DynamicTaskWithOutputMixin, DynamicWrapperTask
import luigi
-from luigi.util import requires
import pandas as pd
+from bioluigi.tasks import sratoolkit
+from bioluigi.tasks.utils import DynamicTaskWithOutputMixin, DynamicWrapperTask, TaskWithMetadataMixin
+from luigi.util import requires
from ..config import rnaseq_pipeline
+from ..platforms import IlluminaPlatform
+from ..targets import ExpirableLocalTarget
from ..utils import remove_task_output, RerunnableTaskMixin
-class sra(luigi.Config):
- task_namespace = 'rnaseq_pipeline.sources'
-
- paired_read_experiments = luigi.ListParameter(description='List of SRA experiments known to contain paired reads')
-
cfg = rnaseq_pipeline()
-sra_cfg = sra()
logger = logging.getLogger('luigi-interface')
+def read_runinfo(path):
+ SRA_RUNINFO_COLUMNS = 'Run,ReleaseDate,LoadDate,spots,bases,spots_with_mates,avgLength,size_MB,AssemblyName,download_path,Experiment,LibraryName,LibraryStrategy,LibrarySelection,LibrarySource,LibraryLayout,InsertSize,InsertDev,Platform,Model,SRAStudy,BioProject,Study_Pubmed_id,ProjectID,Sample,BioSample,SampleType,TaxID,ScientificName,SampleName,g1k_pop_code,source,g1k_analysis_group,Subject_ID,Sex,Disease,Tumor,Affection_Status,Analyte_Type,Histological_Type,Body_Site,CenterName,Submission,dbgap_study_accession,Consent,RunHash,ReadHash'.split(
+ ',')
+ df = pd.read_csv(path)
+ if df.columns[0] != 'Run':
+ logger.warning('Runinfo file %s is missing a header, a fallback will be used instead.', path)
+ # re-read with a list of known columns as a fallback
+ df = pd.read_csv(path, names=SRA_RUNINFO_COLUMNS[:len(df.columns)])
+ return df
+
"""
This module contains all the logic to retrieve RNA-Seq data from SRA.
"""
-class PrefetchSraRun(luigi.Task):
+class PrefetchSraRun(TaskWithMetadataMixin, luigi.Task):
"""
Prefetch a SRA run using prefetch from sratoolkit
@@ -38,6 +44,8 @@ class PrefetchSraRun(luigi.Task):
"""
srr = luigi.Parameter(description='SRA run identifier')
+ retry_count = 3
+
@staticmethod
def _get_ncbi_public_dir():
ret = subprocess.run(['vdb-config', '-p'], stdout=subprocess.PIPE, universal_newlines=True)
@@ -47,8 +55,10 @@ def _get_ncbi_public_dir():
def run(self):
yield sratoolkit.Prefetch(self.srr,
self.output().path,
- max_size=65,
- scheduler_partition='Wormhole')
+ max_size=100,
+ scheduler_partition='Wormhole',
+ metadata=self.metadata,
+ walltime=timedelta(hours=2))
def output(self):
return luigi.LocalTarget(join(self._get_ncbi_public_dir(), 'sra', f'{self.srr}.sra'))
@@ -60,7 +70,7 @@ class DumpSraRun(luigi.Task):
"""
srx = luigi.Parameter(description='SRA experiment identifier')
- paired_reads = luigi.BoolParameter(positional=False, significant=False, description='Indicate of reads have paired or single mates')
+ paired_reads = luigi.BoolParameter(positional=False, description='Indicate of reads have paired or single mates')
def on_success(self):
# cleanup SRA archive once dumped if it's still hanging around
@@ -70,9 +80,12 @@ def on_success(self):
def run(self):
yield sratoolkit.FastqDump(self.input().path,
- join(cfg.OUTPUT_DIR, cfg.DATA, 'sra', self.srx))
+ join(cfg.OUTPUT_DIR, cfg.DATA, 'sra', self.srx),
+ metadata=self.metadata)
if not self.complete():
- raise RuntimeError(f'{repr(self)} was not completed after successful fastq-dump execution. Is it possible the SRA run is mislabelled as single-end?')
+ labelling = 'paired' if self.paired_reads else 'single-end'
+ raise RuntimeError(
+ f'{repr(self)} was not completed after successful fastq-dump execution. Is it possible the SRA run is mislabelled as {labelling}?')
def output(self):
output_dir = join(cfg.OUTPUT_DIR, cfg.DATA, 'sra', self.srx)
@@ -88,12 +101,12 @@ def retrieve_runinfo(sra_accession):
"""Retrieve a SRA runinfo using search and efetch utilities"""
esearch_proc = Popen(['esearch', '-db', 'sra', '-query', sra_accession], stdout=PIPE)
runinfo_data = check_output(['efetch', '-format', 'runinfo'], universal_newlines=True, stdin=esearch_proc.stdout)
- if not runinfo_data.strip():
+ if not runinfo_data.strip() or (len(runinfo_data.splitlines()) == 1 and runinfo_data[:3] == 'Run'):
raise EmptyRunInfoError(f"Runinfo for {sra_accession} is empty.")
return runinfo_data
-class DownloadSraExperimentRunInfo(RerunnableTaskMixin, luigi.Task):
- srx = luigi.Parameter()
+class DownloadSraExperimentRunInfo(TaskWithMetadataMixin, RerunnableTaskMixin, luigi.Task):
+ srx = luigi.Parameter(description='SRX accession to use')
resources = {'edirect_http_connections': 1}
@@ -101,11 +114,14 @@ class DownloadSraExperimentRunInfo(RerunnableTaskMixin, luigi.Task):
retry_count = 1
def run(self):
+ if self.output().is_stale():
+ logger.info('%s is stale, redownloading...', self.output())
with self.output().open('w') as f:
f.write(retrieve_runinfo(self.srx))
def output(self):
- return luigi.LocalTarget(join(cfg.OUTPUT_DIR, cfg.METADATA, 'sra', '{}.runinfo'.format(self.srx)))
+ return ExpirableLocalTarget(join(cfg.OUTPUT_DIR, cfg.METADATA, 'sra', '{}.runinfo'.format(self.srx)),
+ ttl=timedelta(days=14))
@requires(DownloadSraExperimentRunInfo)
class DownloadSraExperiment(DynamicTaskWithOutputMixin, DynamicWrapperTask):
@@ -118,6 +134,11 @@ class DownloadSraExperiment(DynamicTaskWithOutputMixin, DynamicWrapperTask):
"""
srr = luigi.OptionalParameter(default=None, description='Specific SRA run accession to use (defaults to latest)')
+ force_single_end = luigi.BoolParameter(positional=False, significant=False, default=False,
+ description='Force the library layout to be single-end')
+ force_paired_reads = luigi.BoolParameter(positional=False, significant=False, default=False,
+ description='Force the library layout to be paired')
+
@property
def sample_id(self):
return self.srx
@@ -128,20 +149,27 @@ def platform(self):
def run(self):
# this will raise an error of no FASTQs are related
- df = pd.read_csv(self.input().path)
+ df = read_runinfo(self.input().path)
if self.srr is not None:
run = df[df.Run == self.srr].iloc[0]
else:
run = df.sort_values('Run', ascending=False).iloc[0]
- # layout is very often not annotated correctly and it is best to rely
- # on the number of mates per spot
- is_paired = (self.sample_id in sra_cfg.paired_read_experiments) or (run.spots_with_mates > 0) or (run.LibraryLayout == 'PAIRED')
+ if self.force_paired_reads:
+ is_paired = True
+ elif self.force_single_end:
+ is_paired = False
+ else:
+ is_paired = run.LibraryLayout == 'PAIRED'
- yield DumpSraRun(run.Run, self.srx, paired_reads=is_paired)
+ metadata = dict(self.metadata)
+ # do not override the sample_id when invoked from DownloadGeoSample or DownloadGemmaExperiment
+ if 'sample_id' not in metadata:
+ metadata['sample_id'] = self.sample_id
+ yield DumpSraRun(run.Run, self.srx, paired_reads=is_paired, metadata=metadata)
-class DownloadSraProjectRunInfo(RerunnableTaskMixin, luigi.Task):
+class DownloadSraProjectRunInfo(TaskWithMetadataMixin, RerunnableTaskMixin, luigi.Task):
"""
Download a SRA project
"""
@@ -157,13 +185,17 @@ def run(self):
f.write(retrieve_runinfo(self.srp))
def output(self):
- return luigi.LocalTarget(join(cfg.OUTPUT_DIR, cfg.METADATA, 'sra', '{}.runinfo'.format(self.srp)))
+ return ExpirableLocalTarget(join(cfg.OUTPUT_DIR, cfg.METADATA, 'sra', '{}.runinfo'.format(self.srp)),
+ ttl=timedelta(days=14))
@requires(DownloadSraProjectRunInfo)
class DownloadSraProject(DynamicTaskWithOutputMixin, DynamicWrapperTask):
+ ignored_samples = luigi.ListParameter(default=[], description='Ignored SRX identifiers')
+
def run(self):
- df = pd.read_csv(self.input().path)
- yield [DownloadSraExperiment(experiment) for experiment, runs in df.groupby('Experiment')]
+ df = read_runinfo(self.input().path)
+ yield [DownloadSraExperiment(experiment, metadata=self.metadata) for experiment, runs in
+ df.groupby('Experiment') if experiment not in self.ignored_samples]
@requires(DownloadSraProjectRunInfo, DownloadSraProject)
class ExtractSraProjectBatchInfo(luigi.Task):
@@ -186,4 +218,5 @@ def run(self):
info_out.write('\t'.join([experiment_id, fastq_id, row.Platform, srx_uri, fastq_header]) + '\n')
def output(self):
- return luigi.LocalTarget(join(cfg.OUTPUT_DIR, cfg.BATCHINFODIR, 'sra', '{}.fastq-headers-table'.format(self.srp)))
+ return luigi.LocalTarget(
+ join(cfg.OUTPUT_DIR, cfg.BATCHINFODIR, 'sra', '{}.fastq-headers-table'.format(self.srp)))
diff --git a/rnaseq_pipeline/targets.py b/rnaseq_pipeline/targets.py
index b97543fc..8900805d 100644
--- a/rnaseq_pipeline/targets.py
+++ b/rnaseq_pipeline/targets.py
@@ -1,15 +1,16 @@
-import os
-from os.path import join, exists
+from datetime import timedelta
+from os.path import join, exists, getctime, getmtime
+from time import time
import luigi
-import requests
-from requests.auth import HTTPBasicAuth
+
from .gemma import GemmaApi
class RsemReference(luigi.Target):
"""
Represents the target of rsem-prepare-reference script.
"""
+
def __init__(self, path, taxon):
self.path = path
self.taxon = taxon
@@ -19,9 +20,9 @@ def prefix(self):
return join(self.path, '{}_0'.format(self.taxon))
def exists(self):
- exts = ['chrlist', 'grp', 'idx.fa', 'ng2.idx.fa', 'seq', 'ti', 'transcripts.fa']
+ exts = ['chrlist', 'grp', 'idx.fa', 'n2g.idx.fa', 'seq', 'ti', 'transcripts.fa']
return all(exists(self.prefix + '.' + ext)
- for ext in exts)
+ for ext in exts)
class GemmaDatasetPlatform(luigi.Target):
"""
@@ -41,16 +42,45 @@ def exists(self):
def __repr__(self):
return 'GemmaDatasetPlatform(dataset_short_name={}, platform={})'.format(self.dataset_short_name, self.platform)
-class GemmaDatasetFactor(luigi.Target):
+class GemmaDatasetHasBatch(luigi.Target):
"""
- Represents a batch info factor associated to a Gemma dataset.
+ Check if there is a BatchInformationFetchingEvent attached
"""
- def __init__(self, dataset_short_name, factor):
+
+ def __init__(self, dataset_short_name):
self.dataset_short_name = dataset_short_name
- self.factor = factor
self._gemma_api = GemmaApi()
def exists(self):
- # all samples must have a batch factor
- return all(self.factor in sample['sample']['factors'].values()
- for sample in self._gemma_api.samples(self.dataset_short_name))
+ return self._gemma_api.dataset_has_batch(self.dataset_short_name)
+
+class ExpirableLocalTarget(luigi.LocalTarget):
+ """
+ A local target that can expire according to a TTL value
+
+ The TTL can either be a timedelta or a float representing the number of
+ seconds past the creation time of the target that it will be considered
+ fresh. Once that delay has expired, the target will not be considered as
+ existing.
+
+ By default, change time is used as per os.path.getctime. Use the
+ `use_mtime` parameter to use the modification time instead.
+ """
+
+ def __init__(self, path, ttl, use_mtime=False, format=None):
+ super().__init__(path, format=format)
+ if not isinstance(ttl, timedelta):
+ self._ttl = timedelta(seconds=ttl)
+ else:
+ self._ttl = ttl
+ self._use_mtime = use_mtime
+
+ def is_stale(self):
+ try:
+ creation_time = getmtime(self.path) if self._use_mtime else getctime(self.path)
+ except OSError:
+ return False # file is missing, assume non-stale
+ return creation_time + self._ttl.total_seconds() < time()
+
+ def exists(self):
+ return super().exists() and not self.is_stale()
diff --git a/rnaseq_pipeline/tasks.py b/rnaseq_pipeline/tasks.py
index 3675d281..c86b3b4f 100755
--- a/rnaseq_pipeline/tasks.py
+++ b/rnaseq_pipeline/tasks.py
@@ -1,9 +1,10 @@
import datetime
import logging
import os
-from glob import glob
-from os.path import abspath, join
+import tempfile
import uuid
+from glob import glob
+from os.path import join, dirname
import luigi
import luigi.task
@@ -12,22 +13,23 @@
from bioluigi.scheduled_external_program import ScheduledExternalProgramTask
from bioluigi.tasks import fastqc, multiqc
from bioluigi.tasks.utils import DynamicTaskWithOutputMixin, TaskWithOutputMixin, DynamicWrapperTask
-from luigi.task import flatten, flatten_output, WrapperTask
+from luigi.task import flatten_output, WrapperTask
from luigi.util import requires
from .config import rnaseq_pipeline
+from .gemma import GemmaCliTask, gemma
from .sources.arrayexpress import DownloadArrayExpressSample, DownloadArrayExpressExperiment
from .sources.gemma import DownloadGemmaExperiment
from .sources.geo import DownloadGeoSample, DownloadGeoSeries, ExtractGeoSeriesBatchInfo
from .sources.local import DownloadLocalSample, DownloadLocalExperiment
from .sources.sra import DownloadSraProject, DownloadSraExperiment, ExtractSraProjectBatchInfo
-from .targets import GemmaDatasetPlatform, GemmaDatasetFactor, RsemReference
-from .utils import no_retry, IlluminaFastqHeader, TaskWithPriorityMixin, RerunnableTaskMixin, remove_task_output
-from .gemma import GemmaTask
+from .targets import GemmaDatasetPlatform, GemmaDatasetHasBatch, RsemReference
+from .utils import no_retry, RerunnableTaskMixin, remove_task_output
logger = logging.getLogger('luigi-interface')
cfg = rnaseq_pipeline()
+gemma_cfg = gemma()
class DownloadSample(TaskWithOutputMixin, WrapperTask):
"""
@@ -39,13 +41,16 @@ class DownloadSample(TaskWithOutputMixin, WrapperTask):
experiment_id = luigi.Parameter()
sample_id = luigi.Parameter()
- source = luigi.ChoiceParameter(default='local', choices=['gemma', 'geo', 'arrayexpress', 'local', 'sra'], positional=False)
+ source = luigi.ChoiceParameter(default='local', choices=['gemma', 'geo', 'arrayexpress', 'local', 'sra'],
+ positional=False)
def requires(self):
if self.source in ['geo', 'gemma']:
- return DownloadGeoSample(self.sample_id)
+ return DownloadGeoSample(self.sample_id,
+ metadata=dict(experiment_id=self.experiment_id, sample_id=self.sample_id))
elif self.source == 'sra':
- return DownloadSraExperiment(self.sample_id)
+ return DownloadSraExperiment(self.sample_id,
+ metadata=dict(experiment_id=self.experiment_id, sample_id=self.sample_id))
elif self.source == 'arrayexpress':
return DownloadArrayExpressSample(self.experiment_id, self.sample_id)
elif self.source == 'local':
@@ -53,7 +58,7 @@ def requires(self):
else:
raise ValueError('Unknown source for sample: {}.'.format(self.source))
-class DownloadExperiment(TaskWithPriorityMixin, TaskWithOutputMixin, WrapperTask):
+class DownloadExperiment(TaskWithOutputMixin, WrapperTask):
"""
This is a generic task that detects which kind of experiment is intended to
be downloaded so that downstream tasks can process regardless of the data
@@ -64,7 +69,8 @@ class DownloadExperiment(TaskWithPriorityMixin, TaskWithOutputMixin, WrapperTask
"""
experiment_id = luigi.Parameter()
- source = luigi.ChoiceParameter(default='local', choices=['gemma', 'geo', 'sra', 'arrayexpress', 'local'], positional=False)
+ source = luigi.ChoiceParameter(default='local', choices=['gemma', 'geo', 'sra', 'arrayexpress', 'local'],
+ positional=False)
def requires(self):
if self.source == 'gemma':
@@ -99,47 +105,49 @@ def run(self):
if len(self.input()) == 1:
r1, = self.input()
yield platform.get_trim_single_end_reads_task(
- r1.path,
- join(destdir, os.path.basename(r1.path)),
- minimum_length=self.minimum_length,
- report_file=join(destdir, os.path.basename(r1.path) + '.cutadapt.json'),
- cpus=4)
+ r1.path,
+ join(destdir, os.path.basename(r1.path)),
+ minimum_length=self.minimum_length,
+ report_file=join(destdir, os.path.basename(r1.path) + '.cutadapt.json'),
+ cpus=4)
elif len(self.input()) == 2:
r1, r2 = self.input()
r1, r2 = self.input()
if self.ignore_mate == 'forward':
logger.info('Forward mate is ignored for %s.', repr(self))
yield platform.get_trim_single_end_reads_task(
- r2.path,
- join(destdir, os.path.basename(r2.path)),
- minimum_length=self.minimum_length,
- report_file=join(destdir, os.path.basename(r2.path) + '.cutadapt.json'),
- cpus=4)
+ r2.path,
+ join(destdir, os.path.basename(r2.path)),
+ minimum_length=self.minimum_length,
+ report_file=join(destdir, os.path.basename(r2.path) + '.cutadapt.json'),
+ cpus=4)
elif self.ignore_mate == 'reverse':
logger.info('Reverse mate is ignored for %s.', repr(self))
yield platform.get_trim_single_end_reads_task(
- r1.path,
- join(destdir, os.path.basename(r1.path)),
- minimum_length=self.minimum_length,
- report_file=join(destdir, os.path.basename(r1.path) + '.cutadapt.json'),
- cpus=4)
+ r1.path,
+ join(destdir, os.path.basename(r1.path)),
+ minimum_length=self.minimum_length,
+ report_file=join(destdir, os.path.basename(r1.path) + '.cutadapt.json'),
+ cpus=4)
else:
yield platform.get_trim_paired_reads_task(
- r1.path, r2.path,
- join(destdir, os.path.basename(r1.path)),
- join(destdir, os.path.basename(r2.path)),
- minimum_length=self.minimum_length,
- report_file=join(destdir, os.path.basename(r1.path) + '_' + os.path.basename(r2.path) + '.cutadapt.json'),
- cpus=4)
+ r1.path, r2.path,
+ join(destdir, os.path.basename(r1.path)),
+ join(destdir, os.path.basename(r2.path)),
+ minimum_length=self.minimum_length,
+ report_file=join(destdir,
+ os.path.basename(r1.path) + '_' + os.path.basename(r2.path) + '.cutadapt.json'),
+ cpus=4)
else:
raise NotImplementedError('Trimming more than two mates is not supported.')
-class TrimExperiment(TaskWithPriorityMixin, DynamicTaskWithOutputMixin, DynamicWrapperTask):
+class TrimExperiment(DynamicTaskWithOutputMixin, DynamicWrapperTask):
"""
Quality control all the samples in a given experiment.
"""
experiment_id = luigi.Parameter()
- source = luigi.ChoiceParameter(default='local', choices=['gemma', 'geo', 'sra', 'arrayexpress', 'local'], positional=False)
+ source = luigi.ChoiceParameter(default='local', choices=['gemma', 'geo', 'sra', 'arrayexpress', 'local'],
+ positional=False)
def requires(self):
return DownloadExperiment(self.experiment_id, source=self.source).requires().requires()
@@ -155,17 +163,20 @@ class QualityControlSample(DynamicTaskWithOutputMixin, DynamicWrapperTask):
"""
Perform post-download quality control on the FASTQs.
"""
+
def run(self):
destdir = join(cfg.OUTPUT_DIR, cfg.DATAQCDIR, self.experiment_id, self.sample_id)
os.makedirs(destdir, exist_ok=True)
- yield [fastqc.GenerateReport(fastq_in.path, destdir) for fastq_in in self.input()]
+ yield [fastqc.GenerateReport(fastq_in.path, destdir, temp_dir=tempfile.gettempdir()) for fastq_in in
+ self.input()]
-class QualityControlExperiment(TaskWithPriorityMixin, DynamicTaskWithOutputMixin, DynamicWrapperTask):
+class QualityControlExperiment(DynamicTaskWithOutputMixin, DynamicWrapperTask):
"""
Quality control all the samples in a given experiment.
"""
experiment_id = luigi.Parameter()
- source = luigi.ChoiceParameter(default='local', choices=['gemma', 'geo', 'sra', 'arrayexpress', 'local'], positional=False)
+ source = luigi.ChoiceParameter(default='local', choices=['gemma', 'geo', 'sra', 'arrayexpress', 'local'],
+ positional=False)
def requires(self):
return DownloadExperiment(self.experiment_id, source=self.source).requires().requires()
@@ -185,8 +196,8 @@ class PrepareReference(ScheduledExternalProgramTask):
:param taxon: Taxon
:param reference_id: Reference annotation build to use (i.e. ensembl98, hg38_ncbi)
"""
- taxon = luigi.Parameter(default='human')
- reference_id = luigi.Parameter(default='hg38_ncbi')
+ taxon = luigi.Parameter()
+ reference_id = luigi.Parameter()
cpus = 16
memory = 32
@@ -194,11 +205,12 @@ class PrepareReference(ScheduledExternalProgramTask):
def input(self):
genome_dir = join(cfg.OUTPUT_DIR, cfg.GENOMES, self.reference_id)
gtf_files = glob(join(genome_dir, '*.gtf'))
- fasta_files = glob(join(genome_dir, '*.f*a')) # FIXME: this pattern is too broad
+ fasta_files = glob(join(genome_dir, '*.f*a')) # FIXME: this pattern is too broad
if len(gtf_files) != 1:
raise ValueError('Exactly one GTF file is expected in {}.'.format(genome_dir))
if len(fasta_files) < 1:
- raise ValueError('At least one FASTA (with .fa or .fna extension) file is expected in {}.'.format(genome_dir))
+ raise ValueError(
+ 'At least one FASTA (with .fa or .fna extension) file is expected in {}.'.format(genome_dir))
return [luigi.LocalTarget(gtf_files[0]),
[luigi.LocalTarget(f) for f in fasta_files]]
@@ -244,7 +256,8 @@ class AlignSample(ScheduledExternalProgramTask):
walltime = datetime.timedelta(days=1)
# cleanup unused shared memory objects before and after the task is run
- scheduler_extra_args = ['--task-prolog', abspath(cfg.STAR_CLEANUP_SCRIPT), '--task-epilog', abspath(cfg.STAR_CLEANUP_SCRIPT)]
+ # FIXME: move this into the configuration
+ scheduler_extra_args = ['--gres=scratch:60G']
def run(self):
self.output().makedirs()
@@ -254,7 +267,8 @@ def _get_output_prefix(self):
return join(cfg.OUTPUT_DIR, cfg.ALIGNDIR, self.reference_id, self.experiment_id, self.sample_id)
def program_args(self):
- args = [join(cfg.RSEM_DIR, 'rsem-calculate-expression'), '-p', self.cpus]
+ args = ['scripts/rsem-calculate-expression-wrapper', join(cfg.RSEM_DIR, 'rsem-calculate-expression'), '-p',
+ self.cpus]
args.extend([
'--time',
@@ -278,7 +292,7 @@ def program_args(self):
raise NotImplementedError('Alignment of more than two input FASTQs is not supported.')
# reference for alignments and quantifications
- args.append(join(reference.prefix, '{}_0'.format(self.taxon)))
+ args.append(reference.prefix)
# output prefix
args.append(self._get_output_prefix())
@@ -288,7 +302,7 @@ def program_args(self):
def output(self):
return luigi.LocalTarget(self._get_output_prefix() + f'.{self.scope}.results')
-class AlignExperiment(TaskWithPriorityMixin, DynamicTaskWithOutputMixin, DynamicWrapperTask):
+class AlignExperiment(DynamicTaskWithOutputMixin, DynamicWrapperTask):
"""
Align all the samples in a given experiment.
@@ -296,9 +310,10 @@ class AlignExperiment(TaskWithPriorityMixin, DynamicTaskWithOutputMixin, Dynamic
experiment.
"""
experiment_id = luigi.Parameter()
- source = luigi.ChoiceParameter(default='local', choices=['gemma', 'geo', 'sra', 'arrayexpress', 'local'], positional=False)
- taxon = luigi.Parameter(default='human', positional=False)
- reference_id = luigi.Parameter(default='hg38_ncbi', positional=False)
+ source = luigi.ChoiceParameter(default='local', choices=['gemma', 'geo', 'sra', 'arrayexpress', 'local'],
+ positional=False)
+ taxon = luigi.Parameter(positional=False)
+ reference_id = luigi.Parameter(positional=False)
scope = luigi.Parameter(default='genes', positional=False)
def requires(self):
@@ -316,7 +331,7 @@ def run(self):
@no_retry
@requires(TrimExperiment, QualityControlExperiment, AlignExperiment)
-class GenerateReportForExperiment(TaskWithPriorityMixin, RerunnableTaskMixin, luigi.Task):
+class GenerateReportForExperiment(RerunnableTaskMixin, luigi.Task):
"""
Generate a summary report for an experiment with MultiQC.
@@ -324,18 +339,42 @@ class GenerateReportForExperiment(TaskWithPriorityMixin, RerunnableTaskMixin, lu
"""
def run(self):
+ fastqc_dir = join(cfg.OUTPUT_DIR, cfg.DATAQCDIR, self.experiment_id)
search_dirs = [
join(cfg.OUTPUT_DIR, 'data-trimmed', self.experiment_id),
- join(cfg.OUTPUT_DIR, cfg.DATAQCDIR, self.experiment_id),
+ fastqc_dir,
join(cfg.OUTPUT_DIR, cfg.ALIGNDIR, self.reference_id, self.experiment_id)]
self.output().makedirs()
- yield multiqc.GenerateReport(search_dirs, os.path.dirname(self.output().path), force=self.rerun)
+
+ # generate sample mapping for FastQC files
+ fastqc_suffix = '_fastqc.zip'
+ sample_names_file = join(cfg.OUTPUT_DIR, 'report', self.reference_id, self.experiment_id, 'sample_names.tsv')
+ with open(sample_names_file, 'w') as out:
+ for root, dirs, files in os.walk(fastqc_dir):
+ for f in files:
+ if f.endswith(fastqc_suffix):
+ fastqc_sample_id = f[:-len(fastqc_suffix)]
+ sample_id = os.path.basename(root)
+ # To avoid sample name clashes for paired-read
+ # sequencing, we need to add a suffix to the sample ID
+ # In single-end sequencing, fastq-dump does not
+ # produce _1, _2 suffixes, so the FastQC metrics will
+ # appear in the same row
+ if fastqc_sample_id.endswith('_1'):
+ sample_id += '_1'
+ elif fastqc_sample_id.endswith('_2'):
+ sample_id += '_2'
+ out.write(f'{fastqc_sample_id}\t{sample_id}\n')
+
+ yield multiqc.GenerateReport(search_dirs, dirname(self.output().path), replace_names=sample_names_file,
+ force=self.rerun)
def output(self):
- return luigi.LocalTarget(join(cfg.OUTPUT_DIR, 'report', self.reference_id, self.experiment_id, 'multiqc_report.html'))
+ return luigi.LocalTarget(
+ join(cfg.OUTPUT_DIR, 'report', self.reference_id, self.experiment_id, 'multiqc_report.html'))
@requires(AlignExperiment)
-class CountExperiment(TaskWithPriorityMixin, luigi.Task):
+class CountExperiment(luigi.Task):
"""
Combine the RSEM quantifications results from all the samples in a given
experiment.
@@ -349,8 +388,10 @@ def run(self):
# Each DownloadSample-like tasks have a sample_id property! Use that!
keys = [os.path.basename(f.path).replace(f'.{self.scope}.results', '') for f in self.input()]
- counts_buffer = pd.concat([pd.read_csv(f.path, sep='\t', index_col=0).expected_count for f in self.input()], keys=keys, axis=1).to_csv(sep='\t')
- fpkm_buffer = pd.concat([pd.read_csv(f.path, sep='\t', index_col=0).FPKM for f in self.input()], keys=keys, axis=1).to_csv(sep='\t')
+ counts_buffer = pd.concat([pd.read_csv(f.path, sep='\t', index_col=0).expected_count for f in self.input()],
+ keys=keys, axis=1).to_csv(sep='\t')
+ fpkm_buffer = pd.concat([pd.read_csv(f.path, sep='\t', index_col=0).FPKM for f in self.input()], keys=keys,
+ axis=1).to_csv(sep='\t')
with self.output()[0].open('w') as counts_out, self.output()[1].open('w') as fpkm_out:
counts_out.write(counts_buffer)
@@ -361,7 +402,7 @@ def output(self):
return [luigi.LocalTarget(join(destdir, f'{self.experiment_id}_counts.{self.scope}')),
luigi.LocalTarget(join(destdir, f'{self.experiment_id}_fpkm.{self.scope}'))]
-class SubmitExperimentBatchInfoToGemma(TaskWithPriorityMixin, GemmaTask):
+class SubmitExperimentBatchInfoToGemma(RerunnableTaskMixin, GemmaCliTask):
"""
Submit the batch information of an experiment to Gemma.
"""
@@ -370,24 +411,26 @@ class SubmitExperimentBatchInfoToGemma(TaskWithPriorityMixin, GemmaTask):
resources = {'submit_batch_info_jobs': 1}
+ ignored_samples = luigi.ListParameter(default=[])
+
def requires(self):
# TODO: Have a generic strategy for extracting batch info that would
# work for all sources
if self.external_database == 'GEO':
- return ExtractGeoSeriesBatchInfo(self.accession)
+ return ExtractGeoSeriesBatchInfo(self.accession, metadata=dict(experiment_id=self.experiment_id),
+ ignored_samples=self.ignored_samples)
elif self.external_database == 'SRA':
- return ExtractSraProjectBatchInfo(self.accession)
+ return ExtractSraProjectBatchInfo(self.accession, metadata=dict(experiment_id=self.experiment_id),
+ ignored_samples=self.ignored_samples)
else:
- raise NotImplementedError('Extracting batch information from {} is not supported.'.format(self.external_database))
-
- def subcommand_args(self):
- return ['-f', self.input().path]
+ raise NotImplementedError(
+ 'Extracting batch information from {} is not supported.'.format(self.external_database))
def output(self):
- return GemmaDatasetFactor(self.experiment_id, 'batch')
+ return GemmaDatasetHasBatch(self.experiment_id)
@no_retry
-class SubmitExperimentDataToGemma(TaskWithPriorityMixin, RerunnableTaskMixin, GemmaTask):
+class SubmitExperimentDataToGemma(RerunnableTaskMixin, GemmaCliTask):
"""
Submit an experiment to Gemma.
@@ -418,26 +461,46 @@ def subcommand_args(self):
def output(self):
return GemmaDatasetPlatform(self.experiment_id, self.platform_short_name)
-class SubmitExperimentReportToGemma(TaskWithPriorityMixin, WrapperTask, GemmaTask):
+class SubmitExperimentReportToGemma(RerunnableTaskMixin, GemmaCliTask):
"""
Submit an experiment QC report to Gemma.
-
- TODO: This is not yet fully implemented, so only a report is being generated.
"""
+ experiment_id = luigi.Parameter()
+
+ subcommand = 'addMetadataFile'
+
def requires(self):
return GenerateReportForExperiment(self.experiment_id,
taxon=self.taxon,
reference_id=self.reference_id,
- source='gemma')
+ source='gemma',
+ rerun=self.rerun)
+
+ def subcommand_args(self):
+ return ['-e', self.experiment_id, '--file-type', 'MULTIQC_REPORT', '--changelog-entry',
+ 'Adding MultiQC report generated by the RNA-Seq pipeline', self.input().path]
+
+ def output(self):
+ return luigi.LocalTarget(
+ join(gemma_cfg.appdata_dir, 'metadata', self.experiment_id, 'MultiQCReports/multiqc_report.html'))
@requires(SubmitExperimentDataToGemma, SubmitExperimentBatchInfoToGemma, SubmitExperimentReportToGemma)
-class SubmitExperimentToGemma(TaskWithPriorityMixin, TaskWithOutputMixin, WrapperTask):
+class SubmitExperimentToGemma(TaskWithOutputMixin, WrapperTask):
"""
Submit an experiment data, QC reports, and batch information to Gemma.
TODO: add QC report submission
"""
+ # Makes it so that we recheck if the task is complete after 20 minutes.
+ # This is because the Gemma Web API caches replies for 1200 seconds, so the
+ # batch factor will not appear until the query is evicted.
+ # See https://github.com/PavlidisLab/rnaseq-pipeline/issues/76 for details
+ retry_count = 1
+ retry_delay = 1200
+
+ priority = luigi.IntParameter(default=100, positional=False, significant=False)
+
def _targets_to_remove(self):
outs = []
# original data
@@ -452,7 +515,9 @@ def _targets_to_remove(self):
def on_success(self):
# report success to curators
if cfg.SLACK_WEBHOOK_URL is not None:
- payload = {'text': ' data and batch information have been successfully submitted to Gemma.'.format(self.experiment_id)}
+ payload = {
+ 'text': ' data and batch information have been successfully submitted to Gemma.'.format(
+ self.experiment_id)}
requests.post(cfg.SLACK_WEBHOOK_URL, json=payload)
return super().on_success()
@@ -473,25 +538,44 @@ def complete(self):
return super().complete() and all(not out.exists() for out in self._targets_to_remove())
class SubmitExperimentsFromDataFrameMixin:
- ignore_priority = luigi.BoolParameter(positional=False, significant=False, description='Ignore the priority column and use 100 everywhere as priority')
+ ignore_priority = luigi.BoolParameter(positional=False, significant=False,
+ description='Ignore the priority column and inherit the priority of the this task. Rows with zero priority are nonetheless ignored.')
+
def requires(self):
df = self._retrieve_dataframe()
- return [SubmitExperimentToGemma(row.experiment_id, priority=100 if self.ignore_priority else row.get('priority', 0), rerun=row['data']=='resubmit')
- for _, row in df.iterrows() if row.get('priority', 0) > 0]
+ # using None, the worker will inherit the priority from this task for all its dependencies
+ try:
+ return [SubmitExperimentToGemma(row.experiment_id,
+ priority=100 if self.ignore_priority else row.get('priority', 100),
+ rerun=row.get('data') == 'resubmit')
+ for _, row in df.iterrows() if row.get('priority', 1) > 0]
+ except AttributeError as e:
+ raise Exception(f'Failed to read experiments from {self._filename()}, is it valid?') from e
class SubmitExperimentsFromFileToGemma(SubmitExperimentsFromDataFrameMixin, TaskWithOutputMixin, WrapperTask):
input_file = luigi.Parameter()
+
+ def _filename(self):
+ return self.input_file
+
def _retrieve_dataframe(self):
return pd.read_csv(self.input_file, sep='\t', converters={'priority': lambda x: 0 if x == '' else int(x)})
class SubmitExperimentsFromGoogleSpreadsheetToGemma(SubmitExperimentsFromDataFrameMixin, WrapperTask):
- spreadsheet_id = luigi.Parameter(description='Spreadsheet ID in Google Sheets (lookup {spreadsheetId} in https://docs.google.com/spreadsheets/d/{spreadsheetId}/edit)')
+ spreadsheet_id = luigi.Parameter(
+ description='Spreadsheet ID in Google Sheets (lookup {spreadsheetId} in https://docs.google.com/spreadsheets/d/{spreadsheetId}/edit)')
sheet_name = luigi.Parameter(description='Name of the spreadsheet in the document')
+
+ def _filename(self):
+ return 'https://docs.google.com/spreadsheets/d/' + self.spreadsheet_id
+
# TODO: use the spreadsheet revision ID
# For now, all that does is distinguishing spreadsheet tasks which might
# refer to different revisions, which in turn allows newly added tasks to
# be executed
- revision_id = luigi.Parameter(default=str(uuid.uuid4()), description='Revision ID of the spreadsheet (not yet supported, but will default to the latest)')
+ revision_id = luigi.Parameter(default=str(uuid.uuid4()),
+ description='Revision ID of the spreadsheet (not yet supported, but will default to the latest)')
+
def _retrieve_dataframe(self):
from .gsheet import retrieve_spreadsheet
return retrieve_spreadsheet(self.spreadsheet_id, self.sheet_name)
diff --git a/rnaseq_pipeline/utils.py b/rnaseq_pipeline/utils.py
index b0c09a60..4a673683 100644
--- a/rnaseq_pipeline/utils.py
+++ b/rnaseq_pipeline/utils.py
@@ -1,9 +1,7 @@
import logging
-import uuid
import luigi
from luigi.task import flatten_output
-from luigi.parameter import ParameterVisibility
logger = logging.getLogger('luigi-interface')
@@ -42,17 +40,15 @@ def max_retry(count):
Set the maximum number of time a task can be retried before being disabled
as per Luigi retry policy.
"""
+
def wrapper(cls):
cls.retry_count = count
return cls
+
return wrapper
no_retry = max_retry(0)
-class TaskWithPriorityMixin:
- """Mixin that adds a --priority flag to a given task."""
- priority = luigi.IntParameter(default=0, positional=False, significant=False)
-
class RerunnableTaskMixin:
"""
Mixin for a task that can be rerun regardless of its completion status.
@@ -74,6 +70,7 @@ def complete(self):
class CheckAfterCompleteMixin:
"""Ensures that a task is completed after a successful run()."""
+
def run(self):
ret = super().run()
if not self.complete():
diff --git a/rnaseq_pipeline/webviewer/__init__.py b/rnaseq_pipeline/webviewer/__init__.py
index b8e36d5b..3d92fc0a 100644
--- a/rnaseq_pipeline/webviewer/__init__.py
+++ b/rnaseq_pipeline/webviewer/__init__.py
@@ -1,12 +1,16 @@
-from os.path import basename
+from os.path import basename, getctime, join, dirname
+import datetime
+from glob import glob
+from os.path import basename, getctime, join, dirname
import luigi
-from flask import Flask, send_file, render_template, url_for, request, abort
import pandas as pd
+from flask import Flask, send_file, render_template, abort
from rnaseq_pipeline.config import rnaseq_pipeline
-from rnaseq_pipeline.tasks import GenerateReportForExperiment, CountExperiment, ExtractGeoSeriesBatchInfo, SubmitExperimentDataToGemma, SubmitExperimentBatchInfoToGemma
-from rnaseq_pipeline.gemma import GemmaTask
+from rnaseq_pipeline.gemma import GemmaTaskMixin
+from rnaseq_pipeline.tasks import GenerateReportForExperiment, CountExperiment, SubmitExperimentDataToGemma, \
+ SubmitExperimentBatchInfoToGemma
app = Flask('rnaseq_pipeline.webviewer')
@@ -14,6 +18,9 @@
references = ['hg38_ncbi', 'mm10_ncbi', 'm6_ncbi']
+class FakeGemmaTask(GemmaTaskMixin, luigi.Task):
+ pass
+
@app.errorhandler(400)
def bad_request(e):
return render_template('400.html', e=e), 400
@@ -22,6 +29,14 @@ def bad_request(e):
def not_found(e):
return render_template('404.html', e=e), 404
+@app.route('/')
+def home():
+ report_dir = join(cfg.OUTPUT_DIR, 'report')
+ latest_experiments = [(basename(path), basename(dirname(path)),
+ datetime.datetime.now() - datetime.datetime.fromtimestamp(getctime(path))) for path in
+ sorted(glob(join(report_dir, '*', '*')), key=lambda path: -getctime(path))]
+ return render_template('index.html', latest_experiments=latest_experiments[:10])
+
@app.route('/experiment/')
def experiment_summary(experiment_id):
try:
@@ -32,14 +47,16 @@ def experiment_summary(experiment_id):
submit_batch_info_task = SubmitExperimentBatchInfoToGemma(experiment_id)
ebi_task = submit_batch_info_task.requires()
if ebi_task.complete():
- batch_info = pd.read_csv(ebi_task.output().path, sep='\t', names=['geo_sample_id', 'sra_run_id', 'geo_platform_id', 'sra_experiment_url', 'fastq_header'])
+ batch_info = pd.read_csv(ebi_task.output().path, sep='\t',
+ names=['geo_sample_id', 'sra_run_id', 'geo_platform_id', 'sra_experiment_url',
+ 'fastq_header'])
else:
batch_info = None
return render_template('experiment-summary.html',
- experiment_id=experiment_id, batch_info=batch_info,
- submit_data_task=submit_data_task,
- submit_batch_info_task=submit_batch_info_task)
+ experiment_id=experiment_id, batch_info=batch_info,
+ submit_data_task=submit_data_task,
+ submit_batch_info_task=submit_batch_info_task)
@app.route('/experiment//batch-info')
def experiment_batch_info(experiment_id):
@@ -54,13 +71,18 @@ def experiment_batch_info(experiment_id):
@app.route('/experiment//by-reference-id//quantifications/')
def experiment_quantifications(experiment_id, mode, reference_id=None):
if reference_id is None:
- gemma_task = GemmaTask(experiment_id)
+ gemma_task = FakeGemmaTask(experiment_id)
reference_id = gemma_task.reference_id
+ taxon = gemma_task.taxon
+ source = 'gemma'
+ else:
+ taxon = 'human'
+ source = 'local'
try:
mode_ix = ['counts', 'fpkm'].index(mode)
except ValueError:
abort(400, f'Unknown mode {mode} for quantifications, try either counts or fpkm.')
- count_experiment_task = CountExperiment(experiment_id, reference_id=reference_id)
+ count_experiment_task = CountExperiment(experiment_id, reference_id=reference_id, taxon=taxon, source=source)
if not count_experiment_task.complete():
abort(404, f'No quantifications available for {experiment_id} in {reference_id}.')
file_path = count_experiment_task.output()[mode_ix].path
@@ -70,9 +92,15 @@ def experiment_quantifications(experiment_id, mode, reference_id=None):
@app.route('/experiment/<experiment_id>/by-reference-id/<reference_id>/report')
def experiment_report(experiment_id, reference_id=None):
if reference_id is None:
- gemma_task = GemmaTask(experiment_id)
+ gemma_task = FakeGemmaTask(experiment_id)
reference_id = gemma_task.reference_id
- generate_report_task = GenerateReportForExperiment(experiment_id, reference_id=reference_id)
+ taxon = gemma_task.taxon
+ source = 'gemma'
+ else:
+ taxon = 'human'
+ source = 'local'
+ generate_report_task = GenerateReportForExperiment(experiment_id, reference_id=reference_id, taxon=taxon,
+ source=source)
if not generate_report_task.complete():
abort(404, f'No report available for {experiment_id} in {reference_id}.')
return send_file(generate_report_task.output().path)
diff --git a/rnaseq_pipeline/webviewer/templates/experiment-summary.html b/rnaseq_pipeline/webviewer/templates/experiment-summary.html
index 60370bcb..38757f1c 100644
--- a/rnaseq_pipeline/webviewer/templates/experiment-summary.html
+++ b/rnaseq_pipeline/webviewer/templates/experiment-summary.html
@@ -20,29 +20,29 @@ {{ experiment_id }}
- Gemma Dataset:
- - {{ submit_data_task.dataset_short_name }}
+ - {{ submit_data_task.dataset_short_name }}
- External Dataset:
- - {{ submit_data_task.accession }} on {{ submit_data_task.external_database }}
+ - {{ submit_data_task.accession }} on {{ submit_data_task.external_database }}
- Taxon:
- {{ submit_data_task.taxon }}
- Platform:
- - {{ submit_data_task.platform_short_name }}
+ - {{ submit_data_task.platform_short_name }}
- Reference:
- {{ submit_data_task.reference_id }}
- Quality Report:
- - MultiQC Report
+ - MultiQC Report
Quantification Matrices
- Genes
-
- Counts,
- FPKM
+ Counts,
+ FPKM
- Isoforms
- Nothing, yet.
diff --git a/rnaseq_pipeline/webviewer/templates/index.html b/rnaseq_pipeline/webviewer/templates/index.html
new file mode 100644
index 00000000..f38af208
--- /dev/null
+++ b/rnaseq_pipeline/webviewer/templates/index.html
@@ -0,0 +1,54 @@
+
+
+
+
+
+
+ Pavlidis Lab RNA-Seq Pipeline
+
+
+
+
+
Pavlidis Lab RNA-Seq Pipeline
+
There isn't much at this time here, but you can take a look at the following links:
+
+
Latest Reports
+
+ {% for id, reference_id, t in latest_experiments %}
+ - {{ id }} {{ t }} ago
+ {% endfor %}
+
+
Endpoints
+
+ - /experiment/{experimentId}
+ -
+ Summary
+ (example)
+
+ - /experiment/{experimentId}/batch-info
+ -
+ Batch Information
+ (example)
+
+ - /experiment/{experimentId}/quantifications/counts
+ -
+ Quantifications (counts)
+ (example)
+
+ - /experiment/{experimentId}/quantifications/fpkm
+ -
+ Quantifications (FPKM)
+ (example)
+
+ - /experiment/{experimentId}/report
+ -
+ MultiQC Report
+ (example)
+
+
+
+
+
diff --git a/scripts/Makefile b/scripts/Makefile
index 23b7eb21..2dd956bd 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -1,2 +1,14 @@
+DESTDIR :=
+PREFIX := /usr
+
clean-unused-shm-objects: clean-unused-shm-objects.c
- gcc -o $@ $<
+ $(CC) -o $@ $<
+
+install:
+ mkdir -p "${DESTDIR}${PREFIX}/bin"
+ install clean-unused-shm-objects "${DESTDIR}${PREFIX}/bin"
+
+clean:
+ rm clean-unused-shm-objects
+
+.PHONY: install clean
diff --git a/scripts/forgive-and-reenable b/scripts/forgive-and-reenable
new file mode 100755
index 00000000..41f4585d
--- /dev/null
+++ b/scripts/forgive-and-reenable
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+#
+#
+#
+
+for task_id in $(bioluigi list --status DISABLED "$1" | cut -f 1); do
+ bioluigi reenable "$task_id" &
+done
+
+wait
+
+for task_id in $(bioluigi list --status FAILED "$1" | cut -f 1); do
+ bioluigi forgive "$task_id" &
+done
+
+wait
diff --git a/scripts/generate-fish-completion b/scripts/generate-fish-completion
new file mode 100755
index 00000000..4c097f12
--- /dev/null
+++ b/scripts/generate-fish-completion
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+
+import subprocess
+from shlex import quote
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--executable', required=False, default='luigi')
+parser.add_argument('--module', required=False)
+args = parser.parse_args()
+
+luigi_executable = args.executable
+module = args.module
+
+# erase previous completion
+print('complete -c ' + quote(luigi_executable) + ' -e')
+print('complete -c ' + quote(luigi_executable) + ' -f')
+
+def generate_completion(lines, command=None):
+ opt = None
+ hasarg = False
+ desc = ''
+ def generate_option():
+ nonlocal opt, hasarg, desc
+ print('complete -c ' + quote(luigi_executable) + (' -n ' + quote('__fish_seen_subcommand_from ' + command) if command else '') + ' -f -l ' + quote(opt) + (' -r' if hasarg else '') + (' -d ' + quote(desc) if desc else ''))
+ opt = None
+ hasarg = False
+ desc = ''
+ for line in lines:
+ line = line.strip()
+ if not line:
+ continue
+ if line.startswith('--'):
+ if opt:
+ generate_option()
+ pieces = line.split(maxsplit=2)
+ opt = pieces[0][2:]
+ if len(pieces) > 1:
+ # if it's all upper, it's an argument name
+ if pieces[1] == pieces[1].upper():
+ hasarg = True
+ if len(pieces) == 3:
+ desc = pieces[2]
+ else:
+ hasarg = False
+ desc = ' '.join(pieces[1:])
+ else:
+ desc = ''
+ hasarg = False
+ elif opt:
+ if desc:
+ desc += ' ' + line
+ else:
+ desc = line
+ if opt:
+ generate_option()
+
+# basic help, no module
+proc = subprocess.run([luigi_executable, '--help-all'], capture_output=True, text=True)
+generate_completion(proc.stdout.splitlines())
+
+if module:
+ proc = subprocess.run([luigi_executable, '--module', module, 'dummy'], capture_output=True, text=True)
+ _, commands = proc.stderr.strip().split('Candidates are: ', maxsplit=1)
+ commands = commands.split(',')
+ commands = [c for c in commands if c.startswith(module)]
+ print('complete -c ' + quote(luigi_executable) + ' -n ' + quote('not __fish_seen_subcommand_from ' + ' '.join(commands)) + ' -a ' + quote(' '.join(commands)))
+ for command in commands:
+ proc = subprocess.run([luigi_executable, '--module', module, command, '--help'], capture_output=True, text=True)
+ generate_completion(proc.stdout.splitlines(), command=command)
diff --git a/scripts/luigi-wrapper b/scripts/luigi-wrapper
index 2e968a78..36396613 100755
--- a/scripts/luigi-wrapper
+++ b/scripts/luigi-wrapper
@@ -2,4 +2,4 @@
umask 002
-luigi --module rnaseq_pipeline.tasks $@
+exec luigi --module rnaseq_pipeline.tasks "$@"
diff --git a/scripts/map-gene-ids b/scripts/map-gene-ids
new file mode 100755
index 00000000..51209c0f
--- /dev/null
+++ b/scripts/map-gene-ids
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+
+
+# This script remaps incorrect gene IDs from some of our references
+
+import gzip
+import re
+import sys
+
+gene_id_pattern = re.compile('gene_id "(.+?)"')
+dbxref_pattern = re.compile('db_xref "GeneID:(.+?)"')
+
+def main(args):
+ if len(args) != 2:
+ return 1
+ broken_gtf, gene_mapping = args
+ seen_broken_gene_ids = set()
+ broken_ids_without_fix = set()
+ with open(broken_gtf) as bo, open(gene_mapping, 'w') as gmo:
+ for line in bo:
+ if line[0] == '#':
+ continue
+ broken_gene_id = gene_id_pattern.findall(line)
+ fixed_gene_id = dbxref_pattern.findall(line)
+ if broken_gene_id and fixed_gene_id:
+ broken_gene_id = broken_gene_id[0]
+ fixed_gene_id = fixed_gene_id[0]
+ if broken_gene_id != fixed_gene_id:
+ if broken_gene_id in seen_broken_gene_ids:
+ continue
+ seen_broken_gene_ids.add(broken_gene_id)
+ gmo.write(broken_gene_id + '\t' + fixed_gene_id + '\n')
+ elif broken_gene_id: # but no fixed ID
+ broken_ids_without_fix.add(broken_gene_id[0])
+
+ print(f'Generated a mapping for {len(seen_broken_gene_ids)} broken IDs')
+ if broken_ids_without_fix:
+ print('The following broken IDs do not have fixed IDs:', ', '.join(sorted(broken_ids_without_fix)))
+
+if __name__ == '__main__':
+ sys.exit(main(sys.argv[1:]))
diff --git a/scripts/prepare-ncbi-gtf-for-gemma b/scripts/prepare-ncbi-gtf-for-gemma
new file mode 100755
index 00000000..e2fe9a3a
--- /dev/null
+++ b/scripts/prepare-ncbi-gtf-for-gemma
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+
+# This script renames gene_id with the NCBI numerical gene ID
+
+import argparse
+import re
+import gzip
+import urllib.request
+
+gene_id_pattern = re.compile('gene_id "(.*?)"')
+transcript_id_pattern = re.compile('transcript_id "(.*?)"')
+dbxref_ncbi_gene_id_pattern = re.compile('db_xref "GeneID:(.*?)"')
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--gtf', required=True)
+parser.add_argument('--output-gtf', required=True)
+parser.add_argument('--output-gene-ids', required=True)
+args = parser.parse_args()
+
+converted_lines = 0
+seen_gene_ids = set()
+ncbi_id_by_gene_id = {}
+with gzip.open(args.gtf, 'rt') as f, open(args.output_gtf, 'w') as fout:
+ for lineno, line in enumerate(f):
+ gene_id = gene_id_pattern.findall(line)
+ transcript_id = transcript_id_pattern.findall(line)
+ ncbi_id = dbxref_ncbi_gene_id_pattern.findall(line)
+ if gene_id:
+ gene_id = gene_id[0]
+ if ncbi_id:
+ ncbi_id = int(ncbi_id[0])
+ ncbi_id_by_gene_id[gene_id] = ncbi_id
+ else:
+ print(f'The following line [{lineno}] does not have a GeneID db_xref record, retrieving it from a previously seen record:')
+ print(line)
+ ncbi_id = ncbi_id_by_gene_id[gene_id]
+ if transcript_id and transcript_id[0] != '':
+ seen_gene_ids.add(ncbi_id)
+ line = gene_id_pattern.sub(f'gene_id "{ncbi_id}"', line, count=1)
+ converted_lines += 1
+ fout.write(line)
+
+with open(args.output_gene_ids, 'w') as f:
+ f.write('\n'.join(map(str, sorted(seen_gene_ids))) + '\n')
diff --git a/scripts/purge-problematic-sra-data b/scripts/purge-problematic-sra-data
new file mode 100755
index 00000000..a0dd34e5
--- /dev/null
+++ b/scripts/purge-problematic-sra-data
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+# Remove runinfo, prefetched SRA records and dumped FASTQs from failed SRA
+# experiments
+#
+# Preferably run this on carl, there's a lot of I/O operations involved.
+
+NCBI_CACHE_DIR=/cosmos/scratch/ncbi
+PIPELINE_DIR=/cosmos/data/pipeline-output/rnaseq
+
+for task_id in $(bioluigi list --status DISABLED rnaseq_pipeline.sources.sra.DumpSraRun | cut -f 1); do
+ srx=$(echo $task_id | cut -f 4 -d _)
+ srr=$(echo $task_id | cut -f 3 -d _)
+ # at some point, we made paired_reads a significant parameter...
+ if [ "$srr" = True ] || [ "$srr" = False ]; then
+ srx=$(echo $task_id | cut -f 5 -d _)
+ srr=$(echo $task_id | cut -f 4 -d _)
+ fi
+ echo "Removing $NCBI_CACHE_DIR/public/sra/$srr.sra..."
+ rm -f "$NCBI_CACHE_DIR/public/sra/$srr.sra"
+ echo "Removing $PIPELINE_DIR/metadata/sra/$srx.runinfo..."
+ rm -f "$PIPELINE_DIR/metadata/sra/$srx.runinfo"
+ echo Removing $PIPELINE_DIR/data/sra/$srx/$srr*.fastq.gz
+ rm -f $PIPELINE_DIR/data/sra/$srx/$srr*.fastq.gz
+ # refresh the task (dependencies that were previously done will be reverified and marked as pending)
+ luigi-wrapper rnaseq_pipeline.sources.sra.DumpSraRun --srx "$srx" --srr "$srr" --workers 0
+ # re-enable and forgive so it can be run immediately
+ bioluigi reenable --forgive "$task_id"
+done
diff --git a/scripts/remap-data-matrix b/scripts/remap-data-matrix
new file mode 100755
index 00000000..a075032e
--- /dev/null
+++ b/scripts/remap-data-matrix
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+
+import sys
+from glob import glob
+from os.path import basename, join
+
+remapping_file = sys.argv[1]
+reference_id = sys.argv[2]
+
+remapping = {}
+with open(remapping_file) as f:
+ for line in f:
+ a, b = line.strip().split('\t')
+ remapping[a] = b
+
+for expression_data_file in glob(join('pipeline-output/quantified', reference_id, "*.genes")):
+ name = basename(expression_data_file)
+ output_expression_data_file = join('pipeline-output/quantified', reference_id + "_fixed", name)
+ print(name)
+ with open(expression_data_file) as f, open(output_expression_data_file, 'w') as fout:
+ header = True
+ for line in f:
+ if header:
+ fout.write(line)
+ header = False
+ continue
+ gene_id, rest = line.split('\t', maxsplit=1)
+ if gene_id in remapping:
+ gene_id = remapping[gene_id]
+ fout.write(gene_id + '\t' + rest)
diff --git a/scripts/remove-old-data b/scripts/remove-old-data
new file mode 100755
index 00000000..b9a9f7c9
--- /dev/null
+++ b/scripts/remove-old-data
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+output_dir=/cosmos/data/pipeline-output/rnaseq
+
+find "$output_dir/data/sra" -mtime +30 -delete -print
+find "$output_dir/data-trimmed" -mtime +30 -delete -print
diff --git a/scripts/rsem-calculate-expression-wrapper b/scripts/rsem-calculate-expression-wrapper
new file mode 100755
index 00000000..7f3e90f6
--- /dev/null
+++ b/scripts/rsem-calculate-expression-wrapper
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+set -e
+
+echo "Removing stale temporary directories..."
+find "$TMPDIR" -maxdepth 1 -name 'rsem-*' -ctime +0 -delete || echo "No temporary directories were removed."
+echo "Removing unused shared memory objects..."
+scripts/clean-unused-shm-objects || echo "Failed to remove unused shared memory objects."
+
+exec "$@"
diff --git a/scripts/submit-experiment b/scripts/submit-experiment
new file mode 100755
index 00000000..20390d92
--- /dev/null
+++ b/scripts/submit-experiment
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+
+import argparse
+import sys
+import os
+from contextlib import contextmanager
+
+import luigi
+
+from rnaseq_pipeline.tasks import SubmitExperimentToGemma, SubmitExperimentBatchInfoToGemma
+
+@contextmanager
+def umask(umask):
+ print(f'Setting umask to 0o{umask:03o}')
+ prev_umask = os.umask(umask)
+ try:
+ yield None
+ finally:
+ print(f'Restoring umask to 0o{prev_umask:03o}')
+ os.umask(prev_umask)
+
+def parse_octal(s):
+ return int(s, 8)
+
+def main(argv):
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--experiment-id', required=True, help='Experiment ID to submit to Gemma')
+ parser.add_argument('--resubmit-batch-info', action='store_true', help='Only resubmit batch information')
+ parser.add_argument('--umask', type=parse_octal, default='002', help='Set a umask (defaults to 002 to make created files group-writable)')
+ parser.add_argument('--workers', type=int, default=30, help='Number of workers to use (defaults to 30)')
+ parser.add_argument('--local-scheduler', action='store_true', default=False)
+ args = parser.parse_args(argv)
+ with umask(args.umask):
+ if args.resubmit_batch_info:
+ task = SubmitExperimentBatchInfoToGemma(experiment_id=args.experiment_id, rerun=True)
+ else:
+ task = SubmitExperimentToGemma(experiment_id=args.experiment_id)
+ results = luigi.build([task], workers=args.workers, detailed_summary=True, local_scheduler=args.local_scheduler)
+ print(results.summary_text)
+
+if __name__ == '__main__':
+ sys.exit(main(sys.argv[1:]))
diff --git a/scripts/submit-experiments-from-gsheet b/scripts/submit-experiments-from-gsheet
index c1b235ce..6786660f 100755
--- a/scripts/submit-experiments-from-gsheet
+++ b/scripts/submit-experiments-from-gsheet
@@ -24,14 +24,15 @@ def parse_octal(s):
def main(argv):
parser = argparse.ArgumentParser()
- parser.add_argument('--spreadsheet-id')
- parser.add_argument('--sheet-name')
- parser.add_argument('--umask', type=parse_octal, default='002')
- parser.add_argument('--workers', type=int, default=100)
- parser.add_argument('--ignore-priority', action='store_true')
+ parser.add_argument('--spreadsheet-id', required=True, help='Spreadsheet ID')
+ parser.add_argument('--sheet-name', required=True, help='Sheet name')
+ parser.add_argument('--umask', type=parse_octal, default='002', help='Set a umask (defaults to 002 to make created files group-writable)')
+ parser.add_argument('--workers', type=int, default=200, help='Number of workers to use (defaults to 200)')
+ parser.add_argument('--ignore-priority', action='store_true', help='Ignore the priority column in the spreadsheet')
+ parser.add_argument('--local-scheduler', action='store_true', default=False)
args = parser.parse_args(argv)
with umask(args.umask):
- results = luigi.build([SubmitExperimentsFromGoogleSpreadsheetToGemma(args.spreadsheet_id, args.sheet_name, ignore_priority=args.ignore_priority)], workers=args.workers, detailed_summary=True)
+ results = luigi.build([SubmitExperimentsFromGoogleSpreadsheetToGemma(args.spreadsheet_id, args.sheet_name, ignore_priority=args.ignore_priority)], workers=args.workers, detailed_summary=True, local_scheduler=args.local_scheduler)
print(results.summary_text)
if __name__ == '__main__':
diff --git a/scripts/sync-multiqc-reports b/scripts/sync-multiqc-reports
new file mode 100755
index 00000000..79012ca5
--- /dev/null
+++ b/scripts/sync-multiqc-reports
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+set -e
+
+PIPELINE_OUTPUT_DIR=/cosmos/data/pipeline-output/rnaseq
+GEMMA_DATA_DIR=/space/gemmaData
+
+human_reference_id=$(grep 'human_reference_id=' luigi.cfg | sed 's/human_reference_id=//')
+mouse_reference_id=$(grep 'mouse_reference_id=' luigi.cfg | sed 's/mouse_reference_id=//')
+rat_reference_id=$(grep 'rat_reference_id=' luigi.cfg | sed 's/rat_reference_id=//')
+
+echo $human_reference_id $mouse_reference_id $rat_reference_id
+
+echo "Copying MultiQC reports for the following references: $human_reference_id, $mouse_reference_id, $rat_reference_id"
+
+for f in $PIPELINE_OUTPUT_DIR/report/{$human_reference_id,$mouse_reference_id,$rat_reference_id}/*; do
+ experiment_id=$(basename $f)
+ destdir=$GEMMA_DATA_DIR/metadata/$experiment_id/MultiQCReports
+ echo "Copying MultiQC report for $experiment_id from $f to $destdir..."
+ mkdir -p "$destdir"
+ rsync -a "$f/" "$destdir/"
+done
diff --git a/scripts/webviewer b/scripts/webviewer
new file mode 100755
index 00000000..3a9b9b7f
--- /dev/null
+++ b/scripts/webviewer
@@ -0,0 +1,4 @@
+#!/bin/fish
+
+conda activate rnaseq-pipeline
+exec gunicorn -e SCRIPT_NAME=/rnaseq-pipeline -b 0.0.0.0:8000 rnaseq_pipeline.webviewer:app --access-logfile pipeline-output/webviewer-logs/access.log --error-logfile pipeline-output/webviewer-logs/error.log
diff --git a/setup.py b/setup.py
index 6857f34d..666fed53 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
from setuptools import setup, find_packages
setup(name='rnaseq_pipeline',
- version='2.1.7',
+ version='2.1.12',
description='RNA-Seq pipeline for the Pavlidis Lab',
license='Public Domain',
long_description='file: README.md',
@@ -9,10 +9,11 @@
url='https://github.com/pavlidisLab/rnaseq-pipeline',
author='Guillaume Poirier-Morency',
author_email='poirigui@msl.ubc.ca',
- classifiers=['License :: Public Domain'],
+ classifiers=['License :: Public Domain', 'Private :: Do Not Upload'],
packages=find_packages(),
- install_requires=['luigi', 'bioluigi', 'requests', 'pandas'],
+ include_package_data=True,
+ install_requires=['luigi', 'python-daemon<3.0.0', 'bioluigi>=0.2.1', 'requests', 'pandas'],
extras_require={
'gsheet': ['google-api-python-client', 'google-auth-httplib2', 'google-auth-oauthlib', 'pyxdg'],
- 'webviewer': ['Flask']},
- scripts=['scripts/luigi-wrapper', 'scripts/submit-experiments-from-gsheet'])
+ 'webviewer': ['Flask', 'gunicorn']},
+ scripts=['scripts/luigi-wrapper', 'scripts/submit-experiments-from-gsheet', 'scripts/submit-experiment'])
diff --git a/tests/data/SRX12752257.runinfo b/tests/data/SRX12752257.runinfo
new file mode 100644
index 00000000..146467ad
--- /dev/null
+++ b/tests/data/SRX12752257.runinfo
@@ -0,0 +1 @@
+SRR16550084,2021-10-25 00:01:09,2021-10-23 19:53:39,33629838,3430243476,33629838,102,2159,,https://sra-pub-run-odp.s3.amazonaws.com/sra/SRR16550084/SRR16550084,SRX12752257,GSM5652607,RNA-Seq,cDNA,TRANSCRIPTOMIC,PAIRED,0,0,ILLUMINA,Illumina HiSeq 2500,SRP342859,PRJNA773954,,773954,SRS10699972,SAMN22547146,simple,10090,Mus musculus,GSM5652607,,,,,,,no,,,,,"CENTER FOR GENOMIC MEDICINE, MASSACHUSETTS GENERAL HOSPITAL",SRA1315997,,public,B454959FE49095B284D48A56D5A9B125,DEDDE16D10CD226A257DB549B3530BF9
diff --git a/tests/test_gemma.py b/tests/test_gemma.py
index 647d8ec7..a457940c 100644
--- a/tests/test_gemma.py
+++ b/tests/test_gemma.py
@@ -1,5 +1,4 @@
from rnaseq_pipeline.gemma import *
-import os
def test_gemma_api():
gemma_api = GemmaApi()
@@ -7,7 +6,7 @@ def test_gemma_api():
gemma_api.samples('GSE110256')
def test_gemma_task():
- task = GemmaTask(experiment_id='GSE110256')
+ task = GemmaCliTask(experiment_id='GSE110256')
env = task.program_environment()
assert 'JAVA_OPTS' in env
assert 'JAVA_HOME' in env
diff --git a/tests/test_geo.py b/tests/test_geo.py
new file mode 100644
index 00000000..026619b2
--- /dev/null
+++ b/tests/test_geo.py
@@ -0,0 +1,25 @@
+import luigi
+
+from rnaseq_pipeline.platforms import IlluminaPlatform
+from rnaseq_pipeline.sources.geo import match_geo_platform, DownloadGeoSampleMetadata, \
+ DownloadGeoSeriesMetadata
+from rnaseq_pipeline.utils import remove_task_output
+
+def test_parse_illumina_platform():
+ platform = match_geo_platform('GPL30172')
+ assert isinstance(platform, IlluminaPlatform)
+ assert platform.name == 'Illumina'
+ assert platform.instrument == 'NextSeq 2000'
+
+def test_download_geo_sample_metadata():
+ t = DownloadGeoSampleMetadata('GSM6753395')
+ remove_task_output(t)
+ assert not t.complete()
+ luigi.build([t], local_scheduler=True)
+ assert t.complete()
+
+def test_download_geo_series_metadata():
+ t = DownloadGeoSeriesMetadata('GSE220114')
+ remove_task_output(t)
+ luigi.build([t], local_scheduler=True)
+ assert t.complete()
diff --git a/tests/test_platforms.py b/tests/test_platforms.py
index 5bf87499..355ba4a6 100644
--- a/tests/test_platforms.py
+++ b/tests/test_platforms.py
@@ -1,4 +1,4 @@
-from rnaseq_pipeline.platforms import Platform, BgiPlatform, IlluminaPlatform, IlluminaNexteraPlatform
+from rnaseq_pipeline.platforms import BgiPlatform, IlluminaPlatform, IlluminaNexteraPlatform
def test_bgi_platform_trim_single_end_reads():
task = BgiPlatform('BGISEQ-500').get_trim_single_end_reads_task('r1', 'r1_dest')
diff --git a/tests/test_sra.py b/tests/test_sra.py
index e29ff529..4efde06c 100644
--- a/tests/test_sra.py
+++ b/tests/test_sra.py
@@ -1,6 +1,12 @@
+import os
+import shutil
+from os.path import dirname
+
+import luigi
import pytest
-from rnaseq_pipeline.sources.sra import DownloadSraExperimentRunInfo, DownloadSraProjectRunInfo, EmptyRunInfoError
+from rnaseq_pipeline.sources.sra import DownloadSraExperimentRunInfo, DownloadSraProjectRunInfo, EmptyRunInfoError, \
+ DownloadSraExperiment
def test_download_sra_experiment_run_info():
task = DownloadSraExperimentRunInfo(srx='SRX12752257')
@@ -17,6 +23,14 @@ def test_empty_sra_file_raises_exception():
assert not task.output().exists()
assert not task.complete()
+def test_sra_file_with_missing_header():
+ download_runinfo_task = DownloadSraExperimentRunInfo(srx='SRX12752257')
+ download_runinfo_task.output()
+ os.makedirs(dirname(download_runinfo_task.output().path), exist_ok=True)
+ shutil.copy('tests/data/SRX12752257.runinfo', download_runinfo_task.output().path)
+ assert download_runinfo_task.complete()
+ assert luigi.build([DownloadSraExperiment(srx='SRX12752257')], local_scheduler=True)
+
def test_download_sra_project_run_info():
task = DownloadSraProjectRunInfo(srp='SRP342859')
task.run()
diff --git a/tests/test_targets.py b/tests/test_targets.py
index 438218fb..44f1c2eb 100644
--- a/tests/test_targets.py
+++ b/tests/test_targets.py
@@ -1,5 +1,29 @@
-from rnaseq_pipeline.targets import GemmaDatasetPlatform, GemmaDatasetFactor
+import tempfile
+from datetime import timedelta
+from time import sleep
+
+from rnaseq_pipeline.targets import GemmaDatasetPlatform, GemmaDatasetHasBatch, ExpirableLocalTarget
def test_gemma_targets():
- assert GemmaDatasetFactor('GSE110256', 'batch').exists()
+ assert GemmaDatasetHasBatch('GSE110256').exists()
assert GemmaDatasetPlatform('GSE110256', 'Generic_mouse_ncbiIds').exists()
+
+def test_expirable_local_target():
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ t = ExpirableLocalTarget(tmp_dir + '/test', ttl=timedelta(seconds=1))
+ assert not t.exists()
+ with t.open('w') as f:
+ pass
+ assert t.exists()
+ sleep(1)
+ assert not t.exists()
+
+def test_expirable_local_target_with_float_ttl():
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ t = ExpirableLocalTarget(tmp_dir + '/test', ttl=1.0)
+ assert not t.exists()
+ with t.open('w') as f:
+ pass
+ assert t.exists()
+ sleep(1)
+ assert not t.exists()
diff --git a/tests/test_tasks.py b/tests/test_tasks.py
index 0802befd..8b898f17 100644
--- a/tests/test_tasks.py
+++ b/tests/test_tasks.py
@@ -1,10 +1,7 @@
-import datetime
import pytest
-from rnaseq_pipeline.config import rnaseq_pipeline
-from rnaseq_pipeline.tasks import *
-
from rnaseq_pipeline.sources.geo import match_geo_platform
+from rnaseq_pipeline.tasks import *
cfg = rnaseq_pipeline()
@@ -38,11 +35,14 @@ def test_platform_retrieval_by_name_when_unknown_instrument():
match_geo_platform('GPL29597')
def test_align_sample_task():
- task = AlignSample('GSE', 'GSM', reference_id='hg38_ncbi', scope='genes')
+ task = AlignSample('GSE', 'GSM', reference_id='hg38_ncbi', taxon='human', scope='genes')
assert task.output().path == join(cfg.OUTPUT_DIR, cfg.ALIGNDIR, 'hg38_ncbi', 'GSE', 'GSM.genes.results')
assert task.walltime == datetime.timedelta(days=1)
-def test_gemma_task():
+def test_gemma_task_mixin():
+ class GemmaTask(GemmaTaskMixin, luigi.Task):
+ pass
+
gemma_task = GemmaTask('GSE110256')
assert gemma_task.taxon == 'mouse'
assert gemma_task.accession == 'GSE110256'