diff --git a/.chglog/CHANGELOG.tpl.md b/.chglog/CHANGELOG.tpl.md deleted file mode 100755 index 05e21ad..0000000 --- a/.chglog/CHANGELOG.tpl.md +++ /dev/null @@ -1,42 +0,0 @@ -{{ if .Versions -}} - -## [Unreleased] - -{{ if .Unreleased.CommitGroups -}} -{{ range .Unreleased.CommitGroups -}} -### {{ .Title }} -{{ range .Commits -}} -- {{ if .Scope }}**{{ .Scope }}:** {{ end }}{{ .Subject }} -{{ end }} -{{ end -}} -{{ end -}} -{{ end -}} - -{{ range .Versions }} - -## {{ if .Tag.Previous }}[{{ .Tag.Name }}]{{ else }}{{ .Tag.Name }}{{ end }} - {{ datetime "2006-01-02" .Tag.Date }} -{{ range .CommitGroups -}} -### {{ .Title }} -{{ range .Commits -}} -- {{ if .Scope }}**{{ .Scope }}:** {{ end }}{{ .Subject }} -{{ end }} -{{ end -}} - -{{- if .NoteGroups -}} -{{ range .NoteGroups -}} -### {{ .Title }} -{{ range .Notes }} -{{ .Body }} -{{ end }} -{{ end -}} -{{ end -}} -{{ end -}} - -{{- if .Versions }} -[Unreleased]: {{ .Info.RepositoryURL }}/compare/{{ $latest := index .Versions 0 }}{{ $latest.Tag.Name }}...HEAD -{{ range .Versions -}} -{{ if .Tag.Previous -}} -[{{ .Tag.Name }}]: {{ $.Info.RepositoryURL }}/compare/{{ .Tag.Previous.Name }}...{{ .Tag.Name }} -{{ end -}} -{{ end -}} -{{ end -}} diff --git a/.chglog/config.yml b/.chglog/config.yml deleted file mode 100755 index 0ea5514..0000000 --- a/.chglog/config.yml +++ /dev/null @@ -1,45 +0,0 @@ -style: github -template: CHANGELOG.tpl.md -info: - title: CHANGELOG - repository_url: https://github.com/lycosystem/lyscripts -options: - commits: - filters: - Type: - - feat - - fix - - perf - - refac - - docs - - chore - - test - issues: - prefix: - - # - refs: - actions: - - closes - - fixes - merges: - pattern: "^merge:" - reverts: - pattern: "^revert:" - commit_groups: - title_maps: - feat: Features - fix: Bug Fixes - perf: Performance Improvements - refac: Code Refactoring - docs: Documentation - chore: Maintenance - test: Testing - header: - pattern: "^(\\w*)(?:\$([\\w\\$\\.\\-\\*\\s]*)\$)?\\:\\s(.*)$" - pattern_maps: - 
- Type - - Scope - - Subject - notes: - keywords: - - BREAKING CHANGE diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index 3f47cab..0000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: Build - -on: - release: - types: [ created ] - - workflow_dispatch: - -jobs: - build: - name: Build package from source - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - fetch-depth: 0 - - name: Install Python 3 - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - name: Install build tools - run: | - python3 -m pip install build --user - - name: Build package - run: | - python3 -m build - - name: Upload to CI runner - uses: actions/upload-artifact@v4 - with: - name: built-package - path: dist/ - - pypi-publish: - name: Publish built package on PyPI - runs-on: ubuntu-latest - needs: - - build - - # Specifying a GitHub environment is optional, but strongly encouraged - environment: - name: pypi - url: https://pypi.org/p/lyscripts - permissions: - # IMPORTANT: this permission is mandatory for Trusted Publishing - id-token: write - steps: - # retrieve your distributions here - - name: Download from CI runner - uses: actions/download-artifact@v4 - with: - name: built-package - path: dist/ - - name: Publish on PyPI - uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/testpypi.yml b/.github/workflows/testpypi.yml deleted file mode 100644 index 9029f11..0000000 --- a/.github/workflows/testpypi.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: Test Build - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - workflow_dispatch: - -jobs: - build: - name: Build package from source - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - fetch-depth: 0 - - name: Install Python 3 - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - name: Install 
build tools - run: | - python3 -m pip install build --user - - name: Build package - run: | - python3 -m build - - name: Upload to CI runner - uses: actions/upload-artifact@v4 - with: - name: built-package - path: dist/ - - testpypi-publish: - name: Publish built package on TestPyPI - runs-on: ubuntu-latest - needs: - - build - - # Specifying a GitHub environment is optional, but strongly encouraged - environment: - name: testpypi - url: https://test.pypi.org/p/lyscripts - permissions: - # IMPORTANT: this permission is mandatory for Trusted Publishing - id-token: write - steps: - # retrieve your distributions here - - name: Download from CI runner - uses: actions/download-artifact@v4 - with: - name: built-package - path: dist/ - - name: Publish on PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - repository-url: https://test.pypi.org/legacy/ diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml deleted file mode 100644 index 84ebe00..0000000 --- a/.github/workflows/tests.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: tests - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - - workflow_dispatch: - -jobs: - tests: - name: Run tests & report coverage - runs-on: ubuntu-latest - permissions: - pull-requests: write - contents: write - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install .[tests] - - # Below, we first run pytest in the `tests/` folder. Because we use a `src` - # layout, this will fail if the package is not installed correctly. - - name: Test package is installable - run: pytest --cov=lyscripts --cov-config=pyproject.toml tests - env: - COVERAGE_FILE: .coverage.is_installable - - # Now, we execute all doctests in the `src` tree. 
This will NOT run with - # the installed code, but it doesn't matter, because we already know it is - # installable from the step above. - - name: Run doctests - if: success() || failure() # run these even if previous step fails - run: pytest --cov=lyscripts --cov-config=pyproject.toml --doctest-modules src - env: - COVERAGE_FILE: .coverage.doctests - GITHUB_TOKEN: ${{ secrets.LYCOSYSTEM_READALL }} - - # Lastly, we collect all files that start with `.coverage` into one file and - # create a report either as a comment on the PR or in a separate branch if its - # a commit to the main branch. From that branch we can put badges and coverage - # reports into e.g. our main README.md - - name: Add coverage comment - if: success() || failure() # run these even if previous step fails - uses: py-cov-action/python-coverage-comment-action@v3 - with: - GITHUB_TOKEN: ${{ github.token }} - MERGE_COVERAGE_FILES: true diff --git a/.gitignore b/.gitignore deleted file mode 100644 index d737ce6..0000000 --- a/.gitignore +++ /dev/null @@ -1,137 +0,0 @@ -# VS Code -.vscode/ - -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -_version.py - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# uv -# This could be tracked, but is not really necessary for library development -uv.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# Test result folders -/tests/plot/results diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 3b059e6..0000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,38 +0,0 @@ -default_install_hook_types: [pre-commit, commit-msg] - -repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 - hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-toml - - id: check-yaml - - id: check-json -- repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.0 - hooks: - - id: ruff - args: [ --fix ] - - id: ruff-format -- repo: https://github.com/compilerla/conventional-pre-commit - rev: v4.2.0 - hooks: - - id: conventional-pre-commit - stages: [commit-msg] - args: - - build # changes of the build system or dependencies - - change # commit alters the implementation of an existing feature - - chore # technical or maintenance task not related to feature or user story - - ci # edits to the continuous integration scripts/configuration - - deprecate # a feature or functionality will be deprecated - - docs # add, update of revise the documentation - - feat # a new feature was implemented (bump MINOR version) - - fix # an issue or bug has been fixed (bump PATCH version) - - perf # performance improvements that do not alter existing behavior - - refac # update shuffles code around but does not alter functionality - - remove # a feature or functionality is removed - - style # source code is improved w.r.t. 
its code quality - - test # commits enhance or add to the test suite - - merge # merge one branch into another. Should be ignored by git-chglog - - revert # revert previous commit(s). Should be ignored by git-chglog diff --git a/.readthedocs.yml b/.readthedocs.yml deleted file mode 100644 index bb033d9..0000000 --- a/.readthedocs.yml +++ /dev/null @@ -1,25 +0,0 @@ -# .readthedocs.yml -# Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required -version: 2 - -build: - os: ubuntu-22.04 - tools: - python: "3.10" - -# Build documentation in the docs/ directory with Sphinx -sphinx: - builder: html - configuration: docs/source/conf.py - fail_on_warning: false - -# tell RTD to install the package with the docs optional requirements -python: - install: - - method: pip - path: . - extra_requirements: - - docs diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index 84abe31..0000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,999 +0,0 @@ -# Changelog - -All notable changes to this project will be documented in this file. - -## [1.0.2] - 2026-04-08 - -### Bug Fixes - -- Correct TI evaluation and leave out uncertainty reporting. Fixes [#81]. - -### Miscellaneous Tasks - -- Change email addresses. - -## [1.0.1] - 2025-09-10 - -### Bug Fixes - -- Convert dtypes during joining using lydata's `cast_dtypes()`. - -## [1.0.0] - 2025-09-04 - -### Bug Fixes - -- Change `info` to `core` for mid-level lydata columns -- Use bug-fixed lydata `.ly.enhance()` method. - -### Documentation - -- Update documentation for `integrate` and `evidence` commands. -- Improve `data collect` description. - -### Features - -- Add `integrate` command for thermodynamic integration. Thanks [@noemibuehrer]! -- Add command spawning websever for interactive data collection. - -### Miscellaneous Tasks - -- Add missing links to changelog. -- Add CITATION.cff. - -### Testing - -- Update tests for new lydata. 
- -### Build - -- Add uvicorn, fastapi to deps. -- Require at least lydata 0.4.0. - -### Change - -- Make compatible with new lyDATA version. -- Centralize inverse temperature schedule generation. -- Store selected log-level globally. -- Disable properties in collector. - -## [1.0.0rc3] - 2025-07-22 - -### Documentation - -- Fix `join` command's example call. - -### Features - -- Add `data fetch` command. Fixes [#75]. - -### Change - -- Access data only via lydata for compatibility.\ - We have changed the lydata 2nd level headers slightly for the - patient and tumor info (see - https://github.com/lycosystem/lydata/issues/21 for more info).\ - Since the lydata package was already updated to be compatible with - that change, we simply need to route every access the lyscripts - make to the data through lydata package and hence be compatible - too. -- Make CLI work with new lydata format.\ - This is also related to https://github.com/lycosystem/lydata/issues/21 - -## [1.0.0rc2] - 2025-06-26 - -### Bug Fixes - -- Divide by midext prevalence. Fixes [#72].\ - This fixes a bug we reintroduced where we didn't compute observed and - model prevalence in an analogous and comparable way. - -### Documentation - -- Fix build badge in README. -- Fix outdated links to rmnldwg. - -### Miscellaneous Tasks - -- Update pre-commit & ruff rules. - -### Testing - -- Fix the dataset used for testing prevalences. - -### Build - -- Switch to `src` layout. Fixes [#74]. - -### Ci - -- Use tests action with coverage. - -## [1.0.0rc1] - 2025-05-27 - -### Bug Fixes - -- Use 'fork' start method under MacOS. -- Specify config file encoding for other OSes. -- Cast scenario pattern to bool if possible. Fixes [#70].\ - Since we allow defining a pattern with keywords like `"involved"` or `1` - instead of only `True`, we also need to make sure that is correctly cast - to its boolean value for lydata's `C` objects. - -### Documentation - -- Add warning for Windows & MacOS regarding multiprocess(ing). 
- -### Miscellaneous Tasks - -- Add link to changelog. - -### Testing - -- Ensure observed prevalence is correct. Related [#70]. - -### Build - -- Exclude buggy pydantic-settings. is present in -version 2.9.0 and 2.9.1 of pydantic-settings. So, these versions must be -excluded. - -### Ci - -- Update release scripts to use [OIDC](https://docs.pypi.org/trusted-publishers/). This is more secure. - -### Remove - -- Unnecessary custom help formatter. - -## [1.0.0.a7] - 2025-04-15 - -### Bug Fixes - -- Make config -> model -> config round trip test pass. -- Remove `thin_by` factor in wrong place. -- Pass involvement & diagnosis correctly to risks and prevalences. - -### Features - -- Create Modality config from model. -- Create Graph config from model. -- Create Model config from model. - -Note that the three features above come with certain limitations. It is not possible -to export all aspects of a model to a configuration. Especially the distributions -over diagnosis times cannot be converted to a `DistributionConfig`. - -### Miscellaneous Tasks - -- Fix changelog version link. -- Update ly schema. - -### Testing - -- Round trip config -> model -> config. - -### Merge - -- Branch 'main' into 'dev'. -- Branch 'configs-from-model' into 'dev'.\ - Constructing configs from models is only partially possible with the - current implementation of the `lymph` model. - -## [1.0.0.a6] - 2025-03-12 - -### Bug Fixes - -- Better handle midline model. \ - This means disabling the evolution over midline extension. Also, since the new - version of `lymph-model`, the `midext_prob` parameter is not epected to be the - first one anymore when passed to `set_params()`. -- Pass only ipsilateral diagnosis to unilateral model. -- Pass diagnose & involvement correctly to models as dict. - -### Testing - -- Ensure unilateral model receives correct diagnosis. -- Test that diagnosis is used correctly in posteriors. - -### Build - -- Bump lydata & lymph-model dependency. 
- -## [1.0.0.a5] - 2025-02-05 - -### Bug Fixes - -- Provide rich `console` to compute progress bars. -- Correctly build deprecated models. - -### Features - -- Enable sampling only named parameter subset. - -### Testing - -- Check model construction & named params. -- Update integration test config YAML files. -- Add external model symbol check. -- Adding dists works correctly.\ - Previously, it could happen that in a `Bilateral` or `Midline` model the - individual submodel's distributions where not synced. - -### Build - -- Bump lymph-model. - -### Change - -- Require every YAML file to have `version`. -- Better version-related error and docs. -- Make in-/output names more consistent. - -### Remove - -- Drop CLI required argument `version`. - -## [1.0.0.a4] - 2025-01-23 - -### Bug Fixes - -- Update data loading to new lydata API. -- Add sampling config back to sample CLI. -- Finish `data filter` command. -- Correctly log number of excluded pateints in `lyproxify`. -- Allow extra args in CLI cmds. -- Logging during progress bar. - -### Documentation - -- Deactivate help of removed commands. -- Link only to stable versions. -- Fix intersphinx links. -- Update link to schedule module. -- Configure how pydantic models are displayed. -- Add more info about schema. -- Better explain sampling. -- Add proper info to `cli_cmd()` methods. - -### Features - -- Add mandatory `version` field to command settings.\ - This will allow to differentiate between old and new configs and create - the models accordingly. -- Add translation of old to new model configs. -- Add dynamic YAML config source. -- Configure logging nicely. -- Update `data enhance` command. -- Update `data join` command. -- Update `data filter` command. -- Update `data split` command. -- Capture lydata logging output. -- Update YAML schema for CLIs. -- Update `data lyproxify` cmd. 
-- Allow sampling only specified params.\ - Via a new CLI arg named `param_names` one may restrict the parameters - sampled to a named subset. In combination with the fact that any Python - model may be loaded, this results in an enormously flexible sampler. -- Update inv temp `schedule` cmd. -- Allow providing start state to sampling func. - -### Testing - -- Replace subprocess calls with monkeypatch.\ - This allows for better debugging during test calls. -- Load generated data correctly.\ - The synthetic data for testing already has "early" and "late" as - T-stages. Thus, the mapping needed to be adapted. - -### Build - -- pydantic-settings >= 2.7 needed. - -### Change - -- Make `version` field in command settings required. -- Use pydantic for subcommands. -- Use loguru over default logging. -- Rename `data` field to `input`. -- Use pydantic for plot utils, too. -- Use rich logging handler. - -### Refac - -- Slightly change CLI inheritances. -- Sort configs alphabetically. -- Make sampling more reusable. - -### Remove - -- Unused utility functions. -- Plotting scripts except histogram/betaposterp helpers. - -## [1.0.0.a3] - 2024-11-15 - -### Bug Fixes - -- (**plot**) Turn off label by passing `None`. -- (**plot**) Don't fail on wrong wrong in `.draw()`. -- Polish dataclass configuration. -- Make pydantic work with subparsers. -- (**data**) Correct argument name of save function. -- (**comp**) Add distributions & fix dir type for priors. -- (**config**) Don't thin samples twice. - -### Documentation - -- (**configs**) Add to sphinx docs. -- Add intersphinx link to lydata. -- (**config**) Improve `construct_model` docstring. -- Correct copyright year. -- Clean up refs to deleted modules. -- Fix links to documentation in readme. - -### Features - -- (**config**) Write new methods to assemble model. -- (**plot**) Add func `split_legends()`. Related [#60].\ - This allows the user to separate many plot's labels into a number of - different legends. 
-- (**configs**) Add graph config. -- (**sample**) Better config management.\ - The new sample command can merge configurations from multiple YAML files - as well as the command line. It does a better job at reporting progress - when the log-level is set to debug and it more consistently stores - samples and metric logs during burn-in. -- (**sample**) Add JSON schema for params.\ - With this JSON schema one can get auto-completion in most popular - code editors for the configs of most commands. -- (**config**) Allow loading external model.\ - It is now possible to not only specify a model using a fixed YAML - schema, but also via a plain Python file that defines a global `model` - variable. -- (**sample**) Add inverse temperature.\ - With this addition, it is now in principle possible to do thermodynamic - integration. But it is not yet fully fleshed out for a nice user - experience. -- Add tiny script to generate JSON schema.\ - The script creates a JSON schema based on all CLI commands' settings. -- (**comp**) Rewrite posteriors command that uses pydantic and joblib -- (**comp**) Rewrite risks command that uses pydantic and joblib -- (**sample**) Add iterations/second column to burnin progress display. -- (**sample**) Show total iterations to sampling progress display. -- (**config**) Allow converting diagnosis to involvement. -- (**compute**) Rewrite prevalence command that uses pydantic and joblib - -### Miscellaneous Tasks - -- Bump pre-commit hooks. -- Add ruff linting rules. -- Switch to ruff, drop pycln & isort. -- Run ruff & clean up codebase. -- Ignore some ruff rules in tests dir. - -### Styling - -- More cleanup to satisfy ruff. -- Improve docstrings and code style a bit. - -### Testing - -- Add some basic testing for config. -- (**data**) Basic integration test for `generate`. -- Add sampling step to integration tests. -- (**config**) Check the external loading feature. -- Extend & unify integration test for priors. 
- -### Build - -- Remove upper cap in dependencies. -- Bump lymph-model to 1.2.3. -- Remove `dev` from optional dependencies.\ - This is because a lot of dev tools like ruff, pre-commit, ... are - installed globally (e.g. with pipx) instead of per venv. -- Bump `lydata` dependency to 0.2.0. - -### Change - -- (**plot**) Improve beta post & hist. Fixes [60].\ - The histograms and beta posteriors are now better implemented, allowing - a user to extend the `draw()` function's abilities by adding classes - similar to `Histogram` and `BetaPosterior`. -- (**plot**) More flexible labels. -- Use pydantic over dacite. -- Switch to pydantic config for sampling (WIP). -- (**compute**) Use pydantic & joblib over dataclasses and custom caching. -- ⚠ **BREAKING** (**compute**) Add priors joblib cache -- ⚠ **BREAKING** (**data**) Replace the `generate` cmd.\ - This was just supposed to be a little script to generate data for an - integration test, but it turns out that it could just be used to update - the old `generate` command. - BREAKING CHANGES: `generate` command is better configurable -- (**config**) Merge sample/sampling configs. -- Use lydata's `ModalityConfig`.\ - Since the [lydata](https://github.com/lycosystem/lydata) package is - evolving quickly, I added it as a dependency and moved the first bit of - code over there. -- Enable use of lydata to load patient data. -- (**comp**) HDF5 file storage more versatile. -- (**sample**) Store history in .tmp file.\ - This serves an indication about whether or not a burn-in sampling round - has been interrupted. The sampler may then continue from where it left off. -- ⚠ **BREAKING** (**compute**) Update prevalence computation. -- More useful YAML load/merge logging. -- Improve logging of some utilities. - -### Remove - -- Outdated streamlit app. -- Temporary test file. -- Delete remaining streamlit code. -- ⚠ **BREAKING** Unused HDF5 cache and scenarios.\ - Both these things are superseded by better stuff based on pydantic. 
- -## [1.0.0.a2] - 2024-04-28 - -### 🚀 Features - -- *(sample)* Allow no multiprocessing (0 cores) -- *(scenario)* Add `from_dict` classmethod -- *(plot)* Add `offset` to hist and beta dist - -### 🐛 Bug Fixes - -- Use correct heaer rows, fixes [#57] -- *(compute)* Observe bilateral prevalence -- *(scenario)* Dataclass with prpoerty issue -- Ensure sides in scenario diagnose/involvement -- *(compute)* Correct obs and pred prevalences - -### 📚 Documentation - -- *(data)* Refactor lyproxify docstrings -- Update badge to link to RTD, fixes [#53] - -### 🧪 Testing - -- Fix testing setup & add new histogram test - -### Change - -- *(data)* Make copy before edit in-place -- *(scenario)* Make scenario dataclass - -### Merge - -- Branch '57-lyproxify-loads-wrong-number-of-header-rows' into 'dev' - -## [1.0.0.a1] - 2024-04-03 - -### 🚀 Features - -- *(data)* Add simple data filter command, fixes [#51] -- Customize log handler for better filename -- *(sample)* Allow custom T-stage mapping -- *(sample)* Allow to load `side` data -- *(utils)* Allow `Unilateral.binary` in params -- *(sample)* Display time elapsed during burnin -- *(predict)* Add cmd to precompute state dists -- *(post)* Start with posterior cmd (WIP) -- *(precompute)* Add `priors` cmd, related to [#54] -- *(precompute)* Work on posterior (WIP) -- *(utils)* Allow keywords in modalities def -- *(post)* Compute for multiple scenarios -- *(predict)* Update prevalences cmd (WIP) -- Add class for storing scenarios -- *(precompute)* Priors from list of scenarios -- *(predict)* Finish prevalences cmd -- *(data)* Implement custom pandas accessor -- *(scenario)* Track laterality as well -- *(compute)* Risk works with lymph v1, too, now - -### 🐛 Bug Fixes - -- [**breaking**] Use modern lydata cols for t_stage matching -- Wrong lnls in predict prevalences -- [**breaking**] Update prev prediction to new lymph API -- *(sample)* Match T-stage mapping with lymph API -- *(data)* Stop dtype change during `concat` -- *(sample)* Skip 
0 iter convergence check -- *(sample)* Missing import -- *(sample)* Only pass `side` to unilateral model -- *(sample)* Display converged message nicely -- *(utils)* Correct default args for `get_chain()` -- Wrong posterior shape -- *(predict)* Even out some bugs -- Don't pycln accessor import -- *(data)* Enhance failed due to copy on write - -### 📚 Documentation - -- Start with basic sphinx setup -- Start organizing top-level cmds with sphinx -- Include all modules in docs -- Update docstrings to reST format -- Add document files for precompute subcmd -- Allow links to lymph docs -- Fix `temp_schedule` docstring -- Shorten titles -- *(predict)* Update prevalence module docstring -- Refactor docs for new `compute` subcommand -- Fix typos and missing modules - -### 🧪 Testing - -- *(data)* Ensure new joining works correctly -- *(sample)* Check some sampling methods -- Fix typos in tests -- Update failing tests - -### ⚙️ Miscellaneous Tasks - -- Add readthedocs config file -- Remove pdoc action -- Update changelog - -### Build - -- Bump lymph-model to v1.0 -- Bump lymph version & add sphinx deps - -### Change - -- *(sample)* [**breaking**] Start on new sample command (WIP) -- *(sample)* [**breaking**] Reimplement sampling command -- *(precompute)* Use HDF5 cache -- *(precompute)* Make recursive. 
Related: [#54] -- *(prevs)* Start on updated `prevalences` cmd -- Replace 'diagnose' & bump lymph to 1.2.0 -- Simplify scenario handling (WIP) -- *(precompute)* Posteriors only from priors -- *(scenario)* Shorten hash to 6 digits - -### Merge - -- Branch 'main' into 'dev' -- Branch '51-filter-command' into 'dev' -- Branch '53-use-sphinx-for-documentation' into 'dev' -- Branch '54-add-precompute-commands' into 'dev' - -### Refac - -- *(sample)* Better progress tracking -- *(precompute)* Comp state dist in own submod -- *(utils)* Move funcs out of `precompute` -- Bundle adding scneario args to parser -- *(compute)* Predict & precompute -> compute - -### Remove - -- [**breaking**] Midline_ext in create_patient_row for now - -## [1.0.0.a0] - 2023-12-20 - -### Bug Fixes - -- Update imports to new lymph version -- Remove references to `clean` command - -### Miscellaneous Tasks - -- [**breaking**] Lyprox to lymph convert not necessary anymore - -### Build - -- Bump lymph-model version to `>=1.0.0.a4` -- Bump type hints to Python 3.10 - -### Change - -- [**breaking**] `evaluate`: command does not depend on lymph model anymore -- Simplify `log_state()` decorator -- [**breaking**] Change model initialization in some places -- [**breaking**] Use deorated function name for `log_state()` message - -### Refac - -- [**breaking**] Deduplicate data loading functions: `load_csv_table()` was removed and `load_data_for_model()` renamed to `load_patient_data()` -- Change function names & remove logger from decorated function calls - -### Remove - -- [**breaking**] Delete unnecessary functions - -## [0.7.3] - 2023-08-29 - -### Bug Fixes - -- **data:** `enhance` command is now deterministic, fixes [#40] -- **plot:** correct color keyword arguments & swap arguments in `save_figure` function, fixes [#45] -- **sample:** use global numpy random state, fixes [#31] - -### Maintenance - -- fix upper version bound of lymph-model package - -### Testing - -- **sample:** add test for determinism of 
sampling, related to [#31] - -## [0.7.2] - 2023-07-31 - -### Bug Fixes - -- `enhance`: fix bug introduced in [0.7.1] - -## [0.7.1] - 2023-07-31 - -### Bug Fixes - -- `enhance`: negative sublevels don't overwrite superlevels anymore. Fixes [#44]. - -### Maintenance - -- bump pre-commit hooks - -## [0.7.0] - 2023-06-26 - -### Bug Fixes - -- add modalities from params in synthetic data generation - -### Features - -- add extensible & versatile logging decorator -- add `--log-level` option to top-level lyscripts command -- add log-level to `log_state` decorator - -### Other - -- all commands now use the logging library for status updates/ouputs. This fixes [#2]. - -## [0.6.9] - 2023-06-21 - -### Bug Fixes - -- change the indentation length in the generated markdown data documentation to 4 spaces. Fixes [#41]. - -## [0.6.8] - 2023-05-30 - -### Bug Fixes - -- flattening error in `lyproxify` -- more robust lyproxify working again - -### Documentation - -- add detail to docstring of `lyproxify` func - -### Features - -- add func to generate md docs from column map -- add two new dict modifying functions - -## [0.6.7] - 2023-05-23 - -### Bug Fixes - -- make flatten/unflatten funcs more consistent -- add `max_depth` option for `flatten` function -- bump isort version to avoid error - -### Features - -- add `unflatten` function - -## [0.6.6] - 2022-12-01 - -### Bug Fixes - -- pull another function out of a `rich` context, this time in the `join` command. Related to [#33]. - -## [0.6.5] - 2022-12-01 - -### Bug Fixes - -- swap arguments in the `save_figure` call of the `corner` command -- pull a function using [`rich`] to report its status out of an enclosing [`rich`] context. This fixes [#33]. - -## [0.6.4] - 2022-12-01 - -### Bug Fixes - -- `hist_kwargs` now overrides the default plot settings for `Histogram`. 
This fixes [#30] - -### Features - -- the `lyscripts sample` command now has an argument `--seed` with the aim of making sampling runs reproducible via a random number generator seed. However, it seems as if the [`emcee`] package does not properly support this as runs using the same seed still produce different results. Related to, but not resolving [#31]. - -## [0.6.3] - 2022-11-25 - -### Bug Fixes - -- `lyproxify`: apply re-indexing only *after* excluding patients -- fix `SettingWithCopyWarning` during re-indexing in `lyproxify` - -## [0.6.2] - 2022-11-25 - -### Bug Fixes - -- `lyproxify` cleans empty header cell names - -### Documentation - -- update lyproxify's `main` docstring -- improve `report_state` & `exclude_patients` documentation -- update top-level `lyproxify` help in README.md - -### Features - -- allow muting `report_state` decorator globally for a decorated function, while also allowing to override the verbosity per function call -- allow adding an index column during `lyproxify` -- add options to `lyproxify` for dropping rows and columns before starting transformation of raw data -- the `report_state` decorator can now be configured to exit the program when encountering an unexpected exception - -## [0.6.1] - 2022-11-24 - -### Features - -- add new command under `lyscripts data` to preprocess any raw data into a format that can be parsed by [LyProX]. 
Fixes [#25] - -## [0.6.0] - 2022-11-23 - -### Bug Fixes - -- display errors and stop, but don't reraise -- add & update main entry point for script use - -### Code Refactoring - -- use `lyscripts.utils` consistently across data commands -- use `lyscripts.utils` for `evaluate` script -- pull out method to compare prevalence for one sample -- write modular functions for loading YAML, CSV and HDF5 data -- make `lyscripts data join` command a bit more readable -- further modularize `lyscripts data ...` scripts -- standardize CSV saving process -- start to add `utils` for data commands -- put data commands in separate submodule, fixes [#5] (**BREAKING CHANGE!**) - -### Documentation - -- expand documentation on data, plot & predict subcommands -- enrich the module documentation of predict scripts -- update docstrings of data commands - -### Features - -- add YAML scenario output to prevalence app -- working version of prevalence app -- add prevalence plot to app -- allow constructing the `lyscripts.plot.utils.Histogram` and `lyscripts.plot.utils.Posterior` from plain data without HDF5 file (**BREAKING CHANGE!**) -- `lyscripts.temp_schedule` output does not have pretty border anymore, making copy & paste easier -- use generators for risk & prevalence prediction, fixes [#23] -- add more params widgets for prevalence app -- add t_stage, midline_ext, ... to prevalence app -- add `LyScriptsError` for passing up messages -- make smart decorators for status reporting -- implement setup of prevalence app -- start implementing streamlit apps - -### Testing - -- add GitHub action for tests -- fix missing import for corner doctests -- generally, the module is now partially covered by unit tests - -## [0.5.11] - 2022-11-06 - -### Bug Fixes - -- remove useless import & rename prediction `utils` -- histogram & respective posterior have same color -- fix two bugs detected during integration test: - 1. The custom enumerate with optional progress bar did not enumerate - 2. 
Function checking if midline extension matches did not work for some lymph classes -- fix wrong import in submodules `plot` & `predict` -- correct relative imports & remove unused functions -- fix small inconsistency in script call - -### Code Refactoring - -- use function for loading YAML in all scripts -- further modularize tasks, e.g. params loading -- rename test modules -- put function saving figures in separate utility -- replace common main tasks with `util` functions -- pull out function to extract model param labels -- make `utils` public and hence documented -- greatly simplify histogram plotting script -- pull shared functions into `_utils` -- update [`rich_argparse`] & add highlighting (fixes [#20]) -- outsource rich enumeration of predictions -- make prevalence prediction much cleaner -- risk & prevalence share method to clean pattern - -### Documentation - -- update help in predict's docstrings - -### Features - -- write decorators for general tasks -- add nice helper functions to plot (fixes [#21]) -- risk & prevalence can use thinned samples -- write neat recursive functions to flatten dictionaries - -### Testing - -- implement more `utils` tests -- add test to the `save_figure` utility -- add test to new params loader -- add small doctest to `get_size` plot utility -- add checks for plotting utils -- write simple tests for prevalence prediction -- add doctest & pytest for predict `utils` - -## [0.5.10] - 2022-10-13 - -### Bug Fixes - -- pick correct consensus method for enhancement ([#17]) -- sample does not crash when `pools` not given ([#16]) -- add thinning to convergence sampling, too ([#15]) - -### Documentation - -- fix typos & add favicon to docs - -## [0.5.9] - 2022-09-16 - -### Documentation - -- don't use relative path for social card - -### Features - -- `sample` command has a new optional argument `--pools` with which one can adjust the number of multiprocessing pools used during the sampling procedure. 
Fixes [#13] - -## [0.5.8] - 2022-09-12 - -### Bug Fixes - -- The function `get_midline_ext_prob` in the prevalence prediction now -does not throw an error anymore when unilateral data is provided, but -returns `None` instead. Fixes [#11] - -### Features - -- add entry points to CLI. This enables one to call `lyscripts ...` directly, instead of having to use `python -m lyscripts ...` all the time. - -### Documentation - -- add social card to README -- remove `python -m` prefix from command usage in docstrings - -## [0.5.7] - 2022-08-29 - -### Bug Fixes - -- fix `enhance`'s issue with varying LNLs across modalities ([#8]) - -### Features - -- add progress bar to `enhance` script - -## [0.5.6] - 2022-08-29 - -### Bug Fixes - -- can choose list of defined mods in params. This allows one to choose different lists of modalities for e.g. the `enhance` script and the `sampling` one. - -### Documentation - -- correct typos in the changed docstrings -- update docstring of changed scripts - -## [0.5.5] - 2022-08-25 - -### Bug Fixes - -- clean script was using deprecated lymph.utils. This script has now been incorporated into these scripts. - -### Documentation - -- update README and add docstrings about `enhance` - -### Features - -- add enhancement scipt that computes additional diagnostic modalities, combining existing ones. 
- -## [0.5.4] - 2022-08-24 - -### Documentation - -- add call signature to docs every subcommand's `main()` -- add badges, installation & usage to README -- fix pdoc issue with importing `__main__` files - -### Maintenance - -- make pyproject.toml look nice on PyPI -- tell git to ignore docs dir -- set up git-chglog for creating changelogs -- add pre-commit hook to check commit msg - -## [0.5.3] - 2022-08-22 - - -[1.0.2]: https://github.com/lycosystem/lyscripts/compare/1.0.1...1.0.2 -[1.0.1]: https://github.com/lycosystem/lyscripts/compare/1.0.0...1.0.1 -[1.0.0]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc3...1.0.0 -[1.0.0rc3]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc2...1.0.0rc3 -[1.0.0rc2]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc1...1.0.0rc2 -[1.0.0rc1]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a7...1.0.0rc1 -[1.0.0.a7]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a6...1.0.0.a7 -[1.0.0.a6]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a5...1.0.0.a6 -[1.0.0.a5]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a4...1.0.0.a5 -[1.0.0.a4]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a3...1.0.0.a4 -[1.0.0.a3]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a2...1.0.0.a3 -[1.0.0.a2]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a1...1.0.0.a2 -[1.0.0.a1]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a0...1.0.0.a1 -[1.0.0.a0]: https://github.com/lycosystem/lyscripts/compare/0.7.3...1.0.0.a0 -[0.7.3]: https://github.com/lycosystem/lyscripts/compare/0.7.2...0.7.3 -[0.7.2]: https://github.com/lycosystem/lyscripts/compare/0.7.1...0.7.2 -[0.7.1]: https://github.com/lycosystem/lyscripts/compare/0.7.0...0.7.1 -[0.7.0]: https://github.com/lycosystem/lyscripts/compare/0.6.9...0.7.0 -[0.6.9]: https://github.com/lycosystem/lyscripts/compare/0.6.8...0.6.9 -[0.6.8]: https://github.com/lycosystem/lyscripts/compare/0.6.7...0.6.8 -[0.6.7]: 
https://github.com/lycosystem/lyscripts/compare/0.6.6...0.6.7 -[0.6.6]: https://github.com/lycosystem/lyscripts/compare/0.6.5...0.6.6 -[0.6.5]: https://github.com/lycosystem/lyscripts/compare/0.6.4...0.6.5 -[0.6.4]: https://github.com/lycosystem/lyscripts/compare/0.6.3...0.6.4 -[0.6.3]: https://github.com/lycosystem/lyscripts/compare/0.6.2...0.6.3 -[0.6.2]: https://github.com/lycosystem/lyscripts/compare/0.6.1...0.6.2 -[0.6.1]: https://github.com/lycosystem/lyscripts/compare/0.6.0...0.6.1 -[0.6.0]: https://github.com/lycosystem/lyscripts/compare/0.5.11...0.6.0 -[0.5.11]: https://github.com/lycosystem/lyscripts/compare/0.5.10...0.5.11 -[0.5.10]: https://github.com/lycosystem/lyscripts/compare/0.5.9...0.5.10 -[0.5.9]: https://github.com/lycosystem/lyscripts/compare/0.5.8...0.5.9 -[0.5.8]: https://github.com/lycosystem/lyscripts/compare/0.5.7...0.5.8 -[0.5.7]: https://github.com/lycosystem/lyscripts/compare/0.5.6...0.5.7 -[0.5.6]: https://github.com/lycosystem/lyscripts/compare/0.5.5...0.5.6 -[0.5.5]: https://github.com/lycosystem/lyscripts/compare/0.5.4...0.5.5 -[0.5.4]: https://github.com/lycosystem/lyscripts/compare/0.5.3...0.5.4 -[0.5.3]: https://github.com/lycosystem/lyscripts/compare/0.5.2...0.5.3 - -[#2]: https://github.com/lycosystem/lyscripts/issues/2 -[#5]: https://github.com/lycosystem/lyscripts/issues/5 -[#8]: https://github.com/lycosystem/lyscripts/issues/8 -[#11]: https://github.com/lycosystem/lyscripts/issues/11 -[#13]: https://github.com/lycosystem/lyscripts/issues/13 -[#15]: https://github.com/lycosystem/lyscripts/issues/15 -[#16]: https://github.com/lycosystem/lyscripts/issues/16 -[#17]: https://github.com/lycosystem/lyscripts/issues/17 -[#20]: https://github.com/lycosystem/lyscripts/issues/20 -[#21]: https://github.com/lycosystem/lyscripts/issues/21 -[#23]: https://github.com/lycosystem/lyscripts/issues/23 -[#25]: https://github.com/lycosystem/lyscripts/issues/25 -[#30]: https://github.com/lycosystem/lyscripts/issues/30 -[#31]: 
https://github.com/lycosystem/lyscripts/issues/31 -[#33]: https://github.com/lycosystem/lyscripts/issues/33 -[#40]: https://github.com/lycosystem/lyscripts/issues/40 -[#41]: https://github.com/lycosystem/lyscripts/issues/41 -[#44]: https://github.com/lycosystem/lyscripts/issues/44 -[#45]: https://github.com/lycosystem/lyscripts/issues/45 -[#51]: https://github.com/lycosystem/lyscripts/issues/51 -[#53]: https://github.com/lycosystem/lyscripts/issues/53 -[#54]: https://github.com/lycosystem/lyscripts/issues/54 -[#57]: https://github.com/lycosystem/lyscripts/issues/57 -[#70]: https://github.com/lycosystem/lyscripts/issues/70 -[#72]: https://github.com/lycosystem/lyscripts/issues/72 -[#74]: https://github.com/lycosystem/lyscripts/issues/74 -[#75]: https://github.com/lycosystem/lyscripts/issues/75 -[#81]: https://github.com/lycosystem/lyscripts/issues/81 - -[`emcee`]: https://emcee.readthedocs.io/en/stable/ -[`rich`]: https://rich.readthedocs.io/en/latest/ -[`rich_argparse`]: https://github.com/hamdanal/rich_argparse -[LyProX]: https://lyprox.org -[@noemibuehrer]: https://github.com/noemibuehrer diff --git a/CITATION.cff b/CITATION.cff deleted file mode 100755 index 70404a2..0000000 --- a/CITATION.cff +++ /dev/null @@ -1,25 +0,0 @@ -# This CITATION.cff file was generated with cffinit. -# Visit https://bit.ly/cffinit to generate yours today! - -cff-version: 1.2.0 -title: lyscripts -message: >- - If you use this software, please cite it using the - metadata from this file. -type: software -authors: - - given-names: Roman - family-names: Ludwig - orcid: 'https://orcid.org/0000-0001-9434-328X' - affiliation: University Hospital Zurich -repository-code: 'https://github.com/lycosystem/lyscripts' -url: 'https://lyscripts.readthedocs.io' -abstract: >- - Scripts for reproducible research on lymphatic tumor - progression in head and neck cancer. 
-keywords: - - cancer - - metastasis - - lymphatic system - - head and neck -license: MIT diff --git a/LICENSE b/LICENSE deleted file mode 100644 index f68adb1..0000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2022 Roman Ludwig - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/README.md b/README.md index e64323b..70b6bf9 100644 --- a/README.md +++ b/README.md @@ -1,72 +1,70 @@ -

+# Repository Coverage + +[Full report](https://htmlpreview.github.io/?https://github.com/lycosystem/lyscripts/blob/python-coverage-comment-action-data/htmlcov/index.html) + +| Name | Stmts | Miss | Cover | Missing | +|------------------------------------------- | -------: | -------: | ------: | --------: | +| src/lyscripts/\_\_init\_\_.py | 30 | 7 | 77% |58-59, 67-73 | +| src/lyscripts/\_\_main\_\_.py | 3 | 3 | 0% | 3-6 | +| src/lyscripts/\_version.py | 11 | 0 | 100% | | +| src/lyscripts/cli.py | 45 | 26 | 42% |65-71, 85-91, 104-120 | +| src/lyscripts/compute/\_\_init\_\_.py | 5 | 1 | 80% | 22 | +| src/lyscripts/compute/\_\_main\_\_.py | 5 | 5 | 0% | 3-8 | +| src/lyscripts/compute/evidence.py | 65 | 41 | 37% |45, 60-61, 72-95, 124-192, 196-197 | +| src/lyscripts/compute/posteriors.py | 46 | 19 | 59% |97-137, 141-142 | +| src/lyscripts/compute/prevalences.py | 82 | 7 | 91% |60-62, 96-101, 235-236 | +| src/lyscripts/compute/priors.py | 35 | 2 | 94% | 110-111 | +| src/lyscripts/compute/risks.py | 51 | 33 | 35% |47-65, 81-135, 139-140 | +| src/lyscripts/compute/utils.py | 118 | 6 | 95% |95, 146, 177, 188, 240, 250 | +| src/lyscripts/configs.py | 280 | 41 | 85% |90, 122, 165, 173, 217, 271, 277-278, 286, 472-474, 482, 491-492, 527-536, 547, 581-584, 589, 660, 699-712, 755 | +| src/lyscripts/data/\_\_init\_\_.py | 6 | 1 | 83% | 53 | +| src/lyscripts/data/\_\_main\_\_.py | 18 | 18 | 0% | 3-36 | +| src/lyscripts/data/collect/\_\_init\_\_.py | 58 | 27 | 53% |56-58, 64, 76, 92-116, 137-144 | +| src/lyscripts/data/enhance.py | 19 | 6 | 68% |40-48, 52-53 | +| src/lyscripts/data/fetch.py | 21 | 7 | 67% |42-52, 56-57 | +| src/lyscripts/data/filter.py | 48 | 30 | 38% |43-66, 76-94, 98-99 | +| src/lyscripts/data/generate.py | 35 | 4 | 89% |58, 63, 95-96 | +| src/lyscripts/data/join.py | 22 | 10 | 55% |61-77, 81-82 | +| src/lyscripts/data/lyproxify.py | 123 | 67 | 46% |31-34, 39-46, 90-119, 132-142, 173, 250-282, 293-307, 340-341 | +| src/lyscripts/data/split.py | 29 | 14 | 52% 
|33-65, 72-73 | +| src/lyscripts/data/utils.py | 9 | 0 | 100% | | +| src/lyscripts/decorators.py | 41 | 4 | 90% | 53-55, 70 | +| src/lyscripts/evaluate.py | 70 | 52 | 26% |29-35, 43-70, 87, 102-103, 114-134, 139-197, 201-205 | +| src/lyscripts/integrate.py | 46 | 22 | 52% |46-53, 104-154, 162-163 | +| src/lyscripts/plots.py | 160 | 18 | 89% |46-47, 56, 185-186, 336, 341, 370-392, 399 | +| src/lyscripts/sample.py | 137 | 12 | 91% |35-36, 74, 132-135, 175, 191, 304, 425-426 | +| src/lyscripts/schedule.py | 11 | 5 | 55% |24-28, 32-33 | +| src/lyscripts/schema.py | 22 | 3 | 86% | 61-62, 66 | +| src/lyscripts/utils.py | 84 | 5 | 94% |25, 141-142, 196-197 | +| **TOTAL** | **1735** | **496** | **71%** | | + + +## Setup coverage badge + +Below are examples of the badges you can use in your main branch `README` file. + +### Direct image + +[![Coverage badge](https://raw.githubusercontent.com/lycosystem/lyscripts/python-coverage-comment-action-data/badge.svg)](https://htmlpreview.github.io/?https://github.com/lycosystem/lyscripts/blob/python-coverage-comment-action-data/htmlcov/index.html) + +This is the one to use if your repository is private or if you don't want to customize anything. 
+ +### [Shields.io](https://shields.io) Json Endpoint -[![MIT license](https://img.shields.io/badge/license-MIT-blue.svg?style=flat)](https://github.com/lycosystem/lyscripts/blob/main/LICENSE) -[![GitHub repo](https://img.shields.io/badge/lycosystem%2Flymph-grey.svg?style=flat&logo=github)](https://github.com/lycosystem/lyscripts) -[![build badge](https://github.com/lycosystem/lyscripts/actions/workflows/release.yml/badge.svg?style=flat)](https://pypi.org/project/lyscripts/) -[![docs badge](https://readthedocs.org/projects/lyscripts/badge/?version=latest)](https://lyscripts.readthedocs.io/en/latest/?badge=latest) -[![tests badge](https://github.com/lycosystem/lyscripts/actions/workflows/tests.yml/badge.svg?style=flat)](https://lyscripts.readthedocs.io/en/latest/?badge=latest) [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/lycosystem/lyscripts/python-coverage-comment-action-data/endpoint.json)](https://htmlpreview.github.io/?https://github.com/lycosystem/lyscripts/blob/python-coverage-comment-action-data/htmlcov/index.html) -## What are these `lyscripts`? +Using this one will allow you to [customize](https://shields.io/endpoint) the look of your badge. +It won't work with private repositories. It won't be refreshed more than once per five minutes. -This package provides convenient scripts for performing inference and learning regarding the lymphatic spread of head & neck cancer. Essentially, it provides a *command line interface* (CLI) to the [lymph](https://github.com/lycosystem/lymph) library and the [lydata](https://github.com/lycosystem/lydata) repository that stores lymphatic progression data. +### [Shields.io](https://shields.io) Dynamic Badge -We are making these "convenience" scripts public, because doing so is one necessary requirement to making our research easily and fully reproducible. 
There exists another repository, [lynference](https://github.com/lycosystem/lynference), where we stored the pipelines that produced our published results in a persistent way. +[![Coverage badge](https://img.shields.io/badge/dynamic/json?color=brightgreen&label=coverage&query=%24.message&url=https%3A%2F%2Fraw.githubusercontent.com%2Flycosystem%2Flyscripts%2Fpython-coverage-comment-action-data%2Fendpoint.json)](https://htmlpreview.github.io/?https://github.com/lycosystem/lyscripts/blob/python-coverage-comment-action-data/htmlcov/index.html) -## Installation +This one will always be the same color. It won't work for private repos. I'm not even sure why we included it. -These scripts can be installed via `pip`: +## What is that? -```bash -pip install lyscripts -``` - -or installed from source by cloning this repo - -```bash -git clone https://github.com/lycosystem/lyscripts.git -cd lyscripts -pip install . -``` - -## Usage - -This package is intended to be mainly used as a collection of Python scripts that would be scattered throughout my projects, if I didn't bundle them here. Hence, they're mostly command line tools that do basic and repetitive stuff. - -### As a Command Line Tool - -Simply run - -``` -lyscripts --help -``` - -in your terminal to display the help text for the main command. It will list all subcommands that are avialable, which you can then also call with `lyscripts --help` to get more information on its use and the available arguments. - -For example, one subcommand is `lyscripts data collect`, which will launch a small web server that allows a user to enter patient records on lymphatic involvement in head and neck cancer one row at a time and construct a standardized CSV file from it. - - - -You can also refer to the [documentation] for a written-down version of all these help texts and even more context on how and why to use the provided commands. 
- -### As a Library - -Head over to the [documentation] for more information on the individual modules, classes, and functions that are implemented in this package. - -[documentation]: https://lyscripts.readthedocs.io - -### Configuration YAML Schema - -Most of the CLI commands allow passing a list of `--configs` in the form of YAML files. If for a particular CLI argument no value is passed directly, the program looks for the corresponding value in the merged YAML files (if multiple files are provided, later ones may overwrite earlier ones). - -For these YAML files we provide a unified schema containing all possible fields that any of the CLIs may accept. It is located at `schemas/ly.json` in this repository. So, one could configure e.g. VS Code to consider this schema for all `*.ly.yaml` files. Here is how that could look like in the JSON settings of VS Code: - -```json -{ - "yaml.schemas": { - "https://raw.githubusercontent.com/lycosystem/lyscripts/main/schemas/ly.json": "*.ly.yaml" - } -} -``` - -Subsequently, all files ending in `.ly.yaml` will have helpful autocompletion on the allowed/expected types available. +This branch is part of the +[python-coverage-comment-action](https://github.com/marketplace/actions/python-coverage-comment) +GitHub Action. All the files in this branch are automatically generated and may be +overwritten at any moment. 
\ No newline at end of file diff --git a/badge.svg b/badge.svg new file mode 100644 index 0000000..6f50e5c --- /dev/null +++ b/badge.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/data.json b/data.json new file mode 100644 index 0000000..ed45deb --- /dev/null +++ b/data.json @@ -0,0 +1 @@ +{"coverage": 71.41210374639769, "raw_data": {"meta": {"format": 3, "version": "7.13.5", "timestamp": "2026-04-08T14:17:37.564076", "branch_coverage": false, "show_contexts": false}, "files": {"src/lyscripts/__init__.py": {"executed_lines": [7, 8, 10, 11, 12, 13, 20, 21, 22, 23, 25, 26, 27, 28, 29, 33, 35, 38, 41, 45, 56, 61, 76], "summary": {"covered_lines": 23, "num_statements": 30, "percent_covered": 76.66666666666667, "percent_covered_display": "77", "missing_lines": 7, "excluded_lines": 0, "percent_statements_covered": 76.66666666666667, "percent_statements_covered_display": "77"}, "missing_lines": [58, 59, 67, 69, 70, 71, 73], "excluded_lines": [], "functions": {"LyscriptsCLI.__init__": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 2, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [58, 59], "excluded_lines": [], "start_line": 56}, "LyscriptsCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 5, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [67, 69, 70, 71, 73], "excluded_lines": [], "start_line": 61}, "": {"executed_lines": [7, 8, 10, 11, 12, 13, 20, 21, 22, 23, 25, 26, 27, 28, 29, 33, 35, 38, 41, 45, 56, 61, 76], "summary": {"covered_lines": 23, "num_statements": 23, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, 
"percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"LyscriptsCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 7, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 7, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [58, 59, 67, 69, 70, 71, 73], "excluded_lines": [], "start_line": 38}, "": {"executed_lines": [7, 8, 10, 11, 12, 13, 20, 21, 22, 23, 25, 26, 27, 28, 29, 33, 35, 38, 41, 45, 56, 61, 76], "summary": {"covered_lines": 23, "num_statements": 23, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/__main__.py": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 5, 6], "excluded_lines": [], "functions": {"": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 5, 6], "excluded_lines": [], "start_line": 1}}, "classes": {"": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 5, 6], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/_version.py": {"executed_lines": [3, 5, 14, 15, 16, 17, 18, 19, 21, 22, 24], 
"summary": {"covered_lines": 11, "num_statements": 11, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "functions": {"": {"executed_lines": [3, 5, 14, 15, 16, 17, 18, 19, 21, 22, 24], "summary": {"covered_lines": 11, "num_statements": 11, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"": {"executed_lines": [3, 5, 14, 15, 16, 17, 18, 19, 21, 22, 24], "summary": {"covered_lines": 11, "num_statements": 11, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/cli.py": {"executed_lines": [11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 25, 39, 41, 48, 50, 53, 74, 98, 101], "summary": {"covered_lines": 19, "num_statements": 45, "percent_covered": 42.22222222222222, "percent_covered_display": "42", "missing_lines": 26, "excluded_lines": 0, "percent_statements_covered": 42.22222222222222, "percent_statements_covered_display": "42"}, "missing_lines": [65, 66, 67, 68, 69, 71, 85, 86, 88, 89, 90, 91, 104, 105, 106, 107, 110, 111, 112, 113, 114, 115, 116, 117, 118, 120], "excluded_lines": [], "functions": {"assemble_main": {"executed_lines": [39, 50], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 25}, "assemble_main.main": {"executed_lines": [41, 48], 
"summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 39}, "somewhat_safely_get_loglevel": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 6, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 6, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [65, 66, 67, 68, 69, 71], "excluded_lines": [], "start_line": 53}, "configure_logging": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 6, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 6, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [85, 86, 88, 89, 90, 91], "excluded_lines": [], "start_line": 74}, "InterceptHandler.emit": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 14, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 14, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [104, 105, 106, 107, 110, 111, 112, 113, 114, 115, 116, 117, 118, 120], "excluded_lines": [], "start_line": 101}, "": {"executed_lines": [11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 25, 53, 74, 98, 101], "summary": {"covered_lines": 15, "num_statements": 15, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"InterceptHandler": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 14, "percent_covered": 0.0, "percent_covered_display": "0", 
"missing_lines": 14, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [104, 105, 106, 107, 110, 111, 112, 113, 114, 115, 116, 117, 118, 120], "excluded_lines": [], "start_line": 98}, "": {"executed_lines": [11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 25, 39, 41, 48, 50, 53, 74, 98, 101], "summary": {"covered_lines": 19, "num_statements": 31, "percent_covered": 61.29032258064516, "percent_covered_display": "61", "missing_lines": 12, "excluded_lines": 0, "percent_statements_covered": 61.29032258064516, "percent_statements_covered_display": "61"}, "missing_lines": [65, 66, 67, 68, 69, 71, 85, 86, 88, 89, 90, 91], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/compute/__init__.py": {"executed_lines": [6, 8, 11, 20], "summary": {"covered_lines": 4, "num_statements": 5, "percent_covered": 80.0, "percent_covered_display": "80", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 80.0, "percent_statements_covered_display": "80"}, "missing_lines": [22], "excluded_lines": [], "functions": {"ComputeCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [22], "excluded_lines": [], "start_line": 20}, "": {"executed_lines": [6, 8, 11, 20], "summary": {"covered_lines": 4, "num_statements": 4, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"ComputeCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, 
"percent_statements_covered_display": "0"}, "missing_lines": [22], "excluded_lines": [], "start_line": 11}, "": {"executed_lines": [6, 8, 11, 20], "summary": {"covered_lines": 4, "num_statements": 4, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/compute/__main__.py": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 5, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 4, 6, 7, 8], "excluded_lines": [], "functions": {"": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 5, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 4, 6, 7, 8], "excluded_lines": [], "start_line": 1}}, "classes": {"": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 5, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 4, 6, 7, 8], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/compute/evidence.py": {"executed_lines": [7, 9, 10, 12, 13, 14, 15, 16, 17, 18, 20, 21, 28, 31, 48, 64, 98, 101, 102, 103, 106, 110, 115, 195], "summary": {"covered_lines": 24, "num_statements": 65, "percent_covered": 36.92307692307692, "percent_covered_display": "37", "missing_lines": 41, "excluded_lines": 0, "percent_statements_covered": 36.92307692307692, "percent_statements_covered_display": "37"}, "missing_lines": [45, 60, 61, 72, 74, 75, 80, 81, 82, 84, 85, 90, 92, 93, 95, 124, 126, 128, 130, 
132, 137, 138, 141, 142, 150, 156, 158, 168, 169, 172, 173, 178, 179, 181, 186, 187, 189, 190, 192, 196, 197], "excluded_lines": [], "functions": {"comp_bic": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [45], "excluded_lines": [], "start_line": 31}, "compute_evidence": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 2, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [60, 61], "excluded_lines": [], "start_line": 48}, "compute_ti_results": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 12, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 12, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [72, 74, 75, 80, 81, 82, 84, 85, 90, 92, 93, 95], "excluded_lines": [], "start_line": 64}, "EvidenceCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 24, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 24, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [124, 126, 128, 130, 132, 137, 138, 141, 142, 150, 156, 158, 168, 169, 172, 173, 178, 179, 181, 186, 187, 189, 190, 192], "excluded_lines": [], "start_line": 115}, "": {"executed_lines": [7, 9, 10, 12, 13, 14, 15, 16, 17, 18, 20, 21, 28, 31, 48, 64, 98, 101, 102, 103, 106, 110, 115, 195], "summary": {"covered_lines": 24, "num_statements": 26, "percent_covered": 92.3076923076923, "percent_covered_display": "92", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 92.3076923076923, 
"percent_statements_covered_display": "92"}, "missing_lines": [196, 197], "excluded_lines": [], "start_line": 1}}, "classes": {"EvidenceCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 24, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 24, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [124, 126, 128, 130, 132, 137, 138, 141, 142, 150, 156, 158, 168, 169, 172, 173, 178, 179, 181, 186, 187, 189, 190, 192], "excluded_lines": [], "start_line": 98}, "": {"executed_lines": [7, 9, 10, 12, 13, 14, 15, 16, 17, 18, 20, 21, 28, 31, 48, 64, 98, 101, 102, 103, 106, 110, 115, 195], "summary": {"covered_lines": 24, "num_statements": 41, "percent_covered": 58.53658536585366, "percent_covered_display": "59", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 58.53658536585366, "percent_statements_covered_display": "59"}, "missing_lines": [45, 60, 61, 72, 74, 75, 80, 81, 82, 84, 85, 90, 92, 93, 95, 196, 197], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/compute/posteriors.py": {"executed_lines": [9, 11, 12, 13, 14, 15, 17, 18, 19, 20, 29, 32, 51, 52, 53, 54, 55, 57, 58, 60, 66, 75, 78, 81, 87, 91, 140], "summary": {"covered_lines": 27, "num_statements": 46, "percent_covered": 58.69565217391305, "percent_covered_display": "59", "missing_lines": 19, "excluded_lines": 0, "percent_statements_covered": 58.69565217391305, "percent_statements_covered_display": "59"}, "missing_lines": [97, 99, 102, 104, 105, 106, 107, 109, 110, 111, 113, 122, 123, 125, 135, 136, 137, 141, 142], "excluded_lines": [], "functions": {"compute_posteriors": {"executed_lines": [51, 52, 53, 54, 55, 57, 58, 60, 66, 75], "summary": {"covered_lines": 10, "num_statements": 10, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, 
"missing_lines": [], "excluded_lines": [], "start_line": 32}, "PosteriorsCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 17, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [97, 99, 102, 104, 105, 106, 107, 109, 110, 111, 113, 122, 123, 125, 135, 136, 137], "excluded_lines": [], "start_line": 91}, "": {"executed_lines": [9, 11, 12, 13, 14, 15, 17, 18, 19, 20, 29, 32, 78, 81, 87, 91, 140], "summary": {"covered_lines": 17, "num_statements": 19, "percent_covered": 89.47368421052632, "percent_covered_display": "89", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 89.47368421052632, "percent_statements_covered_display": "89"}, "missing_lines": [141, 142], "excluded_lines": [], "start_line": 1}}, "classes": {"PosteriorsCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 17, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [97, 99, 102, 104, 105, 106, 107, 109, 110, 111, 113, 122, 123, 125, 135, 136, 137], "excluded_lines": [], "start_line": 78}, "": {"executed_lines": [9, 11, 12, 13, 14, 15, 17, 18, 19, 20, 29, 32, 51, 52, 53, 54, 55, 57, 58, 60, 66, 75, 78, 81, 87, 91, 140], "summary": {"covered_lines": 27, "num_statements": 29, "percent_covered": 93.10344827586206, "percent_covered_display": "93", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 93.10344827586206, "percent_statements_covered_display": "93"}, "missing_lines": [141, 142], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/compute/prevalences.py": {"executed_lines": [8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 30, 42, 45, 56, 57, 59, 64, 65, 66, 68, 74, 75, 80, 81, 83, 89, 103, 105, 108, 
110, 111, 112, 113, 114, 115, 116, 119, 143, 144, 145, 147, 148, 149, 151, 158, 164, 167, 173, 178, 180, 181, 184, 186, 187, 188, 189, 191, 192, 193, 195, 204, 205, 207, 217, 222, 223, 224, 225, 234], "summary": {"covered_lines": 75, "num_statements": 82, "percent_covered": 91.46341463414635, "percent_covered_display": "91", "missing_lines": 7, "excluded_lines": 0, "percent_statements_covered": 91.46341463414635, "percent_statements_covered_display": "91"}, "missing_lines": [60, 61, 62, 96, 101, 235, 236], "excluded_lines": [], "functions": {"compute_prevalences": {"executed_lines": [56, 57, 59, 64, 65, 66, 68, 74, 75, 80, 81, 83, 89, 103, 105], "summary": {"covered_lines": 15, "num_statements": 20, "percent_covered": 75.0, "percent_covered_display": "75", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 75.0, "percent_statements_covered_display": "75"}, "missing_lines": [60, 61, 62, 96, 101], "excluded_lines": [], "start_line": 45}, "generate_query_from_diagnosis": {"executed_lines": [110, 111, 112, 113, 114, 115, 116], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 108}, "observe_prevalence": {"executed_lines": [143, 144, 145, 147, 148, 149, 151, 158], "summary": {"covered_lines": 8, "num_statements": 8, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 119}, "PrevalencesCLI.cli_cmd": {"executed_lines": [180, 181, 184, 186, 187, 188, 189, 191, 192, 193, 195, 204, 205, 207, 217, 222, 223, 224, 225], "summary": {"covered_lines": 19, "num_statements": 19, "percent_covered": 100.0, "percent_covered_display": "100", 
"missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 178}, "": {"executed_lines": [8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 30, 42, 45, 108, 119, 164, 167, 173, 178, 234], "summary": {"covered_lines": 26, "num_statements": 28, "percent_covered": 92.85714285714286, "percent_covered_display": "93", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 92.85714285714286, "percent_statements_covered_display": "93"}, "missing_lines": [235, 236], "excluded_lines": [], "start_line": 1}}, "classes": {"PrevalencesCLI": {"executed_lines": [180, 181, 184, 186, 187, 188, 189, 191, 192, 193, 195, 204, 205, 207, 217, 222, 223, 224, 225], "summary": {"covered_lines": 19, "num_statements": 19, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 164}, "": {"executed_lines": [8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 30, 42, 45, 56, 57, 59, 64, 65, 66, 68, 74, 75, 80, 81, 83, 89, 103, 105, 108, 110, 111, 112, 113, 114, 115, 116, 119, 143, 144, 145, 147, 148, 149, 151, 158, 164, 167, 173, 178, 234], "summary": {"covered_lines": 56, "num_statements": 63, "percent_covered": 88.88888888888889, "percent_covered_display": "89", "missing_lines": 7, "excluded_lines": 0, "percent_statements_covered": 88.88888888888889, "percent_statements_covered_display": "89"}, "missing_lines": [60, 61, 62, 96, 101, 235, 236], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/compute/priors.py": {"executed_lines": [7, 9, 10, 11, 12, 14, 15, 16, 23, 26, 42, 43, 44, 46, 52, 53, 60, 63, 66, 68, 84, 85, 86, 88, 89, 90, 92, 93, 94, 96, 105, 106, 109], "summary": {"covered_lines": 33, "num_statements": 35, "percent_covered": 
94.28571428571429, "percent_covered_display": "94", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 94.28571428571429, "percent_statements_covered_display": "94"}, "missing_lines": [110, 111], "excluded_lines": [], "functions": {"compute_priors": {"executed_lines": [42, 43, 44, 46, 52, 53, 60], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 26}, "PriorsCLI.cli_cmd": {"executed_lines": [84, 85, 86, 88, 89, 90, 92, 93, 94, 96, 105, 106], "summary": {"covered_lines": 12, "num_statements": 12, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 68}, "": {"executed_lines": [7, 9, 10, 11, 12, 14, 15, 16, 23, 26, 63, 66, 68, 109], "summary": {"covered_lines": 14, "num_statements": 16, "percent_covered": 87.5, "percent_covered_display": "88", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 87.5, "percent_statements_covered_display": "88"}, "missing_lines": [110, 111], "excluded_lines": [], "start_line": 1}}, "classes": {"PriorsCLI": {"executed_lines": [84, 85, 86, 88, 89, 90, 92, 93, 94, 96, 105, 106], "summary": {"covered_lines": 12, "num_statements": 12, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 63}, "": {"executed_lines": [7, 9, 10, 11, 12, 14, 15, 16, 23, 26, 42, 43, 44, 46, 52, 53, 60, 63, 66, 68, 109], "summary": {"covered_lines": 21, "num_statements": 23, "percent_covered": 91.30434782608695, 
"percent_covered_display": "91", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 91.30434782608695, "percent_statements_covered_display": "91"}, "missing_lines": [110, 111], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/compute/risks.py": {"executed_lines": [8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 29, 32, 68, 71, 77, 79, 138], "summary": {"covered_lines": 18, "num_statements": 51, "percent_covered": 35.294117647058826, "percent_covered_display": "35", "missing_lines": 33, "excluded_lines": 0, "percent_statements_covered": 35.294117647058826, "percent_statements_covered_display": "35"}, "missing_lines": [47, 48, 49, 50, 52, 53, 55, 61, 65, 81, 82, 85, 87, 88, 89, 90, 91, 93, 94, 95, 97, 106, 107, 109, 119, 120, 122, 132, 133, 134, 135, 139, 140], "excluded_lines": [], "functions": {"compute_risks": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 9, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 9, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [47, 48, 49, 50, 52, 53, 55, 61, 65], "excluded_lines": [], "start_line": 32}, "RisksCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 22, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 22, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [81, 82, 85, 87, 88, 89, 90, 91, 93, 94, 95, 97, 106, 107, 109, 119, 120, 122, 132, 133, 134, 135], "excluded_lines": [], "start_line": 79}, "": {"executed_lines": [8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 29, 32, 68, 71, 77, 79, 138], "summary": {"covered_lines": 18, "num_statements": 20, "percent_covered": 90.0, "percent_covered_display": "90", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 90.0, "percent_statements_covered_display": "90"}, "missing_lines": [139, 140], 
"excluded_lines": [], "start_line": 1}}, "classes": {"RisksCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 22, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 22, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [81, 82, 85, 87, 88, 89, 90, 91, 93, 94, 95, 97, 106, 107, 109, 119, 120, 122, 132, 133, 134, 135], "excluded_lines": [], "start_line": 68}, "": {"executed_lines": [8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 29, 32, 68, 71, 77, 79, 138], "summary": {"covered_lines": 18, "num_statements": 29, "percent_covered": 62.06896551724138, "percent_covered_display": "62", "missing_lines": 11, "excluded_lines": 0, "percent_statements_covered": 62.06896551724138, "percent_statements_covered_display": "62"}, "missing_lines": [47, 48, 49, 50, 52, 53, 55, 61, 65, 139, 140], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/compute/utils.py": {"executed_lines": [3, 4, 5, 6, 8, 9, 10, 11, 12, 14, 24, 28, 29, 36, 40, 47, 49, 55, 57, 58, 59, 60, 62, 63, 66, 68, 69, 70, 71, 72, 73, 74, 77, 92, 94, 97, 98, 99, 101, 104, 106, 107, 108, 111, 112, 115, 118, 121, 128, 145, 148, 149, 151, 153, 155, 156, 158, 159, 161, 163, 165, 166, 168, 169, 171, 173, 175, 176, 178, 180, 182, 184, 186, 187, 189, 191, 194, 210, 211, 212, 213, 214, 215, 216, 217, 219, 222, 239, 242, 243, 244, 246, 247, 248, 249, 252, 254, 257, 259, 260, 261, 263, 264, 265, 266, 267, 269, 271, 272, 273, 275, 276], "summary": {"covered_lines": 112, "num_statements": 118, "percent_covered": 94.91525423728814, "percent_covered_display": "95", "missing_lines": 6, "excluded_lines": 0, "percent_statements_covered": 94.91525423728814, "percent_statements_covered_display": "95"}, "missing_lines": [95, 146, 177, 188, 240, 250], "excluded_lines": [], "functions": {"is_hdf5_compatible": {"executed_lines": [49], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, 
"percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 47}, "to_hdf5_attrs": {"executed_lines": [57, 58, 59, 60, 62, 63], "summary": {"covered_lines": 6, "num_statements": 6, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 55}, "from_hdf5_attrs": {"executed_lines": [68, 69, 70, 71, 72, 73, 74], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 66}, "extract_modalities": {"executed_lines": [92, 94, 97, 98, 99, 101], "summary": {"covered_lines": 6, "num_statements": 7, "percent_covered": 85.71428571428571, "percent_covered_display": "86", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 85.71428571428571, "percent_statements_covered_display": "86"}, "missing_lines": [95], "excluded_lines": [], "start_line": 77}, "ensure_parent_dir": {"executed_lines": [106, 107, 108], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 104}, "HDF5FileStorage._get_dataset": {"executed_lines": [145, 148, 149], "summary": {"covered_lines": 3, "num_statements": 4, "percent_covered": 75.0, "percent_covered_display": "75", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 75.0, "percent_statements_covered_display": "75"}, 
"missing_lines": [146], "excluded_lines": [], "start_line": 128}, "HDF5FileStorage.load": {"executed_lines": [153, 155, 156, 158, 159], "summary": {"covered_lines": 5, "num_statements": 5, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 151}, "HDF5FileStorage.get_attrs": {"executed_lines": [163, 165, 166, 168, 169], "summary": {"covered_lines": 5, "num_statements": 5, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 161}, "HDF5FileStorage.save": {"executed_lines": [173, 175, 176, 178, 180], "summary": {"covered_lines": 5, "num_statements": 6, "percent_covered": 83.33333333333333, "percent_covered_display": "83", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 83.33333333333333, "percent_statements_covered_display": "83"}, "missing_lines": [177], "excluded_lines": [], "start_line": 171}, "HDF5FileStorage.set_attrs": {"executed_lines": [184, 186, 187, 189, 191], "summary": {"covered_lines": 5, "num_statements": 6, "percent_covered": 83.33333333333333, "percent_covered_display": "83", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 83.33333333333333, "percent_statements_covered_display": "83"}, "missing_lines": [188], "excluded_lines": [], "start_line": 182}, "reduce_pattern": {"executed_lines": [210, 211, 212, 213, 214, 215, 216, 217, 219], "summary": {"covered_lines": 9, "num_statements": 9, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 194}, 
"complete_pattern": {"executed_lines": [239, 242, 243, 244, 246, 247, 248, 249, 252, 254], "summary": {"covered_lines": 10, "num_statements": 12, "percent_covered": 83.33333333333333, "percent_covered_display": "83", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 83.33333333333333, "percent_statements_covered_display": "83"}, "missing_lines": [240, 250], "excluded_lines": [], "start_line": 222}, "get_cached": {"executed_lines": [259, 260, 261, 263, 264, 275, 276], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 257}, "get_cached.log_cache_info_wrapper": {"executed_lines": [265, 266, 267, 269, 271, 272, 273], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 264}, "": {"executed_lines": [3, 4, 5, 6, 8, 9, 10, 11, 12, 14, 24, 28, 29, 36, 40, 47, 55, 66, 77, 104, 111, 112, 115, 118, 121, 128, 151, 161, 171, 182, 194, 222, 257], "summary": {"covered_lines": 33, "num_statements": 33, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"BaseComputeCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 
24}, "HDF5FileStorage": {"executed_lines": [145, 148, 149, 153, 155, 156, 158, 159, 163, 165, 166, 168, 169, 173, 175, 176, 178, 180, 184, 186, 187, 189, 191], "summary": {"covered_lines": 23, "num_statements": 26, "percent_covered": 88.46153846153847, "percent_covered_display": "88", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 88.46153846153847, "percent_statements_covered_display": "88"}, "missing_lines": [146, 177, 188], "excluded_lines": [], "start_line": 115}, "": {"executed_lines": [3, 4, 5, 6, 8, 9, 10, 11, 12, 14, 24, 28, 29, 36, 40, 47, 49, 55, 57, 58, 59, 60, 62, 63, 66, 68, 69, 70, 71, 72, 73, 74, 77, 92, 94, 97, 98, 99, 101, 104, 106, 107, 108, 111, 112, 115, 118, 121, 128, 151, 161, 171, 182, 194, 210, 211, 212, 213, 214, 215, 216, 217, 219, 222, 239, 242, 243, 244, 246, 247, 248, 249, 252, 254, 257, 259, 260, 261, 263, 264, 265, 266, 267, 269, 271, 272, 273, 275, 276], "summary": {"covered_lines": 89, "num_statements": 92, "percent_covered": 96.73913043478261, "percent_covered_display": "97", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 96.73913043478261, "percent_statements_covered_display": "97"}, "missing_lines": [95, 240, 250], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/configs.py": {"executed_lines": [12, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 39, 44, 46, 48, 51, 56, 59, 63, 69, 72, 78, 82, 87, 89, 92, 94, 96, 102, 104, 107, 110, 115, 120, 128, 131, 135, 139, 145, 148, 153, 159, 161, 162, 164, 167, 168, 170, 171, 176, 179, 182, 186, 187, 189, 190, 202, 204, 205, 206, 208, 209, 211, 214, 216, 219, 220, 222, 225, 228, 232, 236, 240, 244, 253, 258, 259, 261, 270, 273, 275, 276, 280, 281, 282, 283, 285, 288, 296, 298, 299, 306, 316, 321, 325, 332, 336, 341, 343, 348, 349, 350, 355, 357, 359, 360, 362, 363, 364, 366, 367, 368, 370, 372, 379, 380, 381, 387, 390, 393, 396, 400, 404, 412, 416, 420, 424, 431, 435, 441, 445, 449, 457, 463, 470, 477, 485, 
495, 502, 505, 509, 513, 517, 525, 539, 541, 542, 544, 545, 550, 553, 557, 562, 566, 570, 571, 573, 575, 576, 578, 580, 586, 588, 594, 596, 597, 598, 599, 600, 601, 604, 625, 626, 628, 629, 630, 636, 637, 640, 647, 648, 649, 651, 653, 654, 655, 656, 657, 658, 662, 663, 664, 665, 667, 669, 670, 673, 679, 680, 681, 683, 684, 685, 687, 688, 691, 715, 718, 732, 747, 748, 750, 752, 753, 754, 759, 761, 763, 767, 768, 774, 776, 778, 788, 791, 793, 803, 804, 813, 818, 819], "summary": {"covered_lines": 239, "num_statements": 280, "percent_covered": 85.35714285714286, "percent_covered_display": "85", "missing_lines": 41, "excluded_lines": 0, "percent_statements_covered": 85.35714285714286, "percent_statements_covered_display": "85"}, "missing_lines": [90, 122, 165, 173, 217, 271, 277, 278, 286, 472, 473, 474, 482, 491, 492, 527, 528, 529, 531, 532, 533, 535, 536, 547, 581, 582, 584, 589, 660, 699, 700, 702, 703, 704, 706, 707, 708, 710, 711, 712, 755], "excluded_lines": [], "functions": {"DataConfig.load": {"executed_lines": [89, 92], "summary": {"covered_lines": 2, "num_statements": 3, "percent_covered": 66.66666666666667, "percent_covered_display": "67", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 66.66666666666667, "percent_statements_covered_display": "67"}, "missing_lines": [90], "excluded_lines": [], "start_line": 87}, "DataConfig.get_load_kwargs": {"executed_lines": [96], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 94}, "check_pattern": {"executed_lines": [104], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, 
"missing_lines": [], "excluded_lines": [], "start_line": 102}, "DiagnosisConfig.to_involvement": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [122], "excluded_lines": [], "start_line": 120}, "retrieve_graph_representation": {"executed_lines": [161, 162, 164, 167, 168, 170, 171], "summary": {"covered_lines": 7, "num_statements": 9, "percent_covered": 77.77777777777777, "percent_covered_display": "78", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 77.77777777777777, "percent_statements_covered_display": "78"}, "missing_lines": [165, 173], "excluded_lines": [], "start_line": 159}, "GraphConfig.from_model": {"executed_lines": [189, 190], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 187}, "has_model_symbol": {"executed_lines": [204, 205, 206, 208, 209, 211], "summary": {"covered_lines": 6, "num_statements": 6, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 202}, "get_symmetry_kwargs": {"executed_lines": [216, 219, 220, 222], "summary": {"covered_lines": 4, "num_statements": 5, "percent_covered": 80.0, "percent_covered_display": "80", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 80.0, "percent_statements_covered_display": "80"}, "missing_lines": [217], "excluded_lines": [], "start_line": 214}, "ModelConfig.from_model": {"executed_lines": [261, 270, 273, 275, 276, 
280, 281, 282, 283, 285, 288], "summary": {"covered_lines": 11, "num_statements": 15, "percent_covered": 73.33333333333333, "percent_covered_display": "73", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 73.33333333333333, "percent_statements_covered_display": "73"}, "missing_lines": [271, 277, 278, 286], "excluded_lines": [], "start_line": 259}, "modalityconfig_from_model": {"executed_lines": [298, 299], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 296}, "DeprecatedModelConfig.model_post_init": {"executed_lines": [343, 348, 349, 350, 355], "summary": {"covered_lines": 5, "num_statements": 5, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 341}, "DeprecatedModelConfig.translate": {"executed_lines": [359, 360, 362, 363, 364, 366, 367, 368, 370, 372, 379, 380, 381, 387], "summary": {"covered_lines": 14, "num_statements": 14, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 357}, "SamplingConfig.load": {"executed_lines": [463], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 457}, "geometric_schedule": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, 
"percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [472, 473, 474], "excluded_lines": [], "start_line": 470}, "linear_schedule": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [482], "excluded_lines": [], "start_line": 477}, "power_schedule": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 2, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [491, 492], "excluded_lines": [], "start_line": 485}, "ScheduleConfig.get_schedule": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 8, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 8, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [527, 528, 529, 531, 532, 533, 535, 536], "excluded_lines": [], "start_line": 525}, "map_to_optional_bool": {"executed_lines": [541, 542, 544, 545], "summary": {"covered_lines": 4, "num_statements": 5, "percent_covered": 80.0, "percent_covered_display": "80", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 80.0, "percent_statements_covered_display": "80"}, "missing_lines": [547], "excluded_lines": [], "start_line": 539}, "ScenarioConfig.model_post_init": {"executed_lines": [575, 576], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": 
[], "excluded_lines": [], "start_line": 573}, "ScenarioConfig.interpolate": {"executed_lines": [580], "summary": {"covered_lines": 1, "num_statements": 4, "percent_covered": 25.0, "percent_covered_display": "25", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 25.0, "percent_statements_covered_display": "25"}, "missing_lines": [581, 582, 584], "excluded_lines": [], "start_line": 578}, "ScenarioConfig.normalize": {"executed_lines": [588], "summary": {"covered_lines": 1, "num_statements": 2, "percent_covered": 50.0, "percent_covered_display": "50", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 50.0, "percent_statements_covered_display": "50"}, "missing_lines": [589], "excluded_lines": [], "start_line": 586}, "_construct_model_from_external": {"executed_lines": [596, 597, 598, 599, 600, 601], "summary": {"covered_lines": 6, "num_statements": 6, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 594}, "construct_model": {"executed_lines": [625, 626, 628, 629, 630, 636, 637], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 604}, "add_distributions": {"executed_lines": [647, 648, 649, 651, 653, 654, 655, 656, 657, 658, 662, 663, 664, 665, 667, 669, 670], "summary": {"covered_lines": 17, "num_statements": 18, "percent_covered": 94.44444444444444, "percent_covered_display": "94", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 94.44444444444444, "percent_statements_covered_display": "94"}, "missing_lines": [660], "excluded_lines": [], "start_line": 640}, "add_modalities": 
{"executed_lines": [679, 680, 681, 683, 684, 685, 687, 688], "summary": {"covered_lines": 8, "num_statements": 8, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 673}, "add_data": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 11, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 11, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [699, 700, 702, 703, 704, 706, 707, 708, 710, 711, 712], "excluded_lines": [], "start_line": 691}, "DynamicYamlConfigSettingsSource.__init__": {"executed_lines": [747, 748], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 732}, "DynamicYamlConfigSettingsSource._read_file": {"executed_lines": [752, 753, 754, 759], "summary": {"covered_lines": 4, "num_statements": 5, "percent_covered": 80.0, "percent_covered_display": "80", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 80.0, "percent_statements_covered_display": "80"}, "missing_lines": [755], "excluded_lines": [], "start_line": 750}, "DynamicYamlConfigSettingsSource.__call__": {"executed_lines": [763, 767, 768, 774], "summary": {"covered_lines": 4, "num_statements": 4, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 761}, "DynamicYamlConfigSettingsSource.__repr__": {"executed_lines": [778], "summary": {"covered_lines": 1, 
"num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 776}, "BaseCLI.settings_customise_sources": {"executed_lines": [813, 818, 819], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 804}, "": {"executed_lines": [12, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 39, 44, 46, 48, 51, 56, 59, 63, 69, 72, 78, 82, 87, 94, 102, 107, 110, 115, 120, 128, 131, 135, 139, 145, 148, 153, 159, 176, 179, 182, 186, 187, 202, 214, 225, 228, 232, 236, 240, 244, 253, 258, 259, 296, 306, 316, 321, 325, 332, 336, 341, 357, 390, 393, 396, 400, 404, 412, 416, 420, 424, 431, 435, 441, 445, 449, 457, 470, 477, 485, 495, 502, 505, 509, 513, 517, 525, 539, 550, 553, 557, 562, 566, 570, 571, 573, 578, 586, 594, 604, 640, 673, 691, 715, 718, 732, 750, 761, 776, 788, 791, 793, 803, 804], "summary": {"covered_lines": 123, "num_statements": 123, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"CrossValidationConfig": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 56}, "DataConfig": {"executed_lines": [89, 92, 96], "summary": {"covered_lines": 3, 
"num_statements": 4, "percent_covered": 75.0, "percent_covered_display": "75", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 75.0, "percent_statements_covered_display": "75"}, "missing_lines": [90], "excluded_lines": [], "start_line": 69}, "DiagnosisConfig": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [122], "excluded_lines": [], "start_line": 107}, "DistributionConfig": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 128}, "InvolvementConfig": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 145}, "GraphConfig": {"executed_lines": [189, 190], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 176}, "ModelConfig": {"executed_lines": [261, 270, 273, 275, 276, 280, 281, 282, 283, 285, 288], "summary": {"covered_lines": 11, "num_statements": 15, "percent_covered": 73.33333333333333, "percent_covered_display": "73", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 73.33333333333333, "percent_statements_covered_display": "73"}, 
"missing_lines": [271, 277, 278, 286], "excluded_lines": [], "start_line": 225}, "DeprecatedModelConfig": {"executed_lines": [343, 348, 349, 350, 355, 359, 360, 362, 363, 364, 366, 367, 368, 370, 372, 379, 380, 381, 387], "summary": {"covered_lines": 19, "num_statements": 19, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 306}, "SamplingConfig": {"executed_lines": [463], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 390}, "ScheduleConfig": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 8, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 8, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [527, 528, 529, 531, 532, 533, 535, 536], "excluded_lines": [], "start_line": 502}, "ScenarioConfig": {"executed_lines": [575, 576, 580, 588], "summary": {"covered_lines": 4, "num_statements": 8, "percent_covered": 50.0, "percent_covered_display": "50", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 50.0, "percent_statements_covered_display": "50"}, "missing_lines": [581, 582, 584, 589], "excluded_lines": [], "start_line": 550}, "DynamicYamlConfigSettingsSource": {"executed_lines": [747, 748, 752, 753, 754, 759, 763, 767, 768, 774, 778], "summary": {"covered_lines": 11, "num_statements": 12, "percent_covered": 91.66666666666667, "percent_covered_display": "92", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 91.66666666666667, "percent_statements_covered_display": "92"}, 
"missing_lines": [755], "excluded_lines": [], "start_line": 718}, "BaseCLI": {"executed_lines": [813, 818, 819], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 788}, "": {"executed_lines": [12, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 39, 44, 46, 48, 51, 56, 59, 63, 69, 72, 78, 82, 87, 94, 102, 104, 107, 110, 115, 120, 128, 131, 135, 139, 145, 148, 153, 159, 161, 162, 164, 167, 168, 170, 171, 176, 179, 182, 186, 187, 202, 204, 205, 206, 208, 209, 211, 214, 216, 219, 220, 222, 225, 228, 232, 236, 240, 244, 253, 258, 259, 296, 298, 299, 306, 316, 321, 325, 332, 336, 341, 357, 390, 393, 396, 400, 404, 412, 416, 420, 424, 431, 435, 441, 445, 449, 457, 470, 477, 485, 495, 502, 505, 509, 513, 517, 525, 539, 541, 542, 544, 545, 550, 553, 557, 562, 566, 570, 571, 573, 578, 586, 594, 596, 597, 598, 599, 600, 601, 604, 625, 626, 628, 629, 630, 636, 637, 640, 647, 648, 649, 651, 653, 654, 655, 656, 657, 658, 662, 663, 664, 665, 667, 669, 670, 673, 679, 680, 681, 683, 684, 685, 687, 688, 691, 715, 718, 732, 750, 761, 776, 788, 791, 793, 803, 804], "summary": {"covered_lines": 185, "num_statements": 207, "percent_covered": 89.3719806763285, "percent_covered_display": "89", "missing_lines": 22, "excluded_lines": 0, "percent_statements_covered": 89.3719806763285, "percent_statements_covered_display": "89"}, "missing_lines": [165, 173, 217, 472, 473, 474, 482, 491, 492, 547, 660, 699, 700, 702, 703, 704, 706, 707, 708, 710, 711, 712], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/__init__.py": {"executed_lines": [23, 25, 36, 39, 51], "summary": {"covered_lines": 5, "num_statements": 6, "percent_covered": 83.33333333333333, "percent_covered_display": "83", "missing_lines": 1, "excluded_lines": 
0, "percent_statements_covered": 83.33333333333333, "percent_statements_covered_display": "83"}, "missing_lines": [53], "excluded_lines": [], "functions": {"DataCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [53], "excluded_lines": [], "start_line": 51}, "": {"executed_lines": [23, 25, 36, 39, 51], "summary": {"covered_lines": 5, "num_statements": 5, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"DataCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [53], "excluded_lines": [], "start_line": 39}, "": {"executed_lines": [23, 25, 36, 39, 51], "summary": {"covered_lines": 5, "num_statements": 5, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/__main__.py": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 18, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 18, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 5, 6, 7, 10, 13, 15, 20, 21, 25, 26, 27, 28, 29, 31, 32, 35, 36], "excluded_lines": [], "functions": {"main": {"executed_lines": [], "summary": {"covered_lines": 0, 
"num_statements": 10, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 10, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [15, 20, 21, 25, 26, 27, 28, 29, 31, 32], "excluded_lines": [], "start_line": 13}, "": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 8, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 8, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 5, 6, 7, 10, 13, 35, 36], "excluded_lines": [], "start_line": 1}}, "classes": {"": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 18, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 18, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 5, 6, 7, 10, 13, 15, 20, 21, 25, 26, 27, 28, 29, 31, 32, 35, 36], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/collect/__init__.py": {"executed_lines": [21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 47, 48, 49, 50, 53, 54, 61, 62, 67, 68, 79, 80, 123, 126, 130, 135], "summary": {"covered_lines": 31, "num_statements": 58, "percent_covered": 53.44827586206897, "percent_covered_display": "53", "missing_lines": 27, "excluded_lines": 0, "percent_statements_covered": 53.44827586206897, "percent_statements_covered_display": "53"}, "missing_lines": [56, 57, 58, 64, 76, 92, 94, 95, 96, 101, 103, 104, 105, 106, 108, 109, 110, 112, 113, 114, 115, 116, 137, 138, 141, 142, 144], "excluded_lines": [], "functions": {"serve_index_html": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [56, 57, 58], "excluded_lines": 
[], "start_line": 54}, "serve_schema": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [64], "excluded_lines": [], "start_line": 62}, "serve_collector_js": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [76], "excluded_lines": [], "start_line": 68}, "process": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 17, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [92, 94, 95, 96, 101, 103, 104, 105, 106, 108, 109, 110, 112, 113, 114, 115, 116], "excluded_lines": [], "start_line": 80}, "CollectorCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 5, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [137, 138, 141, 142, 144], "excluded_lines": [], "start_line": 135}, "": {"executed_lines": [21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 47, 48, 49, 50, 53, 54, 61, 62, 67, 68, 79, 80, 123, 126, 130, 135], "summary": {"covered_lines": 31, "num_statements": 31, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"CollectorCLI": {"executed_lines": [], "summary": {"covered_lines": 0, 
"num_statements": 5, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [137, 138, 141, 142, 144], "excluded_lines": [], "start_line": 123}, "": {"executed_lines": [21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 47, 48, 49, 50, 53, 54, 61, 62, 67, 68, 79, 80, 123, 126, 130, 135], "summary": {"covered_lines": 31, "num_statements": 53, "percent_covered": 58.490566037735846, "percent_covered_display": "58", "missing_lines": 22, "excluded_lines": 0, "percent_statements_covered": 58.490566037735846, "percent_statements_covered_display": "58"}, "missing_lines": [56, 57, 58, 64, 76, 92, 94, 95, 96, 101, 103, 104, 105, 106, 108, 109, 110, 112, 113, 114, 115, 116], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/enhance.py": {"executed_lines": [9, 11, 12, 13, 15, 16, 17, 20, 24, 25, 26, 33, 51], "summary": {"covered_lines": 13, "num_statements": 19, "percent_covered": 68.42105263157895, "percent_covered_display": "68", "missing_lines": 6, "excluded_lines": 0, "percent_statements_covered": 68.42105263157895, "percent_statements_covered_display": "68"}, "missing_lines": [40, 42, 43, 48, 52, 53], "excluded_lines": [], "functions": {"EnhanceCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 4, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [40, 42, 43, 48], "excluded_lines": [], "start_line": 33}, "": {"executed_lines": [9, 11, 12, 13, 15, 16, 17, 20, 24, 25, 26, 33, 51], "summary": {"covered_lines": 13, "num_statements": 15, "percent_covered": 86.66666666666667, "percent_covered_display": "87", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 86.66666666666667, "percent_statements_covered_display": "87"}, 
"missing_lines": [52, 53], "excluded_lines": [], "start_line": 1}}, "classes": {"EnhanceCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 4, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [40, 42, 43, 48], "excluded_lines": [], "start_line": 20}, "": {"executed_lines": [9, 11, 12, 13, 15, 16, 17, 20, 24, 25, 26, 33, 51], "summary": {"covered_lines": 13, "num_statements": 15, "percent_covered": 86.66666666666667, "percent_covered_display": "87", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 86.66666666666667, "percent_statements_covered_display": "87"}, "missing_lines": [52, 53], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/fetch.py": {"executed_lines": [3, 5, 6, 7, 8, 10, 11, 14, 17, 24, 31, 38, 40, 55], "summary": {"covered_lines": 14, "num_statements": 21, "percent_covered": 66.66666666666667, "percent_covered_display": "67", "missing_lines": 7, "excluded_lines": 0, "percent_statements_covered": 66.66666666666667, "percent_statements_covered_display": "67"}, "missing_lines": [42, 43, 45, 51, 52, 56, 57], "excluded_lines": [], "functions": {"FetchCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 5, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [42, 43, 45, 51, 52], "excluded_lines": [], "start_line": 40}, "": {"executed_lines": [3, 5, 6, 7, 8, 10, 11, 14, 17, 24, 31, 38, 40, 55], "summary": {"covered_lines": 14, "num_statements": 16, "percent_covered": 87.5, "percent_covered_display": "88", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 87.5, "percent_statements_covered_display": "88"}, "missing_lines": [56, 57], "excluded_lines": [], 
"start_line": 1}}, "classes": {"FetchCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 5, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [42, 43, 45, 51, 52], "excluded_lines": [], "start_line": 14}, "": {"executed_lines": [3, 5, 6, 7, 8, 10, 11, 14, 17, 24, 31, 38, 40, 55], "summary": {"covered_lines": 14, "num_statements": 16, "percent_covered": 87.5, "percent_covered_display": "88", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 87.5, "percent_statements_covered_display": "88"}, "missing_lines": [56, 57], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/filter.py": {"executed_lines": [7, 8, 10, 11, 12, 13, 15, 16, 17, 20, 24, 28, 35, 38, 39, 41, 68, 97], "summary": {"covered_lines": 18, "num_statements": 48, "percent_covered": 37.5, "percent_covered_display": "38", "missing_lines": 30, "excluded_lines": 0, "percent_statements_covered": 37.5, "percent_statements_covered_display": "38"}, "missing_lines": [43, 44, 45, 46, 47, 49, 54, 55, 56, 57, 58, 60, 61, 62, 63, 64, 66, 76, 78, 79, 84, 85, 87, 88, 89, 91, 92, 94, 98, 99], "excluded_lines": [], "functions": {"FilterCLI.model_post_init": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 17, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [43, 44, 45, 46, 47, 49, 54, 55, 56, 57, 58, 60, 61, 62, 63, 64, 66], "excluded_lines": [], "start_line": 41}, "FilterCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 11, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 11, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [76, 
78, 79, 84, 85, 87, 88, 89, 91, 92, 94], "excluded_lines": [], "start_line": 68}, "": {"executed_lines": [7, 8, 10, 11, 12, 13, 15, 16, 17, 20, 24, 28, 35, 38, 39, 41, 68, 97], "summary": {"covered_lines": 18, "num_statements": 20, "percent_covered": 90.0, "percent_covered_display": "90", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 90.0, "percent_statements_covered_display": "90"}, "missing_lines": [98, 99], "excluded_lines": [], "start_line": 1}}, "classes": {"FilterCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 28, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 28, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [43, 44, 45, 46, 47, 49, 54, 55, 56, 57, 58, 60, 61, 62, 63, 64, 66, 76, 78, 79, 84, 85, 87, 88, 89, 91, 92, 94], "excluded_lines": [], "start_line": 20}, "": {"executed_lines": [7, 8, 10, 11, 12, 13, 15, 16, 17, 20, 24, 28, 35, 38, 39, 41, 68, 97], "summary": {"covered_lines": 18, "num_statements": 20, "percent_covered": 90.0, "percent_covered_display": "90", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 90.0, "percent_statements_covered_display": "90"}, "missing_lines": [98, 99], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/generate.py": {"executed_lines": [11, 12, 13, 14, 16, 17, 26, 29, 33, 34, 41, 49, 51, 53, 55, 56, 57, 60, 62, 65, 67, 76, 78, 79, 80, 81, 82, 84, 89, 91, 94], "summary": {"covered_lines": 31, "num_statements": 35, "percent_covered": 88.57142857142857, "percent_covered_display": "89", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 88.57142857142857, "percent_statements_covered_display": "89"}, "missing_lines": [58, 63, 95, 96], "excluded_lines": [], "functions": {"GenerateCLI.model_post_init": {"executed_lines": [55, 56, 57, 60, 62, 65], "summary": {"covered_lines": 6, "num_statements": 8, "percent_covered": 75.0, 
"percent_covered_display": "75", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 75.0, "percent_statements_covered_display": "75"}, "missing_lines": [58, 63], "excluded_lines": [], "start_line": 53}, "GenerateCLI.cli_cmd": {"executed_lines": [76, 78, 79, 80, 81, 82, 84, 89, 91], "summary": {"covered_lines": 9, "num_statements": 9, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 67}, "": {"executed_lines": [11, 12, 13, 14, 16, 17, 26, 29, 33, 34, 41, 49, 51, 53, 67, 94], "summary": {"covered_lines": 16, "num_statements": 18, "percent_covered": 88.88888888888889, "percent_covered_display": "89", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 88.88888888888889, "percent_statements_covered_display": "89"}, "missing_lines": [95, 96], "excluded_lines": [], "start_line": 1}}, "classes": {"GenerateCLI": {"executed_lines": [55, 56, 57, 60, 62, 65, 76, 78, 79, 80, 81, 82, 84, 89, 91], "summary": {"covered_lines": 15, "num_statements": 17, "percent_covered": 88.23529411764706, "percent_covered_display": "88", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 88.23529411764706, "percent_statements_covered_display": "88"}, "missing_lines": [58, 63], "excluded_lines": [], "start_line": 29}, "": {"executed_lines": [11, 12, 13, 14, 16, 17, 26, 29, 33, 34, 41, 49, 51, 53, 67, 94], "summary": {"covered_lines": 16, "num_statements": 18, "percent_covered": 88.88888888888889, "percent_covered_display": "89", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 88.88888888888889, "percent_statements_covered_display": "89"}, "missing_lines": [95, 96], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/join.py": {"executed_lines": [3, 5, 6, 7, 9, 10, 11, 14, 17, 18, 20, 80], "summary": {"covered_lines": 
12, "num_statements": 22, "percent_covered": 54.54545454545455, "percent_covered_display": "55", "missing_lines": 10, "excluded_lines": 0, "percent_statements_covered": 54.54545454545455, "percent_statements_covered_display": "55"}, "missing_lines": [61, 63, 64, 67, 68, 69, 71, 77, 81, 82], "excluded_lines": [], "functions": {"JoinCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 8, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 8, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [61, 63, 64, 67, 68, 69, 71, 77], "excluded_lines": [], "start_line": 20}, "": {"executed_lines": [3, 5, 6, 7, 9, 10, 11, 14, 17, 18, 20, 80], "summary": {"covered_lines": 12, "num_statements": 14, "percent_covered": 85.71428571428571, "percent_covered_display": "86", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 85.71428571428571, "percent_statements_covered_display": "86"}, "missing_lines": [81, 82], "excluded_lines": [], "start_line": 1}}, "classes": {"JoinCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 8, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 8, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [61, 63, 64, 67, 68, 69, 71, 77], "excluded_lines": [], "start_line": 14}, "": {"executed_lines": [3, 5, 6, 7, 9, 10, 11, 14, 17, 18, 20, 80], "summary": {"covered_lines": 12, "num_statements": 14, "percent_covered": 85.71428571428571, "percent_covered_display": "86", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 85.71428571428571, "percent_statements_covered_display": "86"}, "missing_lines": [81, 82], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/lyproxify.py": {"executed_lines": [10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 29, 37, 49, 52, 53, 57, 67, 74, 78, 
80, 122, 126, 145, 162, 163, 164, 165, 167, 169, 176, 199, 200, 201, 202, 203, 204, 205, 206, 208, 210, 213, 285, 310, 327, 328, 330, 331, 332, 334, 335, 336, 339], "summary": {"covered_lines": 56, "num_statements": 123, "percent_covered": 45.52845528455285, "percent_covered_display": "46", "missing_lines": 67, "excluded_lines": 1, "percent_statements_covered": 45.52845528455285, "percent_statements_covered_display": "46"}, "missing_lines": [31, 32, 34, 39, 40, 41, 43, 44, 46, 90, 92, 96, 102, 103, 104, 105, 106, 108, 109, 110, 111, 113, 114, 116, 117, 119, 132, 134, 135, 136, 141, 142, 173, 250, 252, 253, 255, 256, 258, 259, 260, 261, 262, 263, 264, 265, 267, 268, 271, 272, 276, 281, 282, 293, 294, 295, 297, 298, 299, 300, 302, 303, 304, 306, 307, 340, 341], "excluded_lines": [146], "functions": {"ensure_python_file": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [31, 32, 34], "excluded_lines": [], "start_line": 29}, "ensure_column_map": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 6, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 6, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [39, 40, 41, 43, 44, 46], "excluded_lines": [], "start_line": 37}, "LyproxifyCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 17, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [90, 92, 96, 102, 103, 104, 105, 106, 108, 109, 110, 111, 113, 114, 116, 117, 119], "excluded_lines": [], "start_line": 80}, "clean_header": {"executed_lines": [], "summary": {"covered_lines": 
0, "num_statements": 6, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 6, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [132, 134, 135, 136, 141, 142], "excluded_lines": [], "start_line": 126}, "get_instruction_depth": {"executed_lines": [162, 163, 164, 165, 167, 169], "summary": {"covered_lines": 6, "num_statements": 7, "percent_covered": 85.71428571428571, "percent_covered_display": "86", "missing_lines": 1, "excluded_lines": 1, "percent_statements_covered": 85.71428571428571, "percent_statements_covered_display": "86"}, "missing_lines": [173], "excluded_lines": [146], "start_line": 145}, "generate_markdown_docs": {"executed_lines": [199, 200, 201, 202, 203, 204, 205, 206, 208, 210], "summary": {"covered_lines": 10, "num_statements": 10, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 176}, "transform_to_lyprox": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 20, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 20, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [250, 252, 253, 255, 256, 258, 259, 260, 261, 262, 263, 264, 265, 267, 268, 271, 272, 276, 281, 282], "excluded_lines": [], "start_line": 213}, "leftright_to_ipsicontra": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 12, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 12, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [293, 294, 295, 297, 298, 299, 300, 302, 303, 304, 306, 307], "excluded_lines": [], "start_line": 285}, "exclude_patients": {"executed_lines": [327, 328, 330, 331, 332, 
334, 335, 336], "summary": {"covered_lines": 8, "num_statements": 8, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 310}, "": {"executed_lines": [10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 29, 37, 49, 52, 53, 57, 67, 74, 78, 80, 122, 126, 145, 176, 213, 285, 310, 339], "summary": {"covered_lines": 32, "num_statements": 34, "percent_covered": 94.11764705882354, "percent_covered_display": "94", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 94.11764705882354, "percent_statements_covered_display": "94"}, "missing_lines": [340, 341], "excluded_lines": [], "start_line": 1}}, "classes": {"LyproxifyCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 17, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [90, 92, 96, 102, 103, 104, 105, 106, 108, 109, 110, 111, 113, 114, 116, 117, 119], "excluded_lines": [], "start_line": 49}, "ParsingError": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 122}, "": {"executed_lines": [10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 29, 37, 49, 52, 53, 57, 67, 74, 78, 80, 122, 126, 145, 162, 163, 164, 165, 167, 169, 176, 199, 200, 201, 202, 203, 204, 205, 206, 208, 210, 213, 285, 310, 327, 328, 330, 331, 332, 334, 335, 336, 339], "summary": {"covered_lines": 56, "num_statements": 106, "percent_covered": 52.83018867924528, "percent_covered_display": "53", "missing_lines": 50, 
"excluded_lines": 1, "percent_statements_covered": 52.83018867924528, "percent_statements_covered_display": "53"}, "missing_lines": [31, 32, 34, 39, 40, 41, 43, 44, 46, 132, 134, 135, 136, 141, 142, 173, 250, 252, 253, 255, 256, 258, 259, 260, 261, 262, 263, 264, 265, 267, 268, 271, 272, 276, 281, 282, 293, 294, 295, 297, 298, 299, 300, 302, 303, 304, 306, 307, 340, 341], "excluded_lines": [146], "start_line": 1}}}, "src/lyscripts/data/split.py": {"executed_lines": [3, 4, 6, 7, 8, 9, 11, 12, 13, 15, 18, 22, 23, 25, 71], "summary": {"covered_lines": 15, "num_statements": 29, "percent_covered": 51.724137931034484, "percent_covered_display": "52", "missing_lines": 14, "excluded_lines": 0, "percent_statements_covered": 51.724137931034484, "percent_statements_covered_display": "52"}, "missing_lines": [33, 35, 36, 38, 40, 46, 50, 51, 54, 59, 61, 65, 72, 73], "excluded_lines": [], "functions": {"SplitCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 12, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 12, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [33, 35, 36, 38, 40, 46, 50, 51, 54, 59, 61, 65], "excluded_lines": [], "start_line": 25}, "": {"executed_lines": [3, 4, 6, 7, 8, 9, 11, 12, 13, 15, 18, 22, 23, 25, 71], "summary": {"covered_lines": 15, "num_statements": 17, "percent_covered": 88.23529411764706, "percent_covered_display": "88", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 88.23529411764706, "percent_statements_covered_display": "88"}, "missing_lines": [72, 73], "excluded_lines": [], "start_line": 1}}, "classes": {"SplitCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 12, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 12, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [33, 35, 36, 
38, 40, 46, 50, 51, 54, 59, 61, 65], "excluded_lines": [], "start_line": 18}, "": {"executed_lines": [3, 4, 6, 7, 8, 9, 11, 12, 13, 15, 18, 22, 23, 25, 71], "summary": {"covered_lines": 15, "num_statements": 17, "percent_covered": 88.23529411764706, "percent_covered_display": "88", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 88.23529411764706, "percent_statements_covered_display": "88"}, "missing_lines": [72, 73], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/utils.py": {"executed_lines": [3, 5, 6, 8, 11, 12, 14, 15, 16], "summary": {"covered_lines": 9, "num_statements": 9, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "functions": {"save_table_to_csv": {"executed_lines": [14, 15, 16], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 12}, "": {"executed_lines": [3, 5, 6, 8, 11, 12], "summary": {"covered_lines": 6, "num_statements": 6, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"": {"executed_lines": [3, 5, 6, 8, 11, 12, 14, 15, 16], "summary": {"covered_lines": 9, "num_statements": 9, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/decorators.py": {"executed_lines": [8, 9, 10, 11, 12, 
13, 16, 18, 19, 20, 23, 30, 33, 34, 36, 37, 38, 39, 41, 42, 51, 57, 59, 62, 65, 66, 68, 69, 72, 74, 77, 80, 81, 83, 84, 86, 88], "summary": {"covered_lines": 37, "num_statements": 41, "percent_covered": 90.2439024390244, "percent_covered_display": "90", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 90.2439024390244, "percent_statements_covered_display": "90"}, "missing_lines": [53, 54, 55, 70], "excluded_lines": [], "functions": {"assemble_signature": {"executed_lines": [18, 19, 20], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 16}, "log_state": {"executed_lines": [30, 59], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 23}, "log_state.log_decorator": {"executed_lines": [33, 34, 57], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 30}, "log_state.log_decorator.wrapper": {"executed_lines": [36, 37, 38, 39, 41, 42, 51], "summary": {"covered_lines": 7, "num_statements": 10, "percent_covered": 70.0, "percent_covered_display": "70", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 70.0, "percent_statements_covered_display": "70"}, "missing_lines": [53, 54, 55], "excluded_lines": [], "start_line": 34}, "check_input_file_exists": {"executed_lines": [65, 66, 74], "summary": {"covered_lines": 3, 
"num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 62}, "check_input_file_exists.inner": {"executed_lines": [68, 69, 72], "summary": {"covered_lines": 3, "num_statements": 4, "percent_covered": 75.0, "percent_covered_display": "75", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 75.0, "percent_statements_covered_display": "75"}, "missing_lines": [70], "excluded_lines": [], "start_line": 66}, "check_output_dir_exists": {"executed_lines": [80, 81, 88], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 77}, "check_output_dir_exists.inner": {"executed_lines": [83, 84, 86], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 81}, "": {"executed_lines": [8, 9, 10, 11, 12, 13, 16, 23, 62, 77], "summary": {"covered_lines": 10, "num_statements": 10, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"": {"executed_lines": [8, 9, 10, 11, 12, 13, 16, 18, 19, 20, 23, 30, 33, 34, 36, 37, 38, 39, 41, 42, 51, 57, 59, 62, 65, 66, 68, 69, 72, 74, 77, 80, 81, 83, 84, 86, 88], "summary": {"covered_lines": 37, "num_statements": 41, "percent_covered": 90.2439024390244, 
"percent_covered_display": "90", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 90.2439024390244, "percent_statements_covered_display": "90"}, "missing_lines": [53, 54, 55, 70], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/evaluate.py": {"executed_lines": [8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 24, 38, 73, 90, 106, 137, 200], "summary": {"covered_lines": 18, "num_statements": 70, "percent_covered": 25.714285714285715, "percent_covered_display": "26", "missing_lines": 52, "excluded_lines": 0, "percent_statements_covered": 25.714285714285715, "percent_statements_covered_display": "26"}, "missing_lines": [29, 35, 43, 48, 50, 57, 63, 70, 87, 102, 103, 114, 115, 117, 118, 123, 124, 125, 127, 128, 129, 131, 132, 134, 139, 141, 142, 143, 144, 145, 148, 149, 156, 162, 164, 174, 175, 178, 179, 180, 183, 184, 186, 191, 192, 194, 195, 197, 201, 202, 204, 205], "excluded_lines": [], "functions": {"_add_parser": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 2, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [29, 35], "excluded_lines": [], "start_line": 24}, "_add_arguments": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 6, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 6, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [43, 48, 50, 57, 63, 70], "excluded_lines": [], "start_line": 38}, "comp_bic": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [87], "excluded_lines": [], "start_line": 73}, "compute_evidence": {"executed_lines": [], 
"summary": {"covered_lines": 0, "num_statements": 2, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [102, 103], "excluded_lines": [], "start_line": 90}, "compute_ti_results": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 13, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 13, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [114, 115, 117, 118, 123, 124, 125, 127, 128, 129, 131, 132, 134], "excluded_lines": [], "start_line": 106}, "main": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 24, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 24, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [139, 141, 142, 143, 144, 145, 148, 149, 156, 162, 164, 174, 175, 178, 179, 180, 183, 184, 186, 191, 192, 194, 195, 197], "excluded_lines": [], "start_line": 137}, "": {"executed_lines": [8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 24, 38, 73, 90, 106, 137, 200], "summary": {"covered_lines": 18, "num_statements": 22, "percent_covered": 81.81818181818181, "percent_covered_display": "82", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 81.81818181818181, "percent_statements_covered_display": "82"}, "missing_lines": [201, 202, 204, 205], "excluded_lines": [], "start_line": 1}}, "classes": {"": {"executed_lines": [8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 24, 38, 73, 90, 106, 137, 200], "summary": {"covered_lines": 18, "num_statements": 70, "percent_covered": 25.714285714285715, "percent_covered_display": "26", "missing_lines": 52, "excluded_lines": 0, "percent_statements_covered": 25.714285714285715, "percent_statements_covered_display": "26"}, "missing_lines": [29, 35, 43, 48, 50, 57, 
63, 70, 87, 102, 103, 114, 115, 117, 118, 123, 124, 125, 127, 128, 129, 131, 132, 134, 139, 141, 142, 143, 144, 145, 148, 149, 156, 162, 164, 174, 175, 178, 179, 180, 183, 184, 186, 191, 192, 194, 195, 197, 201, 202, 204, 205], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/integrate.py": {"executed_lines": [9, 11, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 35, 38, 66, 69, 70, 71, 78, 84, 85, 86, 90, 161], "summary": {"covered_lines": 24, "num_statements": 46, "percent_covered": 52.17391304347826, "percent_covered_display": "52", "missing_lines": 22, "excluded_lines": 0, "percent_statements_covered": 52.17391304347826, "percent_statements_covered_display": "52"}, "missing_lines": [46, 47, 53, 104, 106, 110, 111, 112, 113, 114, 117, 119, 122, 124, 125, 126, 134, 144, 153, 154, 162, 163], "excluded_lines": [], "functions": {"init_ti_sampler": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [46, 47, 53], "excluded_lines": [], "start_line": 38}, "IntegrateCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 17, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [104, 106, 110, 111, 112, 113, 114, 117, 119, 122, 124, 125, 126, 134, 144, 153, 154], "excluded_lines": [], "start_line": 90}, "": {"executed_lines": [9, 11, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 35, 38, 66, 69, 70, 71, 78, 84, 85, 86, 90, 161], "summary": {"covered_lines": 24, "num_statements": 26, "percent_covered": 92.3076923076923, "percent_covered_display": "92", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 92.3076923076923, "percent_statements_covered_display": "92"}, 
"missing_lines": [162, 163], "excluded_lines": [], "start_line": 1}}, "classes": {"IntegrateCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 17, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [104, 106, 110, 111, 112, 113, 114, 117, 119, 122, 124, 125, 126, 134, 144, 153, 154], "excluded_lines": [], "start_line": 66}, "": {"executed_lines": [9, 11, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 35, 38, 66, 69, 70, 71, 78, 84, 85, 86, 90, 161], "summary": {"covered_lines": 24, "num_statements": 29, "percent_covered": 82.75862068965517, "percent_covered_display": "83", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 82.75862068965517, "percent_statements_covered_display": "83"}, "missing_lines": [46, 47, 53, 162, 163], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/plots.py": {"executed_lines": [3, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 30, 37, 38, 41, 50, 59, 61, 64, 66, 69, 74, 75, 76, 77, 78, 81, 84, 87, 88, 89, 91, 92, 96, 97, 101, 102, 106, 109, 110, 112, 115, 118, 120, 121, 123, 125, 126, 135, 136, 137, 138, 139, 140, 142, 144, 146, 148, 150, 152, 154, 155, 157, 158, 160, 163, 166, 167, 169, 170, 179, 180, 181, 182, 183, 184, 190, 198, 199, 201, 202, 204, 206, 208, 216, 218, 225, 227, 234, 239, 240, 241, 243, 244, 246, 247, 249, 252, 268, 269, 270, 271, 273, 274, 275, 276, 279, 281, 282, 288, 289, 290, 291, 294, 303, 306, 307, 310, 311, 314, 335, 338, 340, 343, 345, 354, 355, 357, 358, 360, 363, 395, 396, 397, 402, 403, 404, 410, 411], "summary": {"covered_lines": 142, "num_statements": 160, "percent_covered": 88.75, "percent_covered_display": "89", "missing_lines": 18, "excluded_lines": 9, "percent_statements_covered": 88.75, "percent_statements_covered_display": "89"}, "missing_lines": [46, 47, 56, 185, 186, 336, 341, 370, 375, 377, 378, 380, 381, 382, 
383, 385, 392, 399], "excluded_lines": [25, 26, 27, 94, 95, 99, 100, 104, 105], "functions": {"floor_at_decimal": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 2, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [46, 47], "excluded_lines": [], "start_line": 41}, "ceil_at_decimal": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [56], "excluded_lines": [], "start_line": 50}, "floor_to_step": {"executed_lines": [61], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 59}, "ceil_to_step": {"executed_lines": [66], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 64}, "clean_and_check": {"executed_lines": [74, 75, 76, 77, 78], "summary": {"covered_lines": 5, "num_statements": 5, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 69}, "AbstractDistribution.draw": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, 
"excluded_lines": 1, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [94], "start_line": 92}, "AbstractDistribution.left_percentile": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 1, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [99], "start_line": 97}, "AbstractDistribution.right_percentile": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 1, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [104], "start_line": 102}, "AbstractDistribution._get_label": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 106}, "AbstractDistribution.label": {"executed_lines": [112], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 110}, "Histogram.values": {"executed_lines": [123], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 121}, "Histogram.from_hdf5": 
{"executed_lines": [135, 136, 137, 138, 139, 140], "summary": {"covered_lines": 6, "num_statements": 6, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 126}, "Histogram.left_percentile": {"executed_lines": [144], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 142}, "Histogram.right_percentile": {"executed_lines": [148], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 146}, "Histogram.draw": {"executed_lines": [152, 154, 155, 157, 158, 160], "summary": {"covered_lines": 6, "num_statements": 6, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 150}, "BetaPosterior.from_hdf5": {"executed_lines": [179, 180, 181, 182, 183, 184, 190], "summary": {"covered_lines": 7, "num_statements": 9, "percent_covered": 77.77777777777777, "percent_covered_display": "78", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 77.77777777777777, "percent_statements_covered_display": "78"}, "missing_lines": [185, 186], "excluded_lines": [], "start_line": 170}, "BetaPosterior._get_label": {"executed_lines": [199], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, 
"percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 198}, "BetaPosterior.num_fail": {"executed_lines": [204], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 202}, "BetaPosterior.pdf": {"executed_lines": [208], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 206}, "BetaPosterior.left_percentile": {"executed_lines": [218], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 216}, "BetaPosterior.right_percentile": {"executed_lines": [227], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 225}, "BetaPosterior.draw": {"executed_lines": [239, 240, 241, 243, 244, 246, 247, 249], "summary": {"covered_lines": 8, "num_statements": 8, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": 
[], "start_line": 234}, "get_size": {"executed_lines": [268, 269, 270, 271, 273, 274, 275, 276], "summary": {"covered_lines": 8, "num_statements": 8, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 252}, "get_label": {"executed_lines": [281, 282, 288, 289, 290, 291], "summary": {"covered_lines": 6, "num_statements": 6, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 279}, "get_xlims": {"executed_lines": [303, 306, 307, 310, 311], "summary": {"covered_lines": 5, "num_statements": 5, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 294}, "draw": {"executed_lines": [335, 338, 340, 343, 345, 354, 355, 357, 358, 360], "summary": {"covered_lines": 10, "num_statements": 12, "percent_covered": 83.33333333333333, "percent_covered_display": "83", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 83.33333333333333, "percent_statements_covered_display": "83"}, "missing_lines": [336, 341], "excluded_lines": [], "start_line": 314}, "split_legends": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 10, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 10, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [370, 375, 377, 378, 380, 381, 382, 383, 385, 392], "excluded_lines": [], "start_line": 363}, "use_mpl_stylesheet": {"executed_lines": [], "summary": {"covered_lines": 0, 
"num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [399], "excluded_lines": [], "start_line": 397}, "save_figure": {"executed_lines": [410, 411], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 404}, "": {"executed_lines": [3, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 30, 37, 38, 41, 50, 59, 64, 69, 81, 84, 87, 88, 89, 91, 92, 96, 97, 101, 102, 106, 109, 110, 115, 118, 120, 121, 125, 126, 142, 146, 150, 163, 166, 167, 169, 170, 198, 201, 202, 206, 216, 225, 234, 252, 279, 294, 314, 363, 395, 396, 397, 402, 403, 404], "summary": {"covered_lines": 68, "num_statements": 68, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 6, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [25, 26, 27, 95, 100, 105], "start_line": 1}}, "classes": {"AbstractDistribution": {"executed_lines": [112], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 3, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [94, 99, 104], "start_line": 84}, "Histogram": {"executed_lines": [123, 135, 136, 137, 138, 139, 140, 144, 148, 152, 154, 155, 157, 158, 160], "summary": {"covered_lines": 15, "num_statements": 15, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": 
[], "excluded_lines": [], "start_line": 115}, "BetaPosterior": {"executed_lines": [179, 180, 181, 182, 183, 184, 190, 199, 204, 208, 218, 227, 239, 240, 241, 243, 244, 246, 247, 249], "summary": {"covered_lines": 20, "num_statements": 22, "percent_covered": 90.9090909090909, "percent_covered_display": "91", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 90.9090909090909, "percent_statements_covered_display": "91"}, "missing_lines": [185, 186], "excluded_lines": [], "start_line": 163}, "": {"executed_lines": [3, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 30, 37, 38, 41, 50, 59, 61, 64, 66, 69, 74, 75, 76, 77, 78, 81, 84, 87, 88, 89, 91, 92, 96, 97, 101, 102, 106, 109, 110, 115, 118, 120, 121, 125, 126, 142, 146, 150, 163, 166, 167, 169, 170, 198, 201, 202, 206, 216, 225, 234, 252, 268, 269, 270, 271, 273, 274, 275, 276, 279, 281, 282, 288, 289, 290, 291, 294, 303, 306, 307, 310, 311, 314, 335, 338, 340, 343, 345, 354, 355, 357, 358, 360, 363, 395, 396, 397, 402, 403, 404, 410, 411], "summary": {"covered_lines": 106, "num_statements": 122, "percent_covered": 86.88524590163935, "percent_covered_display": "87", "missing_lines": 16, "excluded_lines": 6, "percent_statements_covered": 86.88524590163935, "percent_statements_covered_display": "87"}, "missing_lines": [46, 47, 56, 336, 341, 370, 375, 377, 378, 380, 381, 382, 383, 385, 392, 399], "excluded_lines": [25, 26, 27, 95, 100, 105], "start_line": 1}}}, "src/lyscripts/sample.py": {"executed_lines": [19, 21, 22, 23, 25, 27, 29, 30, 31, 32, 34, 38, 40, 41, 42, 43, 44, 45, 46, 47, 49, 60, 63, 66, 68, 69, 71, 73, 75, 78, 81, 83, 84, 85, 86, 89, 92, 93, 95, 97, 98, 100, 101, 103, 106, 109, 110, 112, 114, 115, 117, 118, 120, 123, 126, 138, 143, 144, 145, 149, 150, 151, 153, 156, 165, 166, 178, 187, 188, 190, 193, 196, 212, 218, 220, 228, 260, 261, 263, 264, 265, 267, 268, 270, 271, 272, 273, 278, 280, 281, 283, 294, 300, 301, 303, 307, 310, 314, 319, 325, 328, 330, 331, 337, 350, 353, 354, 355, 
362, 368, 369, 371, 388, 390, 394, 395, 396, 397, 398, 401, 403, 404, 405, 414, 424], "summary": {"covered_lines": 125, "num_statements": 137, "percent_covered": 91.24087591240875, "percent_covered_display": "91", "missing_lines": 12, "excluded_lines": 5, "percent_statements_covered": 91.24087591240875, "percent_statements_covered_display": "91"}, "missing_lines": [35, 36, 74, 132, 133, 134, 135, 175, 191, 304, 425, 426], "excluded_lines": [312, 313, 316, 317, 318], "functions": {"CompletedItersColumn.__init__": {"executed_lines": [68, 69], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 66}, "CompletedItersColumn.render": {"executed_lines": [73, 75], "summary": {"covered_lines": 2, "num_statements": 3, "percent_covered": 66.66666666666667, "percent_covered_display": "67", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 66.66666666666667, "percent_statements_covered_display": "67"}, "missing_lines": [74], "excluded_lines": [], "start_line": 71}, "ItersPerSecondColumn.render": {"executed_lines": [83, 84, 85, 86], "summary": {"covered_lines": 4, "num_statements": 4, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 81}, "AcorTime.update": {"executed_lines": [97, 98], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 95}, "AcorTime.relative_diff": {"executed_lines": [103], 
"summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 101}, "NumAccepted.update": {"executed_lines": [114, 115], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 112}, "NumAccepted.newly_accepted": {"executed_lines": [120], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 118}, "log_prob_fn": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 4, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [132, 133, 134, 135], "excluded_lines": [], "start_line": 126}, "ensure_initial_state": {"executed_lines": [143, 144, 145, 149, 150, 151, 153], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 138}, "ensure_history_table": {"executed_lines": [165, 166], "summary": {"covered_lines": 2, "num_statements": 3, "percent_covered": 66.66666666666667, "percent_covered_display": "67", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 
66.66666666666667, "percent_statements_covered_display": "67"}, "missing_lines": [175], "excluded_lines": [], "start_line": 156}, "update_history_table": {"executed_lines": [187, 188, 190, 193], "summary": {"covered_lines": 4, "num_statements": 5, "percent_covered": 80.0, "percent_covered_display": "80", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 80.0, "percent_statements_covered_display": "80"}, "missing_lines": [191], "excluded_lines": [], "start_line": 178}, "is_converged": {"executed_lines": [212], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 196}, "_get_columns": {"executed_lines": [220], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 218}, "run_sampling": {"executed_lines": [260, 261, 263, 264, 265, 267, 268, 270, 271, 272, 273, 278, 280, 281, 283, 294, 300, 301, 303], "summary": {"covered_lines": 19, "num_statements": 20, "percent_covered": 95.0, "percent_covered_display": "95", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 95.0, "percent_statements_covered_display": "95"}, "missing_lines": [304], "excluded_lines": [], "start_line": 228}, "DummyPool.__enter__": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 1, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [312], "start_line": 310}, "DummyPool.__exit__": 
{"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 1, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [316], "start_line": 314}, "get_pool": {"executed_lines": [325], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 319}, "init_sampler": {"executed_lines": [330, 331, 337], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 328}, "SampleCLI.cli_cmd": {"executed_lines": [388, 390, 394, 395, 396, 397, 398, 401, 403, 404, 405, 414], "summary": {"covered_lines": 12, "num_statements": 12, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 371}, "": {"executed_lines": [19, 21, 22, 23, 25, 27, 29, 30, 31, 32, 34, 38, 40, 41, 42, 43, 44, 45, 46, 47, 49, 60, 63, 66, 71, 78, 81, 89, 92, 93, 95, 100, 101, 106, 109, 110, 112, 117, 118, 123, 126, 138, 156, 178, 196, 218, 228, 307, 310, 314, 319, 328, 350, 353, 354, 355, 362, 368, 369, 371, 424], "summary": {"covered_lines": 61, "num_statements": 65, "percent_covered": 93.84615384615384, "percent_covered_display": "94", "missing_lines": 4, "excluded_lines": 3, "percent_statements_covered": 93.84615384615384, "percent_statements_covered_display": "94"}, "missing_lines": 
[35, 36, 425, 426], "excluded_lines": [313, 317, 318], "start_line": 1}}, "classes": {"CompletedItersColumn": {"executed_lines": [68, 69, 73, 75], "summary": {"covered_lines": 4, "num_statements": 5, "percent_covered": 80.0, "percent_covered_display": "80", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 80.0, "percent_statements_covered_display": "80"}, "missing_lines": [74], "excluded_lines": [], "start_line": 63}, "ItersPerSecondColumn": {"executed_lines": [83, 84, 85, 86], "summary": {"covered_lines": 4, "num_statements": 4, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 78}, "AcorTime": {"executed_lines": [97, 98, 103], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 89}, "NumAccepted": {"executed_lines": [114, 115, 120], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 106}, "DummyPool": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 2, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [312, 316], "start_line": 307}, "SampleCLI": {"executed_lines": [388, 390, 394, 395, 396, 397, 398, 401, 403, 404, 405, 414], "summary": {"covered_lines": 12, "num_statements": 12, 
"percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 350}, "": {"executed_lines": [19, 21, 22, 23, 25, 27, 29, 30, 31, 32, 34, 38, 40, 41, 42, 43, 44, 45, 46, 47, 49, 60, 63, 66, 71, 78, 81, 89, 92, 93, 95, 100, 101, 106, 109, 110, 112, 117, 118, 123, 126, 138, 143, 144, 145, 149, 150, 151, 153, 156, 165, 166, 178, 187, 188, 190, 193, 196, 212, 218, 220, 228, 260, 261, 263, 264, 265, 267, 268, 270, 271, 272, 273, 278, 280, 281, 283, 294, 300, 301, 303, 307, 310, 314, 319, 325, 328, 330, 331, 337, 350, 353, 354, 355, 362, 368, 369, 371, 424], "summary": {"covered_lines": 99, "num_statements": 110, "percent_covered": 90.0, "percent_covered_display": "90", "missing_lines": 11, "excluded_lines": 3, "percent_statements_covered": 90.0, "percent_statements_covered_display": "90"}, "missing_lines": [35, 36, 132, 133, 134, 135, 175, 191, 304, 425, 426], "excluded_lines": [313, 317, 318], "start_line": 1}}}, "src/lyscripts/schedule.py": {"executed_lines": [13, 15, 16, 19, 22, 31], "summary": {"covered_lines": 6, "num_statements": 11, "percent_covered": 54.54545454545455, "percent_covered_display": "55", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 54.54545454545455, "percent_statements_covered_display": "55"}, "missing_lines": [24, 26, 28, 32, 33], "excluded_lines": [], "functions": {"ScheduleCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [24, 26, 28], "excluded_lines": [], "start_line": 22}, "": {"executed_lines": [13, 15, 16, 19, 22, 31], "summary": {"covered_lines": 6, "num_statements": 8, "percent_covered": 75.0, 
"percent_covered_display": "75", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 75.0, "percent_statements_covered_display": "75"}, "missing_lines": [32, 33], "excluded_lines": [], "start_line": 1}}, "classes": {"ScheduleCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [24, 26, 28], "excluded_lines": [], "start_line": 19}, "": {"executed_lines": [13, 15, 16, 19, 22, 31], "summary": {"covered_lines": 6, "num_statements": 8, "percent_covered": 75.0, "percent_covered_display": "75", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 75.0, "percent_statements_covered_display": "75"}, "missing_lines": [32, 33], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/schema.py": {"executed_lines": [27, 29, 30, 32, 35, 38, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 59, 65], "summary": {"covered_lines": 19, "num_statements": 22, "percent_covered": 86.36363636363636, "percent_covered_display": "86", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 86.36363636363636, "percent_statements_covered_display": "86"}, "missing_lines": [61, 62, 66], "excluded_lines": [], "functions": {"main": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 2, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [61, 62], "excluded_lines": [], "start_line": 59}, "": {"executed_lines": [27, 29, 30, 32, 35, 38, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 59, 65], "summary": {"covered_lines": 19, "num_statements": 20, "percent_covered": 95.0, "percent_covered_display": "95", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 95.0, 
"percent_statements_covered_display": "95"}, "missing_lines": [66], "excluded_lines": [], "start_line": 1}}, "classes": {"SchemaSettings": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 35}, "": {"executed_lines": [27, 29, 30, 32, 35, 38, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 59, 65], "summary": {"covered_lines": 19, "num_statements": 22, "percent_covered": 86.36363636363636, "percent_covered_display": "86", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 86.36363636363636, "percent_statements_covered_display": "86"}, "missing_lines": [61, 62, 66], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/utils.py": {"executed_lines": [3, 5, 6, 7, 8, 9, 10, 11, 13, 18, 21, 23, 24, 26, 27, 30, 33, 42, 43, 45, 46, 47, 48, 50, 53, 63, 65, 66, 67, 68, 70, 72, 75, 91, 93, 94, 95, 97, 98, 100, 102, 105, 115, 117, 118, 119, 120, 122, 124, 127, 137, 138, 139, 140, 143, 146, 151, 152, 154, 155, 156, 159, 160, 162, 163, 164, 165, 168, 169, 177, 178, 179, 180, 183, 184, 192, 193, 195, 199], "summary": {"covered_lines": 79, "num_statements": 84, "percent_covered": 94.04761904761905, "percent_covered_display": "94", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 94.04761904761905, "percent_statements_covered_display": "94"}, "missing_lines": [25, 141, 142, 196, 197], "excluded_lines": [], "functions": {"binom_pmf": {"executed_lines": [23, 24, 26, 27, 30], "summary": {"covered_lines": 5, "num_statements": 6, "percent_covered": 83.33333333333333, "percent_covered_display": "83", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 83.33333333333333, "percent_statements_covered_display": "83"}, "missing_lines": [25], "excluded_lines": [], 
"start_line": 21}, "get_dict_depth": {"executed_lines": [42, 43, 45, 46, 47, 48, 50], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 33}, "delete_private_keys": {"executed_lines": [63, 65, 66, 67, 68, 70, 72], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 53}, "flatten": {"executed_lines": [91, 93, 94, 95, 97, 98, 100, 102], "summary": {"covered_lines": 8, "num_statements": 8, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 75}, "unflatten": {"executed_lines": [115, 117, 118, 119, 120, 122, 124], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 105}, "get_modalities_subset": {"executed_lines": [137, 138, 139, 140, 143], "summary": {"covered_lines": 5, "num_statements": 7, "percent_covered": 71.42857142857143, "percent_covered_display": "71", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 71.42857142857143, "percent_statements_covered_display": "71"}, "missing_lines": [141, 142], "excluded_lines": [], "start_line": 127}, "load_patient_data": {"executed_lines": [151, 152, 154, 155, 156], "summary": {"covered_lines": 5, 
"num_statements": 5, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 146}, "load_yaml_params": {"executed_lines": [162, 163, 164, 165], "summary": {"covered_lines": 4, "num_statements": 4, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 160}, "load_model_samples": {"executed_lines": [177, 178, 179, 180], "summary": {"covered_lines": 4, "num_statements": 4, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 169}, "get_hdf5_backend": {"executed_lines": [192, 193, 195, 199], "summary": {"covered_lines": 4, "num_statements": 6, "percent_covered": 66.66666666666667, "percent_covered_display": "67", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 66.66666666666667, "percent_statements_covered_display": "67"}, "missing_lines": [196, 197], "excluded_lines": [], "start_line": 184}, "": {"executed_lines": [3, 5, 6, 7, 8, 9, 10, 11, 13, 18, 21, 33, 53, 75, 105, 127, 146, 159, 160, 168, 169, 183, 184], "summary": {"covered_lines": 23, "num_statements": 23, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"": {"executed_lines": [3, 5, 6, 7, 8, 9, 10, 11, 13, 18, 21, 23, 24, 26, 27, 30, 33, 42, 43, 45, 46, 47, 48, 50, 53, 63, 65, 66, 67, 68, 70, 72, 75, 91, 93, 94, 95, 97, 98, 100, 102, 
105, 115, 117, 118, 119, 120, 122, 124, 127, 137, 138, 139, 140, 143, 146, 151, 152, 154, 155, 156, 159, 160, 162, 163, 164, 165, 168, 169, 177, 178, 179, 180, 183, 184, 192, 193, 195, 199], "summary": {"covered_lines": 79, "num_statements": 84, "percent_covered": 94.04761904761905, "percent_covered_display": "94", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 94.04761904761905, "percent_statements_covered_display": "94"}, "missing_lines": [25, 141, 142, 196, 197], "excluded_lines": [], "start_line": 1}}}}, "totals": {"covered_lines": 1239, "num_statements": 1735, "percent_covered": 71.41210374639769, "percent_covered_display": "71", "missing_lines": 496, "excluded_lines": 15, "percent_statements_covered": 71.41210374639769, "percent_statements_covered_display": "71"}}, "coverage_path": "."} \ No newline at end of file diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index d0c3cbf..0000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
-%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index 747ffb7..0000000 --- a/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.https://www.sphinx-doc.org/ - exit /b 1 -) - -if "%1" == "" goto help - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css deleted file mode 100644 index 4e87d9c..0000000 --- a/docs/source/_static/css/custom.css +++ /dev/null @@ -1,11 +0,0 @@ -/* Indent by four spaces */ -.sig-param::before { - content: "\a\20\20\20\20"; - white-space: pre; -} - -/* Don't indent closing bracket */ -dt em.sig-param:last-of-type::after { - content: "\a"; - white-space: pre; -} diff --git a/docs/source/_static/favicon.png b/docs/source/_static/favicon.png deleted file mode 100644 index 6666b97..0000000 Binary files a/docs/source/_static/favicon.png and /dev/null differ diff --git a/docs/source/_static/github-social-card.png b/docs/source/_static/github-social-card.png deleted file mode 100644 index 94a83c6..0000000 Binary files a/docs/source/_static/github-social-card.png and /dev/null differ diff --git a/docs/source/compute/init.rst b/docs/source/compute/init.rst deleted file mode 100644 index af6e244..0000000 
--- a/docs/source/compute/init.rst +++ /dev/null @@ -1,25 +0,0 @@ -.. currentmodule:: lyscripts.compute - -Model Predictions -================= - -.. automodule:: lyscripts.compute - :members: - -Command Help ------------- - -.. program-output:: lyscripts compute --help - - -Submodules ----------- - -.. toctree:: - :maxdepth: 1 - - priors - posteriors - prevalences - risks - utils diff --git a/docs/source/compute/posteriors.rst b/docs/source/compute/posteriors.rst deleted file mode 100644 index a99c0ef..0000000 --- a/docs/source/compute/posteriors.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.compute.posteriors - -Posterior State Distributions -============================= - -.. automodule:: lyscripts.compute.posteriors - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts compute posteriors --help diff --git a/docs/source/compute/prevalences.rst b/docs/source/compute/prevalences.rst deleted file mode 100644 index 35b6276..0000000 --- a/docs/source/compute/prevalences.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.compute.prevalences - -Predict Prevalence of Involvement -================================= - -.. automodule:: lyscripts.compute.prevalences - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts compute prevalences --help diff --git a/docs/source/compute/priors.rst b/docs/source/compute/priors.rst deleted file mode 100644 index 41813b3..0000000 --- a/docs/source/compute/priors.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.compute.priors - -Prior State Distributions -========================= - -.. automodule:: lyscripts.compute.priors - :members: - :show-inheritance: - -Command Help ------------- - -.. 
program-output:: lyscripts compute priors --help diff --git a/docs/source/compute/risks.rst b/docs/source/compute/risks.rst deleted file mode 100644 index e23d041..0000000 --- a/docs/source/compute/risks.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.compute.risks - -Compute Risk of Involvement -=========================== - -.. automodule:: lyscripts.compute.risks - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts compute risks --help diff --git a/docs/source/compute/utils.rst b/docs/source/compute/utils.rst deleted file mode 100644 index 1335e51..0000000 --- a/docs/source/compute/utils.rst +++ /dev/null @@ -1,8 +0,0 @@ -.. currentmodule:: lyscripts.compute.utils - -Helpers for Computing Quantities -================================ - -.. automodule:: lyscripts.compute.utils - :members: - :show-inheritance: diff --git a/docs/source/conf.py b/docs/source/conf.py deleted file mode 100644 index 1c7a8d3..0000000 --- a/docs/source/conf.py +++ /dev/null @@ -1,98 +0,0 @@ -"""Configuration file for the Sphinx documentation builder. - -For the full list of built-in configuration values, see the documentation: -https://www.sphinx-doc.org/en/master/usage/configuration.html. 
-""" - -import lyscripts - -# -- Project information ----------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information - -project = "lyscripts" -copyright = "2022, Roman Ludwig" # noqa: A001 -author = "Roman Ludwig" -gh_username = "rmnldwg" -version = lyscripts.__version__ -release = lyscripts.__version__ - -# -- General configuration --------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration - -extensions = [ - "sphinx.ext.intersphinx", - "sphinx.ext.autodoc", - "sphinx.ext.mathjax", - "sphinx.ext.viewcode", - "sphinx.ext.napoleon", - "sphinxcontrib.programoutput", - "sphinxcontrib.autodoc_pydantic", - "myst_parser", -] - -# markdown to reST -source_suffix = [".rst", ".md"] - -templates_path = ["_templates"] -exclude_patterns = [] - -# document classes and their constructors -autoclass_content = "class" - -# sort members by source -autodoc_member_order = "bysource" - -# show type hints -autodoc_typehints = "signature" - -# create links to other projects -intersphinx_mapping = { - "python": ("https://docs.python.org/3.10", None), - "lymph": ("https://lymph-model.readthedocs.io/stable/", None), - "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), - "numpy": ("https://numpy.org/doc/stable/", None), - "lydata": ("https://lydata.readthedocs.io/stable/", None), - "emcee": ("https://emcee.readthedocs.io/en/stable/", None), -} - -# autodoc_pydantic settings -autodoc_pydantic_model_show_config_summary = False -autodoc_pydantic_model_show_validator_members = False -autodoc_pydantic_model_show_field_summary = False -autodoc_pydantic_model_member_order = "bysource" -autodoc_pydantic_settings_show_config_summary = False -autodoc_pydantic_settings_show_validator_members = False -autodoc_pydantic_settings_show_field_summary = False -autodoc_pydantic_settings_member_order = "bysource" 
-autodoc_pydantic_field_show_constraints = False - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# - -html_theme = "sphinx_book_theme" -html_theme_options = { - "repository_url": f"https://github.com/{gh_username}/{project}", - "repository_branch": "main", - "use_repository_button": True, - "show_navbar_depth": 3, - "home_page_in_toc": True, -} -html_favicon = "_static/favicon.png" - -# import sphinx_modern_theme -# html_theme = "sphinx_modern_theme" -# html_theme_path = [sphinx_modern_theme.get_html_theme_path()] - -# html_theme = "bootstrap-astropy" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["./_static"] -html_css_files = [ - "css/custom.css", -] diff --git a/docs/source/configs.rst b/docs/source/configs.rst deleted file mode 100644 index 3d209de..0000000 --- a/docs/source/configs.rst +++ /dev/null @@ -1,8 +0,0 @@ -.. currentmodule:: lyscripts.configs - -Pydantic Configurations -======================= - -.. automodule:: lyscripts.configs - :members: - :show-inheritance: diff --git a/docs/source/data/collect.rst b/docs/source/data/collect.rst deleted file mode 100644 index 8c8b08b..0000000 --- a/docs/source/data/collect.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.data.collect - -Collect lyDATA Tables Interactively -=================================== - -.. automodule:: lyscripts.data.collect - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts data collect --help diff --git a/docs/source/data/enhance.rst b/docs/source/data/enhance.rst deleted file mode 100644 index d1f28e7..0000000 --- a/docs/source/data/enhance.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. 
currentmodule:: lyscripts.data.enhance - -Infer Additional Data Columns -============================= - -.. automodule:: lyscripts.data.enhance - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts data enhance --help diff --git a/docs/source/data/filter.rst b/docs/source/data/filter.rst deleted file mode 100644 index e8e6824..0000000 --- a/docs/source/data/filter.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.data.filter - -Filtering Datasets -================== - -.. automodule:: lyscripts.data.filter - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts data filter --help diff --git a/docs/source/data/generate.rst b/docs/source/data/generate.rst deleted file mode 100644 index 7ba9820..0000000 --- a/docs/source/data/generate.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.data.generate - -Generating Synthetic Data -========================= - -.. automodule:: lyscripts.data.generate - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts data generate --help diff --git a/docs/source/data/init.rst b/docs/source/data/init.rst deleted file mode 100644 index 014bb85..0000000 --- a/docs/source/data/init.rst +++ /dev/null @@ -1,29 +0,0 @@ -.. currentmodule:: lyscripts.data - -Data Commands/Helpers -===================== - -.. automodule:: lyscripts.data - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts data --help - - -Submodules ----------- - -.. toctree:: - :maxdepth: 1 - - collect - lyproxify - join - split - filter - enhance - generate - utils diff --git a/docs/source/data/join.rst b/docs/source/data/join.rst deleted file mode 100644 index a400a72..0000000 --- a/docs/source/data/join.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.data.join - -Join Data Files -=============== - -.. 
automodule:: lyscripts.data.join - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts data join --help diff --git a/docs/source/data/lyproxify.rst b/docs/source/data/lyproxify.rst deleted file mode 100644 index 653819c..0000000 --- a/docs/source/data/lyproxify.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.data.lyproxify - -Map to LyProX Format -==================== - -.. automodule:: lyscripts.data.lyproxify - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts data lyproxify --help diff --git a/docs/source/data/split.rst b/docs/source/data/split.rst deleted file mode 100644 index e8a831b..0000000 --- a/docs/source/data/split.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.data.split - -Split Data -========== - -.. automodule:: lyscripts.data.split - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts data split --help diff --git a/docs/source/data/utils.rst b/docs/source/data/utils.rst deleted file mode 100644 index 89df120..0000000 --- a/docs/source/data/utils.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. currentmodule:: lyscripts.data.utils - -Utilities Related to Data Processing -==================================== - -.. automodule:: lyscripts.data.utils - :members: diff --git a/docs/source/decorators.rst b/docs/source/decorators.rst deleted file mode 100644 index e2adbed..0000000 --- a/docs/source/decorators.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. currentmodule:: lyscripts.decorators - -Decorators -========== - -.. automodule:: lyscripts.decorators - :members: diff --git a/docs/source/evaluate.rst b/docs/source/evaluate.rst deleted file mode 100644 index b87d741..0000000 --- a/docs/source/evaluate.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. currentmodule:: lyscripts.evaluate - -Evaluation -========== - -.. automodule:: lyscripts.evaluate - :members: - :show-inheritance: - -Command Help ------------- - -.. - .. 
program-output:: lyscripts evaluate --help diff --git a/docs/source/index.rst b/docs/source/index.rst deleted file mode 100644 index 647e4e1..0000000 --- a/docs/source/index.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. lyscripts documentation master file, created by - sphinx-quickstart on Wed Mar 20 20:56:17 2024. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Introduction -============ - - -.. include:: ../../README.md - :end-before: - :parser: myst_parser.sphinx_ - - -Documentation -------------- - -.. toctree:: - :maxdepth: 1 - - init - configs - data/init - sample - compute/init - evaluate - plots - schedule - schema - utils - decorators - - -Indices and tables ------------------- - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/source/init.rst b/docs/source/init.rst deleted file mode 100644 index 72ac180..0000000 --- a/docs/source/init.rst +++ /dev/null @@ -1,12 +0,0 @@ -.. currentmodule:: lyscripts - -Main Lyscripts CLI -================== - -.. automodule:: lyscripts - :members: - -Command Help ------------- - -.. program-output:: lyscripts --help diff --git a/docs/source/plots.rst b/docs/source/plots.rst deleted file mode 100644 index 2c3f504..0000000 --- a/docs/source/plots.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. currentmodule:: lyscripts.plots - -Plotting Utilities -================== - -.. automodule:: lyscripts.plots - :members: diff --git a/docs/source/sample.rst b/docs/source/sample.rst deleted file mode 100644 index ff5d825..0000000 --- a/docs/source/sample.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.sample - -MCMC Sampling -============= - -.. automodule:: lyscripts.sample - :members: - :show-inheritance: - -Command Help ------------- - -.. 
program-output:: lyscripts sample --help diff --git a/docs/source/schedule.rst b/docs/source/schedule.rst deleted file mode 100644 index 4869206..0000000 --- a/docs/source/schedule.rst +++ /dev/null @@ -1,12 +0,0 @@ -.. currentmodule:: lyscripts.schedule - -Temperature Schedule -==================== - -.. automodule:: lyscripts.schedule - :members: - -Command Help ------------- - -.. program-output:: lyscripts schedule --help diff --git a/docs/source/schema.rst b/docs/source/schema.rst deleted file mode 100644 index 9bab995..0000000 --- a/docs/source/schema.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. currentmodule:: lyscripts.schema - -JSON Schema -=========== - -.. automodule:: lyscripts.schema - :members: diff --git a/docs/source/utils.rst b/docs/source/utils.rst deleted file mode 100644 index 5f06cba..0000000 --- a/docs/source/utils.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. currentmodule:: lyscripts.utils - -Top Level Utilities -=================== - -.. automodule:: lyscripts.utils - :members: diff --git a/endpoint.json b/endpoint.json new file mode 100644 index 0000000..92f3222 --- /dev/null +++ b/endpoint.json @@ -0,0 +1 @@ +{"schemaVersion": 1, "label": "Coverage", "message": "71%", "color": "orange"} \ No newline at end of file diff --git a/favicon.png b/favicon.png deleted file mode 100644 index 6666b97..0000000 Binary files a/favicon.png and /dev/null differ diff --git a/github-social-card.png b/github-social-card.png deleted file mode 100644 index 94a83c6..0000000 Binary files a/github-social-card.png and /dev/null differ diff --git a/htmlcov/class_index.html b/htmlcov/class_index.html new file mode 100644 index 0000000..2fb0b5d --- /dev/null +++ b/htmlcov/class_index.html @@ -0,0 +1,812 @@ + + + + + Coverage report + + + + + +

Coverage report: + 71% +

+ + +

+ Files + Functions + Classes +

+ coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

File	class	statements	missing	excluded	coverage
src / lyscripts / __init__.py	LyscriptsCLI	7	7	0	0%
src / lyscripts / __init__.py	(no class)	23	0	0	100%
src / lyscripts / __main__.py	(no class)	3	3	0	0%
src / lyscripts / _version.py	(no class)	11	0	0	100%
src / lyscripts / cli.py	InterceptHandler	14	14	0	0%
src / lyscripts / cli.py	(no class)	31	12	0	61%
src / lyscripts / compute / __init__.py	ComputeCLI	1	1	0	0%
src / lyscripts / compute / __init__.py	(no class)	4	0	0	100%
src / lyscripts / compute / __main__.py	(no class)	5	5	0	0%
src / lyscripts / compute / evidence.py	EvidenceCLI	24	24	0	0%
src / lyscripts / compute / evidence.py	(no class)	41	17	0	59%
src / lyscripts / compute / posteriors.py	PosteriorsCLI	17	17	0	0%
src / lyscripts / compute / posteriors.py	(no class)	29	2	0	93%
src / lyscripts / compute / prevalences.py	PrevalencesCLI	19	0	0	100%
src / lyscripts / compute / prevalences.py	(no class)	63	7	0	89%
src / lyscripts / compute / priors.py	PriorsCLI	12	0	0	100%
src / lyscripts / compute / priors.py	(no class)	23	2	0	91%
src / lyscripts / compute / risks.py	RisksCLI	22	22	0	0%
src / lyscripts / compute / risks.py	(no class)	29	11	0	62%
src / lyscripts / compute / utils.py	HDF5FileStorage	26	3	0	88%
src / lyscripts / compute / utils.py	(no class)	92	3	0	97%
src / lyscripts / configs.py	DataConfig	4	1	0	75%
src / lyscripts / configs.py	DiagnosisConfig	1	1	0	0%
src / lyscripts / configs.py	GraphConfig	2	0	0	100%
src / lyscripts / configs.py	ModelConfig	15	4	0	73%
src / lyscripts / configs.py	DeprecatedModelConfig	19	0	0	100%
src / lyscripts / configs.py	SamplingConfig	1	0	0	100%
src / lyscripts / configs.py	ScheduleConfig	8	8	0	0%
src / lyscripts / configs.py	ScenarioConfig	8	4	0	50%
src / lyscripts / configs.py	DynamicYamlConfigSettingsSource	12	1	0	92%
src / lyscripts / configs.py	BaseCLI	3	0	0	100%
src / lyscripts / configs.py	(no class)	207	22	0	89%
src / lyscripts / data / __init__.py	DataCLI	1	1	0	0%
src / lyscripts / data / __init__.py	(no class)	5	0	0	100%
src / lyscripts / data / __main__.py	(no class)	18	18	0	0%
src / lyscripts / data / collect / __init__.py	CollectorCLI	5	5	0	0%
src / lyscripts / data / collect / __init__.py	(no class)	53	22	0	58%
src / lyscripts / data / enhance.py	EnhanceCLI	4	4	0	0%
src / lyscripts / data / enhance.py	(no class)	15	2	0	87%
src / lyscripts / data / fetch.py	FetchCLI	5	5	0	0%
src / lyscripts / data / fetch.py	(no class)	16	2	0	88%
src / lyscripts / data / filter.py	FilterCLI	28	28	0	0%
src / lyscripts / data / filter.py	(no class)	20	2	0	90%
src / lyscripts / data / generate.py	GenerateCLI	17	2	0	88%
src / lyscripts / data / generate.py	(no class)	18	2	0	89%
src / lyscripts / data / join.py	JoinCLI	8	8	0	0%
src / lyscripts / data / join.py	(no class)	14	2	0	86%
src / lyscripts / data / lyproxify.py	LyproxifyCLI	17	17	0	0%
src / lyscripts / data / lyproxify.py	(no class)	106	50	1	53%
src / lyscripts / data / split.py	SplitCLI	12	12	0	0%
src / lyscripts / data / split.py	(no class)	17	2	0	88%
src / lyscripts / data / utils.py	(no class)	9	0	0	100%
src / lyscripts / decorators.py	(no class)	41	4	0	90%
src / lyscripts / evaluate.py	(no class)	70	52	0	26%
src / lyscripts / integrate.py	IntegrateCLI	17	17	0	0%
src / lyscripts / integrate.py	(no class)	29	5	0	83%
src / lyscripts / plots.py	AbstractDistribution	1	0	3	100%
src / lyscripts / plots.py	Histogram	15	0	0	100%
src / lyscripts / plots.py	BetaPosterior	22	2	0	91%
src / lyscripts / plots.py	(no class)	122	16	6	87%
src / lyscripts / sample.py	CompletedItersColumn	5	1	0	80%
src / lyscripts / sample.py	ItersPerSecondColumn	4	0	0	100%
src / lyscripts / sample.py	AcorTime	3	0	0	100%
src / lyscripts / sample.py	NumAccepted	3	0	0	100%
src / lyscripts / sample.py	SampleCLI	12	0	0	100%
src / lyscripts / sample.py	(no class)	110	11	3	90%
src / lyscripts / schedule.py	ScheduleCLI	3	3	0	0%
src / lyscripts / schedule.py	(no class)	8	2	0	75%
src / lyscripts / schema.py	(no class)	22	3	0	86%
src / lyscripts / utils.py	(no class)	84	5	0	94%
Total		1735	496	15	71%

+ No items found using the specified filter. +

7 empty classes skipped.

+ + + diff --git a/htmlcov/coverage_html_cb_dd2e7eb5.js b/htmlcov/coverage_html_cb_dd2e7eb5.js new file mode 100644 index 0000000..6f87174 --- /dev/null +++ b/htmlcov/coverage_html_cb_dd2e7eb5.js @@ -0,0 +1,735 @@ +// Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 +// For details: https://github.com/coveragepy/coveragepy/blob/main/NOTICE.txt + +// Coverage.py HTML report browser code. +/*jslint browser: true, sloppy: true, vars: true, plusplus: true, maxerr: 50, indent: 4 */ +/*global coverage: true, document, window, $ */ + +coverage = {}; + +// General helpers +function debounce(callback, wait) { + let timeoutId = null; + return function(...args) { + clearTimeout(timeoutId); + timeoutId = setTimeout(() => { + callback.apply(this, args); + }, wait); + }; +}; + +function checkVisible(element) { + const rect = element.getBoundingClientRect(); + const viewBottom = Math.max(document.documentElement.clientHeight, window.innerHeight); + const viewTop = 30; + return !(rect.bottom < viewTop || rect.top >= viewBottom); +} + +function on_click(sel, fn) { + const elt = document.querySelector(sel); + if (elt) { + elt.addEventListener("click", fn); + } +} + +// Helpers for table sorting +function getCellValue(row, column = 0) { + const cell = row.cells[column] // nosemgrep: eslint.detect-object-injection + if (cell.childElementCount == 1) { + var child = cell.firstElementChild; + if (child.tagName === "A") { + child = child.firstElementChild; + } + if (child instanceof HTMLDataElement && child.value) { + return child.value; + } + } + return cell.innerText || cell.textContent; +} + +function rowComparator(rowA, rowB, column = 0) { + let valueA = getCellValue(rowA, column); + let valueB = getCellValue(rowB, column); + if (!isNaN(valueA) && !isNaN(valueB)) { + return valueA - valueB; + } + return valueA.localeCompare(valueB, undefined, {numeric: true}); +} + +function sortColumn(th) { + // Get the current sorting direction of the selected header, + 
// clear state on other headers and then set the new sorting direction. + const currentSortOrder = th.getAttribute("aria-sort"); + [...th.parentElement.cells].forEach(header => header.setAttribute("aria-sort", "none")); + var direction; + if (currentSortOrder === "none") { + direction = th.dataset.defaultSortOrder || "ascending"; + } + else if (currentSortOrder === "ascending") { + direction = "descending"; + } + else { + direction = "ascending"; + } + th.setAttribute("aria-sort", direction); + + const column = [...th.parentElement.cells].indexOf(th) + + // Sort all rows and afterwards append them in order to move them in the DOM. + Array.from(th.closest("table").querySelectorAll("tbody tr")) + .sort((rowA, rowB) => rowComparator(rowA, rowB, column) * (direction === "ascending" ? 1 : -1)) + .forEach(tr => tr.parentElement.appendChild(tr)); + + // Save the sort order for next time. + if (th.id !== "region") { + let th_id = "file"; // Sort by file if we don't have a column id + let current_direction = direction; + const stored_list = localStorage.getItem(coverage.INDEX_SORT_STORAGE); + if (stored_list) { + ({th_id, direction} = JSON.parse(stored_list)) + } + localStorage.setItem(coverage.INDEX_SORT_STORAGE, JSON.stringify({ + "th_id": th.id, + "direction": current_direction + })); + if (th.id !== th_id || document.getElementById("region")) { + // Sort column has changed, unset sorting by function or class. + localStorage.setItem(coverage.SORTED_BY_REGION, JSON.stringify({ + "by_region": false, + "region_direction": current_direction + })); + } + } + else { + // Sort column has changed to by function or class, remember that. + localStorage.setItem(coverage.SORTED_BY_REGION, JSON.stringify({ + "by_region": true, + "region_direction": direction + })); + } +} + +// Find all the elements with data-shortcut attribute, and use them to assign a shortcut key. 
+coverage.assign_shortkeys = function () { + document.querySelectorAll("[data-shortcut]").forEach(element => { + document.addEventListener("keypress", event => { + if (event.target.tagName.toLowerCase() === "input") { + return; // ignore keypress from search filter + } + if (event.key === element.dataset.shortcut) { + element.click(); + } + }); + }); +}; + +// Create the events for the filter box. +coverage.wire_up_filter = function () { + // Populate the filter and hide100 inputs if there are saved values for them. + const saved_filter_value = localStorage.getItem(coverage.FILTER_STORAGE); + if (saved_filter_value) { + document.getElementById("filter").value = saved_filter_value; + } + const saved_hide100_value = localStorage.getItem(coverage.HIDE100_STORAGE); + if (saved_hide100_value) { + document.getElementById("hide100").checked = JSON.parse(saved_hide100_value); + } + + // Cache elements. + const table = document.querySelector("table.index"); + const table_body_rows = table.querySelectorAll("tbody tr"); + const no_rows = document.getElementById("no_rows"); + + const footer = table.tFoot.rows[0]; + const ratio_columns = Array.from(footer.cells).map(cell => Boolean(cell.dataset.ratio)); + + // Observe filter keyevents. + const filter_handler = (event => { + // Keep running total of each metric, first index contains number of shown rows + const totals = ratio_columns.map( + is_ratio => is_ratio ? {"numer": 0, "denom": 0} : 0 + ); + + var text = document.getElementById("filter").value; + // Store filter value + localStorage.setItem(coverage.FILTER_STORAGE, text); + const casefold = (text === text.toLowerCase()); + const hide100 = document.getElementById("hide100").checked; + // Store hide value. + localStorage.setItem(coverage.HIDE100_STORAGE, JSON.stringify(hide100)); + + // Hide / show elements. + table_body_rows.forEach(row => { + var show = false; + // Check the text filter. 
+ for (let column = 0; column < totals.length; column++) { + cell = row.cells[column]; + if (cell.classList.contains("name")) { + var celltext = cell.textContent; + if (casefold) { + celltext = celltext.toLowerCase(); + } + if (celltext.includes(text)) { + show = true; + } + } + } + + // Check the "hide covered" filter. + if (show && hide100) { + const [numer, denom] = row.cells[row.cells.length - 1].dataset.ratio.split(" "); + show = (numer !== denom); + } + + if (!show) { + // hide + row.classList.add("hidden"); + return; + } + + // show + row.classList.remove("hidden"); + totals[0]++; + + for (let column = 0; column < totals.length; column++) { + // Accumulate dynamic totals + cell = row.cells[column] // nosemgrep: eslint.detect-object-injection + if (cell.matches(".name, .spacer")) { + continue; + } + if (ratio_columns[column] && cell.dataset.ratio) { + // Column stores a ratio + const [numer, denom] = cell.dataset.ratio.split(" "); + totals[column]["numer"] += parseInt(numer, 10); // nosemgrep: eslint.detect-object-injection + totals[column]["denom"] += parseInt(denom, 10); // nosemgrep: eslint.detect-object-injection + } + else { + totals[column] += parseInt(cell.textContent, 10); // nosemgrep: eslint.detect-object-injection + } + } + }); + + // Show placeholder if no rows will be displayed. + if (!totals[0]) { + // Show placeholder, hide table. + no_rows.style.display = "block"; + table.style.display = "none"; + return; + } + + // Hide placeholder, show table. + no_rows.style.display = null; + table.style.display = null; + + // Calculate new dynamic sum values based on visible rows. + for (let column = 0; column < totals.length; column++) { + // Get footer cell element. + const cell = footer.cells[column]; // nosemgrep: eslint.detect-object-injection + if (cell.matches(".name, .spacer")) { + continue; + } + + // Set value into dynamic footer cell element. 
+ if (ratio_columns[column]) { + // Percentage column uses the numerator and denominator, + // and adapts to the number of decimal places. + const match = /\.([0-9]+)/.exec(cell.textContent); + const places = match ? match[1].length : 0; + const { numer, denom } = totals[column]; // nosemgrep: eslint.detect-object-injection + cell.dataset.ratio = `${numer} ${denom}`; + // Check denom to prevent NaN if filtered files contain no statements + cell.textContent = denom + ? `${(numer * 100 / denom).toFixed(places)}%` + : `${(100).toFixed(places)}%`; + } + else { + cell.textContent = totals[column]; // nosemgrep: eslint.detect-object-injection + } + } + }); + + document.getElementById("filter").addEventListener("input", debounce(filter_handler)); + document.getElementById("hide100").addEventListener("input", debounce(filter_handler)); + + // Trigger change event on setup, to force filter on page refresh + // (filter value may still be present). + document.getElementById("filter").dispatchEvent(new Event("input")); + document.getElementById("hide100").dispatchEvent(new Event("input")); +}; +coverage.FILTER_STORAGE = "COVERAGE_FILTER_VALUE"; +coverage.HIDE100_STORAGE = "COVERAGE_HIDE100_VALUE"; + +// Set up the click-to-sort columns. 
+coverage.wire_up_sorting = function () { + document.querySelectorAll("[data-sortable] th[aria-sort]").forEach( + th => th.addEventListener("click", e => sortColumn(e.target)) + ); + + // Look for a localStorage item containing previous sort settings: + let th_id = "file", direction = "ascending"; + const stored_list = localStorage.getItem(coverage.INDEX_SORT_STORAGE); + if (stored_list) { + ({th_id, direction} = JSON.parse(stored_list)); + } + let by_region = false, region_direction = "ascending"; + const sorted_by_region = localStorage.getItem(coverage.SORTED_BY_REGION); + if (sorted_by_region) { + ({ + by_region, + region_direction + } = JSON.parse(sorted_by_region)); + } + + const region_id = "region"; + if (by_region && document.getElementById(region_id)) { + direction = region_direction; + } + // If we are in a page that has a column with id of "region", sort on + // it if the last sort was by function or class. + let th; + if (document.getElementById(region_id)) { + th = document.getElementById(by_region ? region_id : th_id); + } + else { + th = document.getElementById(th_id); + } + th.setAttribute("aria-sort", direction === "ascending" ? "descending" : "ascending"); + th.click() +}; + +coverage.INDEX_SORT_STORAGE = "COVERAGE_INDEX_SORT_2"; +coverage.SORTED_BY_REGION = "COVERAGE_SORT_REGION"; + +// Loaded on index.html +coverage.index_ready = function () { + coverage.assign_shortkeys(); + coverage.wire_up_filter(); + coverage.wire_up_sorting(); + + on_click(".button_prev_file", coverage.to_prev_file); + on_click(".button_next_file", coverage.to_next_file); + + on_click(".button_show_hide_help", coverage.show_hide_help); +}; + +// -- pyfile stuff -- + +coverage.LINE_FILTERS_STORAGE = "COVERAGE_LINE_FILTERS"; + +coverage.pyfile_ready = function () { + // If we're directed to a particular line number, highlight the line. 
+ var frag = location.hash; + if (frag.length > 2 && frag[1] === "t") { + document.querySelector(frag).closest(".n").classList.add("highlight"); + coverage.set_sel(parseInt(frag.substr(2), 10)); + } + else { + coverage.set_sel(0); + } + + on_click(".button_toggle_run", coverage.toggle_lines); + on_click(".button_toggle_mis", coverage.toggle_lines); + on_click(".button_toggle_exc", coverage.toggle_lines); + on_click(".button_toggle_par", coverage.toggle_lines); + + on_click(".button_next_chunk", coverage.to_next_chunk_nicely); + on_click(".button_prev_chunk", coverage.to_prev_chunk_nicely); + on_click(".button_top_of_page", coverage.to_top); + on_click(".button_first_chunk", coverage.to_first_chunk); + + on_click(".button_prev_file", coverage.to_prev_file); + on_click(".button_next_file", coverage.to_next_file); + on_click(".button_to_index", coverage.to_index); + + on_click(".button_show_hide_help", coverage.show_hide_help); + + coverage.filters = undefined; + try { + coverage.filters = localStorage.getItem(coverage.LINE_FILTERS_STORAGE); + } catch(err) {} + + if (coverage.filters) { + coverage.filters = JSON.parse(coverage.filters); + } + else { + coverage.filters = {run: false, exc: true, mis: true, par: true}; + } + + for (cls in coverage.filters) { + coverage.set_line_visibilty(cls, coverage.filters[cls]); // nosemgrep: eslint.detect-object-injection + } + + coverage.assign_shortkeys(); + coverage.init_scroll_markers(); + coverage.wire_up_sticky_header(); + + document.querySelectorAll("[id^=ctxs]").forEach( + cbox => cbox.addEventListener("click", coverage.expand_contexts) + ); + + // Rebuild scroll markers when the window height changes. 
+ window.addEventListener("resize", coverage.build_scroll_markers); +}; + +coverage.toggle_lines = function (event) { + const btn = event.target.closest("button"); + const category = btn.value + const show = !btn.classList.contains("show_" + category); + coverage.set_line_visibilty(category, show); + coverage.build_scroll_markers(); + coverage.filters[category] = show; + try { + localStorage.setItem(coverage.LINE_FILTERS_STORAGE, JSON.stringify(coverage.filters)); + } catch(err) {} +}; + +coverage.set_line_visibilty = function (category, should_show) { + const cls = "show_" + category; + const btn = document.querySelector(".button_toggle_" + category); + if (btn) { + if (should_show) { + document.querySelectorAll("#source ." + category).forEach(e => e.classList.add(cls)); + btn.classList.add(cls); + } + else { + document.querySelectorAll("#source ." + category).forEach(e => e.classList.remove(cls)); + btn.classList.remove(cls); + } + } +}; + +// Return the nth line div. +coverage.line_elt = function (n) { + return document.getElementById("t" + n)?.closest("p"); +}; + +// Set the selection. b and e are line numbers. +coverage.set_sel = function (b, e) { + // The first line selected. + coverage.sel_begin = b; + // The next line not selected. + coverage.sel_end = (e === undefined) ? 
b+1 : e; +}; + +coverage.to_top = function () { + coverage.set_sel(0, 1); + coverage.scroll_window(0); +}; + +coverage.to_first_chunk = function () { + coverage.set_sel(0, 1); + coverage.to_next_chunk(); +}; + +coverage.to_prev_file = function () { + window.location = document.getElementById("prevFileLink").href; +} + +coverage.to_next_file = function () { + window.location = document.getElementById("nextFileLink").href; +} + +coverage.to_index = function () { + location.href = document.getElementById("indexLink").href; +} + +coverage.show_hide_help = function () { + const helpCheck = document.getElementById("help_panel_state") + helpCheck.checked = !helpCheck.checked; +} + +// Return a string indicating what kind of chunk this line belongs to, +// or null if not a chunk. +coverage.chunk_indicator = function (line_elt) { + const classes = line_elt?.className; + if (!classes) { + return null; + } + const match = classes.match(/\bshow_\w+\b/); + if (!match) { + return null; + } + return match[0]; +}; + +coverage.to_next_chunk = function () { + const c = coverage; + + // Find the start of the next colored chunk. + var probe = c.sel_end; + var chunk_indicator, probe_line; + while (true) { + probe_line = c.line_elt(probe); + if (!probe_line) { + return; + } + chunk_indicator = c.chunk_indicator(probe_line); + if (chunk_indicator) { + break; + } + probe++; + } + + // There's a next chunk, `probe` points to it. + var begin = probe; + + // Find the end of this chunk. + var next_indicator = chunk_indicator; + while (next_indicator === chunk_indicator) { + probe++; + probe_line = c.line_elt(probe); + next_indicator = c.chunk_indicator(probe_line); + } + c.set_sel(begin, probe); + c.show_selection(); +}; + +coverage.to_prev_chunk = function () { + const c = coverage; + + // Find the end of the prev colored chunk. 
+ var probe = c.sel_begin-1; + var probe_line = c.line_elt(probe); + if (!probe_line) { + return; + } + var chunk_indicator = c.chunk_indicator(probe_line); + while (probe > 1 && !chunk_indicator) { + probe--; + probe_line = c.line_elt(probe); + if (!probe_line) { + return; + } + chunk_indicator = c.chunk_indicator(probe_line); + } + + // There's a prev chunk, `probe` points to its last line. + var end = probe+1; + + // Find the beginning of this chunk. + var prev_indicator = chunk_indicator; + while (prev_indicator === chunk_indicator) { + probe--; + if (probe <= 0) { + return; + } + probe_line = c.line_elt(probe); + prev_indicator = c.chunk_indicator(probe_line); + } + c.set_sel(probe+1, end); + c.show_selection(); +}; + +// Returns 0, 1, or 2: how many of the two ends of the selection are on +// the screen right now? +coverage.selection_ends_on_screen = function () { + if (coverage.sel_begin === 0) { + return 0; + } + + const begin = coverage.line_elt(coverage.sel_begin); + const end = coverage.line_elt(coverage.sel_end-1); + + return ( + (checkVisible(begin) ? 1 : 0) + + (checkVisible(end) ? 1 : 0) + ); +}; + +coverage.to_next_chunk_nicely = function () { + if (coverage.selection_ends_on_screen() === 0) { + // The selection is entirely off the screen: + // Set the top line on the screen as selection. 
+ + // This will select the top-left of the viewport + // As this is most likely the span with the line number we take the parent + const line = document.elementFromPoint(0, 0).parentElement; + if (line.parentElement !== document.getElementById("source")) { + // The element is not a source line but the header or similar + coverage.select_line_or_chunk(1); + } + else { + // We extract the line number from the id + coverage.select_line_or_chunk(parseInt(line.id.substring(1), 10)); + } + } + coverage.to_next_chunk(); +}; + +coverage.to_prev_chunk_nicely = function () { + if (coverage.selection_ends_on_screen() === 0) { + // The selection is entirely off the screen: + // Set the lowest line on the screen as selection. + + // This will select the bottom-left of the viewport + // As this is most likely the span with the line number we take the parent + const line = document.elementFromPoint(document.documentElement.clientHeight-1, 0).parentElement; + if (line.parentElement !== document.getElementById("source")) { + // The element is not a source line but the header or similar + coverage.select_line_or_chunk(coverage.lines_len); + } + else { + // We extract the line number from the id + coverage.select_line_or_chunk(parseInt(line.id.substring(1), 10)); + } + } + coverage.to_prev_chunk(); +}; + +// Select line number lineno, or if it is in a colored chunk, select the +// entire chunk +coverage.select_line_or_chunk = function (lineno) { + var c = coverage; + var probe_line = c.line_elt(lineno); + if (!probe_line) { + return; + } + var the_indicator = c.chunk_indicator(probe_line); + if (the_indicator) { + // The line is in a highlighted chunk. + // Search backward for the first line. 
+ var probe = lineno; + var indicator = the_indicator; + while (probe > 0 && indicator === the_indicator) { + probe--; + probe_line = c.line_elt(probe); + if (!probe_line) { + break; + } + indicator = c.chunk_indicator(probe_line); + } + var begin = probe + 1; + + // Search forward for the last line. + probe = lineno; + indicator = the_indicator; + while (indicator === the_indicator) { + probe++; + probe_line = c.line_elt(probe); + indicator = c.chunk_indicator(probe_line); + } + + coverage.set_sel(begin, probe); + } + else { + coverage.set_sel(lineno); + } +}; + +coverage.show_selection = function () { + // Highlight the lines in the chunk + document.querySelectorAll("#source .highlight").forEach(e => e.classList.remove("highlight")); + for (let probe = coverage.sel_begin; probe < coverage.sel_end; probe++) { + coverage.line_elt(probe).querySelector(".n").classList.add("highlight"); + } + + coverage.scroll_to_selection(); +}; + +coverage.scroll_to_selection = function () { + // Scroll the page if the chunk isn't fully visible. + if (coverage.selection_ends_on_screen() < 2) { + const element = coverage.line_elt(coverage.sel_begin); + coverage.scroll_window(element.offsetTop - 60); + } +}; + +coverage.scroll_window = function (to_pos) { + window.scroll({top: to_pos, behavior: "smooth"}); +}; + +coverage.init_scroll_markers = function () { + // Init some variables + coverage.lines_len = document.querySelectorAll("#source > p").length; + + // Build html + coverage.build_scroll_markers(); +}; + +coverage.build_scroll_markers = function () { + const temp_scroll_marker = document.getElementById("scroll_marker") + if (temp_scroll_marker) temp_scroll_marker.remove(); + // Don't build markers if the window has no scroll bar. 
+ if (document.body.scrollHeight <= window.innerHeight) { + return; + } + + const marker_scale = window.innerHeight / document.body.scrollHeight; + const line_height = Math.min(Math.max(3, window.innerHeight / coverage.lines_len), 10); + + let previous_line = -99, last_mark, last_top; + + const scroll_marker = document.createElement("div"); + scroll_marker.id = "scroll_marker"; + document.getElementById("source").querySelectorAll( + "p.show_run, p.show_mis, p.show_exc, p.show_exc, p.show_par" + ).forEach(element => { + const line_top = Math.floor(element.offsetTop * marker_scale); + const line_number = parseInt(element.querySelector(".n a").id.substr(1)); + + if (line_number === previous_line + 1) { + // If this solid missed block just make previous mark higher. + last_mark.style.height = `${line_top + line_height - last_top}px`; + } + else { + // Add colored line in scroll_marker block. + last_mark = document.createElement("div"); + last_mark.id = `m${line_number}`; + last_mark.classList.add("marker"); + last_mark.style.height = `${line_height}px`; + last_mark.style.top = `${line_top}px`; + scroll_marker.append(last_mark); + last_top = line_top; + } + + previous_line = line_number; + }); + + // Append last to prevent layout calculation + document.body.append(scroll_marker); +}; + +coverage.wire_up_sticky_header = function () { + const header = document.querySelector("header"); + const header_bottom = ( + header.querySelector(".content h2").getBoundingClientRect().top - + header.getBoundingClientRect().top + ); + + function updateHeader() { + if (window.scrollY > header_bottom) { + header.classList.add("sticky"); + } + else { + header.classList.remove("sticky"); + } + } + + window.addEventListener("scroll", updateHeader); + updateHeader(); +}; + +coverage.expand_contexts = function (e) { + var ctxs = e.target.parentNode.querySelector(".ctxs"); + + if (!ctxs.classList.contains("expanded")) { + var ctxs_text = ctxs.textContent; + var width = Number(ctxs_text[0]); + 
ctxs.textContent = ""; + for (var i = 1; i < ctxs_text.length; i += width) { + key = ctxs_text.substring(i, i + width).trim(); + ctxs.appendChild(document.createTextNode(contexts[key])); + ctxs.appendChild(document.createElement("br")); + } + ctxs.classList.add("expanded"); + } +}; + +document.addEventListener("DOMContentLoaded", () => { + if (document.body.classList.contains("indexfile")) { + coverage.index_ready(); + } + else { + coverage.pyfile_ready(); + } +}); diff --git a/htmlcov/favicon_32_cb_c827f16f.png b/htmlcov/favicon_32_cb_c827f16f.png new file mode 100644 index 0000000..8649f04 Binary files /dev/null and b/htmlcov/favicon_32_cb_c827f16f.png differ diff --git a/htmlcov/function_index.html b/htmlcov/function_index.html new file mode 100644 index 0000000..5c9412e --- /dev/null +++ b/htmlcov/function_index.html @@ -0,0 +1,2052 @@ + + + + + Coverage report + + + + + +

Coverage report: + 71% +

+ + +

+ Files + Functions + Classes +

+ coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

File	function	statements	missing	excluded	coverage
src / lyscripts / __init__.py	LyscriptsCLI.__init__	2	2	0	0%
src / lyscripts / __init__.py	LyscriptsCLI.cli_cmd	5	5	0	0%
src / lyscripts / __init__.py	(no function)	23	0	0	100%
src / lyscripts / __main__.py	(no function)	3	3	0	0%
src / lyscripts / _version.py	(no function)	11	0	0	100%
src / lyscripts / cli.py	assemble_main	2	0	0	100%
src / lyscripts / cli.py	assemble_main.main	2	0	0	100%
src / lyscripts / cli.py	somewhat_safely_get_loglevel	6	6	0	0%
src / lyscripts / cli.py	configure_logging	6	6	0	0%
src / lyscripts / cli.py	InterceptHandler.emit	14	14	0	0%
src / lyscripts / cli.py	(no function)	15	0	0	100%
src / lyscripts / compute / __init__.py	ComputeCLI.cli_cmd	1	1	0	0%
src / lyscripts / compute / __init__.py	(no function)	4	0	0	100%
src / lyscripts / compute / __main__.py	(no function)	5	5	0	0%
src / lyscripts / compute / evidence.py	comp_bic	1	1	0	0%
src / lyscripts / compute / evidence.py	compute_evidence	2	2	0	0%
src / lyscripts / compute / evidence.py	compute_ti_results	12	12	0	0%
src / lyscripts / compute / evidence.py	EvidenceCLI.cli_cmd	24	24	0	0%
src / lyscripts / compute / evidence.py	(no function)	26	2	0	92%
src / lyscripts / compute / posteriors.py	compute_posteriors	10	0	0	100%
src / lyscripts / compute / posteriors.py	PosteriorsCLI.cli_cmd	17	17	0	0%
src / lyscripts / compute / posteriors.py	(no function)	19	2	0	89%
src / lyscripts / compute / prevalences.py	compute_prevalences	20	5	0	75%
src / lyscripts / compute / prevalences.py	generate_query_from_diagnosis	7	0	0	100%
src / lyscripts / compute / prevalences.py	observe_prevalence	8	0	0	100%
src / lyscripts / compute / prevalences.py	PrevalencesCLI.cli_cmd	19	0	0	100%
src / lyscripts / compute / prevalences.py	(no function)	28	2	0	93%
src / lyscripts / compute / priors.py	compute_priors	7	0	0	100%
src / lyscripts / compute / priors.py	PriorsCLI.cli_cmd	12	0	0	100%
src / lyscripts / compute / priors.py	(no function)	16	2	0	88%
src / lyscripts / compute / risks.py	compute_risks	9	9	0	0%
src / lyscripts / compute / risks.py	RisksCLI.cli_cmd	22	22	0	0%
src / lyscripts / compute / risks.py	(no function)	20	2	0	90%
src / lyscripts / compute / utils.py	is_hdf5_compatible	1	0	0	100%
src / lyscripts / compute / utils.py	to_hdf5_attrs	6	0	0	100%
src / lyscripts / compute / utils.py	from_hdf5_attrs	7	0	0	100%
src / lyscripts / compute / utils.py	extract_modalities	7	1	0	86%
src / lyscripts / compute / utils.py	ensure_parent_dir	3	0	0	100%
src / lyscripts / compute / utils.py	HDF5FileStorage._get_dataset	4	1	0	75%
src / lyscripts / compute / utils.py	HDF5FileStorage.load	5	0	0	100%
src / lyscripts / compute / utils.py	HDF5FileStorage.get_attrs	5	0	0	100%
src / lyscripts / compute / utils.py	HDF5FileStorage.save	6	1	0	83%
src / lyscripts / compute / utils.py	HDF5FileStorage.set_attrs	6	1	0	83%
src / lyscripts / compute / utils.py	reduce_pattern	9	0	0	100%
src / lyscripts / compute / utils.py	complete_pattern	12	2	0	83%
src / lyscripts / compute / utils.py	get_cached	7	0	0	100%
src / lyscripts / compute / utils.py	get_cached.log_cache_info_wrapper	7	0	0	100%
src / lyscripts / compute / utils.py	(no function)	33	0	0	100%
src / lyscripts / configs.py	DataConfig.load	3	1	0	67%
src / lyscripts / configs.py	DataConfig.get_load_kwargs	1	0	0	100%
src / lyscripts / configs.py	check_pattern	1	0	0	100%
src / lyscripts / configs.py	DiagnosisConfig.to_involvement	1	1	0	0%
src / lyscripts / configs.py	retrieve_graph_representation	9	2	0	78%
src / lyscripts / configs.py	GraphConfig.from_model	2	0	0	100%
src / lyscripts / configs.py	has_model_symbol	6	0	0	100%
src / lyscripts / configs.py	get_symmetry_kwargs	5	1	0	80%
src / lyscripts / configs.py	ModelConfig.from_model	15	4	0	73%
src / lyscripts / configs.py	modalityconfig_from_model	2	0	0	100%
src / lyscripts / configs.py	DeprecatedModelConfig.model_post_init	5	0	0	100%
src / lyscripts / configs.py	DeprecatedModelConfig.translate	14	0	0	100%
src / lyscripts / configs.py	SamplingConfig.load	1	0	0	100%
src / lyscripts / configs.py	geometric_schedule	3	3	0	0%
src / lyscripts / configs.py	linear_schedule	1	1	0	0%
src / lyscripts / configs.py	power_schedule	2	2	0	0%
src / lyscripts / configs.py	ScheduleConfig.get_schedule	8	8	0	0%
src / lyscripts / configs.py	map_to_optional_bool	5	1	0	80%
src / lyscripts / configs.py	ScenarioConfig.model_post_init	2	0	0	100%
src / lyscripts / configs.py	ScenarioConfig.interpolate	4	3	0	25%
src / lyscripts / configs.py	ScenarioConfig.normalize	2	1	0	50%
src / lyscripts / configs.py	_construct_model_from_external	6	0	0	100%
src / lyscripts / configs.py	construct_model	7	0	0	100%
src / lyscripts / configs.py	add_distributions	18	1	0	94%
src / lyscripts / configs.py	add_modalities	8	0	0	100%
src / lyscripts / configs.py	add_data	11	11	0	0%
src / lyscripts / configs.py	DynamicYamlConfigSettingsSource.__init__	2	0	0	100%
src / lyscripts / configs.py	DynamicYamlConfigSettingsSource._read_file	5	1	0	80%
src / lyscripts / configs.py	DynamicYamlConfigSettingsSource.__call__	4	0	0	100%
src / lyscripts / configs.py	DynamicYamlConfigSettingsSource.__repr__	1	0	0	100%
src / lyscripts / configs.py	BaseCLI.settings_customise_sources	3	0	0	100%
src / lyscripts / configs.py	(no function)	123	0	0	100%
src / lyscripts / data / __init__.py	DataCLI.cli_cmd	1	1	0	0%
src / lyscripts / data / __init__.py	(no function)	5	0	0	100%
src / lyscripts / data / __main__.py	main	10	10	0	0%
src / lyscripts / data / __main__.py	(no function)	8	8	0	0%
src / lyscripts / data / collect / __init__.py	serve_index_html	3	3	0	0%
src / lyscripts / data / collect / __init__.py	serve_schema	1	1	0	0%
src / lyscripts / data / collect / __init__.py	serve_collector_js	1	1	0	0%
src / lyscripts / data / collect / __init__.py	process	17	17	0	0%
src / lyscripts / data / collect / __init__.py	CollectorCLI.cli_cmd	5	5	0	0%
src / lyscripts / data / collect / __init__.py	(no function)	31	0	0	100%
src / lyscripts / data / enhance.py	EnhanceCLI.cli_cmd	4	4	0	0%
src / lyscripts / data / enhance.py	(no function)	15	2	0	87%
src / lyscripts / data / fetch.py	FetchCLI.cli_cmd	5	5	0	0%
src / lyscripts / data / fetch.py	(no function)	16	2	0	88%
src / lyscripts / data / filter.py	FilterCLI.model_post_init	17	17	0	0%
src / lyscripts / data / filter.py	FilterCLI.cli_cmd	11	11	0	0%
src / lyscripts / data / filter.py	(no function)	20	2	0	90%
src / lyscripts / data / generate.py	GenerateCLI.model_post_init	8	2	0	75%
src / lyscripts / data / generate.py	GenerateCLI.cli_cmd	9	0	0	100%
src / lyscripts / data / generate.py	(no function)	18	2	0	89%
src / lyscripts / data / join.py	JoinCLI.cli_cmd	8	8	0	0%
src / lyscripts / data / join.py	(no function)	14	2	0	86%
src / lyscripts / data / lyproxify.py	ensure_python_file	3	3	0	0%
src / lyscripts / data / lyproxify.py	ensure_column_map	6	6	0	0%
src / lyscripts / data / lyproxify.py	LyproxifyCLI.cli_cmd	17	17	0	0%
src / lyscripts / data / lyproxify.py	clean_header	6	6	0	0%
src / lyscripts / data / lyproxify.py	get_instruction_depth	7	1	1	86%
src / lyscripts / data / lyproxify.py	generate_markdown_docs	10	0	0	100%
src / lyscripts / data / lyproxify.py	transform_to_lyprox	20	20	0	0%
src / lyscripts / data / lyproxify.py	leftright_to_ipsicontra	12	12	0	0%
src / lyscripts / data / lyproxify.py	exclude_patients	8	0	0	100%
src / lyscripts / data / lyproxify.py	(no function)	34	2	0	94%
src / lyscripts / data / split.py	SplitCLI.cli_cmd	12	12	0	0%
src / lyscripts / data / split.py	(no function)	17	2	0	88%
src / lyscripts / data / utils.py	save_table_to_csv	3	0	0	100%
src / lyscripts / data / utils.py	(no function)	6	0	0	100%
src / lyscripts / decorators.py	assemble_signature	3	0	0	100%
src / lyscripts / decorators.py	log_state	2	0	0	100%
src / lyscripts / decorators.py	log_state.log_decorator	3	0	0	100%
src / lyscripts / decorators.py	log_state.log_decorator.wrapper	10	3	0	70%
src / lyscripts / decorators.py	check_input_file_exists	3	0	0	100%
src / lyscripts / decorators.py	check_input_file_exists.inner	4	1	0	75%
src / lyscripts / decorators.py	check_output_dir_exists	3	0	0	100%
src / lyscripts / decorators.py	check_output_dir_exists.inner	3	0	0	100%
src / lyscripts / decorators.py	(no function)	10	0	0	100%
src / lyscripts / evaluate.py	_add_parser	2	2	0	0%
src / lyscripts / evaluate.py	_add_arguments	6	6	0	0%
src / lyscripts / evaluate.py	comp_bic	1	1	0	0%
src / lyscripts / evaluate.py	compute_evidence	2	2	0	0%
src / lyscripts / evaluate.py	compute_ti_results	13	13	0	0%
src / lyscripts / evaluate.py	main	24	24	0	0%
src / lyscripts / evaluate.py	(no function)	22	4	0	82%
src / lyscripts / integrate.py	init_ti_sampler	3	3	0	0%
src / lyscripts / integrate.py	IntegrateCLI.cli_cmd	17	17	0	0%
src / lyscripts / integrate.py	(no function)	26	2	0	92%
src / lyscripts / plots.py	floor_at_decimal	2	2	0	0%
src / lyscripts / plots.py	ceil_at_decimal	1	1	0	0%
src / lyscripts / plots.py	floor_to_step	1	0	0	100%
src / lyscripts / plots.py	ceil_to_step	1	0	0	100%
src / lyscripts / plots.py	clean_and_check	5	0	0	100%
src / lyscripts / plots.py	AbstractDistribution.label	1	0	0	100%
src / lyscripts / plots.py	Histogram.values	1	0	0	100%
src / lyscripts / plots.py	Histogram.from_hdf5	6	0	0	100%
src / lyscripts / plots.py	Histogram.left_percentile	1	0	0	100%
src / lyscripts / plots.py	Histogram.right_percentile	1	0	0	100%
src / lyscripts / plots.py	Histogram.draw	6	0	0	100%
src / lyscripts / plots.py	BetaPosterior.from_hdf5	9	2	0	78%
src / lyscripts / plots.py	BetaPosterior._get_label	1	0	0	100%
src / lyscripts / plots.py	BetaPosterior.num_fail	1	0	0	100%
src / lyscripts / plots.py	BetaPosterior.pdf	1	0	0	100%
src / lyscripts / plots.py	BetaPosterior.left_percentile	1	0	0	100%
src / lyscripts / plots.py	BetaPosterior.right_percentile	1	0	0	100%
src / lyscripts / plots.py	BetaPosterior.draw	8	0	0	100%
src / lyscripts / plots.py	get_size	8	0	0	100%
src / lyscripts / plots.py	get_label	6	0	0	100%
src / lyscripts / plots.py	get_xlims	5	0	0	100%
src / lyscripts / plots.py	draw	12	2	0	83%
src / lyscripts / plots.py	split_legends	10	10	0	0%
src / lyscripts / plots.py	use_mpl_stylesheet	1	1	0	0%
src / lyscripts / plots.py	save_figure	2	0	0	100%
src / lyscripts / plots.py	(no function)	68	0	6	100%
src / lyscripts / sample.py	CompletedItersColumn.__init__	2	0	0	100%
src / lyscripts / sample.py	CompletedItersColumn.render	3	1	0	67%
src / lyscripts / sample.py	ItersPerSecondColumn.render	4	0	0	100%
src / lyscripts / sample.py	AcorTime.update	2	0	0	100%
src / lyscripts / sample.py	AcorTime.relative_diff	1	0	0	100%
src / lyscripts / sample.py	NumAccepted.update	2	0	0	100%
src / lyscripts / sample.py	NumAccepted.newly_accepted	1	0	0	100%
src / lyscripts / sample.py	log_prob_fn	4	4	0	0%
src / lyscripts / sample.py	ensure_initial_state	7	0	0	100%
src / lyscripts / sample.py	ensure_history_table	3	1	0	67%
src / lyscripts / sample.py	update_history_table	5	1	0	80%
src / lyscripts / sample.py	is_converged	1	0	0	100%
src / lyscripts / sample.py	_get_columns	1	0	0	100%
src / lyscripts / sample.py	run_sampling	20	1	0	95%
src / lyscripts / sample.py	get_pool	1	0	0	100%
src / lyscripts / sample.py	init_sampler	3	0	0	100%
src / lyscripts / sample.py	SampleCLI.cli_cmd	12	0	0	100%
src / lyscripts / sample.py	(no function)	65	4	3	94%
src / lyscripts / schedule.py	ScheduleCLI.cli_cmd	3	3	0	0%
src / lyscripts / schedule.py	(no function)	8	2	0	75%
src / lyscripts / schema.py	main	2	2	0	0%
src / lyscripts / schema.py	(no function)	20	1	0	95%
src / lyscripts / utils.py	binom_pmf	6	1	0	83%
src / lyscripts / utils.py	get_dict_depth	7	0	0	100%
src / lyscripts / utils.py	delete_private_keys	7	0	0	100%
src / lyscripts / utils.py	flatten	8	0	0	100%
src / lyscripts / utils.py	unflatten	7	0	0	100%
src / lyscripts / utils.py	get_modalities_subset	7	2	0	71%
src / lyscripts / utils.py	load_patient_data	5	0	0	100%
src / lyscripts / utils.py	load_yaml_params	4	0	0	100%
src / lyscripts / utils.py	load_model_samples	4	0	0	100%
src / lyscripts / utils.py	get_hdf5_backend	6	2	0	67%
src / lyscripts / utils.py	(no function)	23	0	0	100%
Total		1735	496	15	71%

+ No items found using the specified filter. +

6 empty functions skipped.

+ + + diff --git a/htmlcov/index.html b/htmlcov/index.html new file mode 100644 index 0000000..c2b5d31 --- /dev/null +++ b/htmlcov/index.html @@ -0,0 +1,396 @@ + + + + + Coverage report + + + + + +

Coverage report: + 71% +

+ + +

+ Files + Functions + Classes +

+ coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

File	statements	missing	excluded	coverage
src / lyscripts / __init__.py	30	7	0	77%
src / lyscripts / __main__.py	3	3	0	0%
src / lyscripts / _version.py	11	0	0	100%
src / lyscripts / cli.py	45	26	0	42%
src / lyscripts / compute / __init__.py	5	1	0	80%
src / lyscripts / compute / __main__.py	5	5	0	0%
src / lyscripts / compute / evidence.py	65	41	0	37%
src / lyscripts / compute / posteriors.py	46	19	0	59%
src / lyscripts / compute / prevalences.py	82	7	0	91%
src / lyscripts / compute / priors.py	35	2	0	94%
src / lyscripts / compute / risks.py	51	33	0	35%
src / lyscripts / compute / utils.py	118	6	0	95%
src / lyscripts / configs.py	280	41	0	85%
src / lyscripts / data / __init__.py	6	1	0	83%
src / lyscripts / data / __main__.py	18	18	0	0%
src / lyscripts / data / collect / __init__.py	58	27	0	53%
src / lyscripts / data / enhance.py	19	6	0	68%
src / lyscripts / data / fetch.py	21	7	0	67%
src / lyscripts / data / filter.py	48	30	0	38%
src / lyscripts / data / generate.py	35	4	0	89%
src / lyscripts / data / join.py	22	10	0	55%
src / lyscripts / data / lyproxify.py	123	67	1	46%
src / lyscripts / data / split.py	29	14	0	52%
src / lyscripts / data / utils.py	9	0	0	100%
src / lyscripts / decorators.py	41	4	0	90%
src / lyscripts / evaluate.py	70	52	0	26%
src / lyscripts / integrate.py	46	22	0	52%
src / lyscripts / plots.py	160	18	9	89%
src / lyscripts / sample.py	137	12	5	91%
src / lyscripts / schedule.py	11	5	0	55%
src / lyscripts / schema.py	22	3	0	86%
src / lyscripts / utils.py	84	5	0	94%
Total	1735	496	15	71%

+ No items found using the specified filter. +

+ + + diff --git a/htmlcov/keybd_closed_cb_900cfef5.png b/htmlcov/keybd_closed_cb_900cfef5.png new file mode 100644 index 0000000..ba119c4 Binary files /dev/null and b/htmlcov/keybd_closed_cb_900cfef5.png differ diff --git a/htmlcov/status.json b/htmlcov/status.json new file mode 100644 index 0000000..6495e3e --- /dev/null +++ b/htmlcov/status.json @@ -0,0 +1 @@ +{"note":"This file is an internal implementation detail to speed up HTML report generation. Its format can change at any time. You might be looking for the JSON report: https://coverage.rtfd.io/cmd.html#cmd-json","format":5,"version":"7.13.5","globals":"50bf532352c807e4f5a5d7e355804c07","files":{"z_5bf5c588c698c6cc___init___py":{"hash":"a0eae5c6356ead96e996c23a7b93503e","index":{"url":"z_5bf5c588c698c6cc___init___py.html","file":"src/lyscripts/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":30,"n_excluded":0,"n_missing":7,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc___main___py":{"hash":"f0c35da943d35886efff34f2d0b2b729","index":{"url":"z_5bf5c588c698c6cc___main___py.html","file":"src/lyscripts/__main__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":3,"n_excluded":0,"n_missing":3,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc__version_py":{"hash":"7dda3ad4c1b3f5d2a35e683e9bfb8ef5","index":{"url":"z_5bf5c588c698c6cc__version_py.html","file":"src/lyscripts/_version.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":11,"n_excluded":0,"n_missing":0,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_cli_py":{"hash":"668153c451c926231097db51313ff65e","index":{"url":"z_5bf5c588c698c6cc_cli_py.html","file":"src/lyscripts/cli.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":45,"n_excluded":0,"n_missing":26,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_055061514423972c___init___py":{"hash":"3a51d3
7a0111360f944df234b225a1f3","index":{"url":"z_055061514423972c___init___py.html","file":"src/lyscripts/compute/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":5,"n_excluded":0,"n_missing":1,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_055061514423972c___main___py":{"hash":"f6b9779e777d0dfdc91f504f1fd62dd3","index":{"url":"z_055061514423972c___main___py.html","file":"src/lyscripts/compute/__main__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":5,"n_excluded":0,"n_missing":5,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_055061514423972c_evidence_py":{"hash":"f38ea0f3323181b306eccc21072086be","index":{"url":"z_055061514423972c_evidence_py.html","file":"src/lyscripts/compute/evidence.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":65,"n_excluded":0,"n_missing":41,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_055061514423972c_posteriors_py":{"hash":"9d56db3d46c922d10b4a59078635c7d0","index":{"url":"z_055061514423972c_posteriors_py.html","file":"src/lyscripts/compute/posteriors.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":46,"n_excluded":0,"n_missing":19,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_055061514423972c_prevalences_py":{"hash":"f44fb75a19ae2bb65135ce161f3b1439","index":{"url":"z_055061514423972c_prevalences_py.html","file":"src/lyscripts/compute/prevalences.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":82,"n_excluded":0,"n_missing":7,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_055061514423972c_priors_py":{"hash":"938d7299721def68340eb93f0fa5d153","index":{"url":"z_055061514423972c_priors_py.html","file":"src/lyscripts/compute/priors.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":35,"n_excluded":0,"n_missing":2,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_055061514423972c_risks_py":{"has
h":"a6edde0dd25e478508fb393a89f139db","index":{"url":"z_055061514423972c_risks_py.html","file":"src/lyscripts/compute/risks.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":51,"n_excluded":0,"n_missing":33,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_055061514423972c_utils_py":{"hash":"6d7fc5f29d69249558ae513bcb587ccf","index":{"url":"z_055061514423972c_utils_py.html","file":"src/lyscripts/compute/utils.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":118,"n_excluded":0,"n_missing":6,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_configs_py":{"hash":"d306b60881f39f8b4861252ce4d39307","index":{"url":"z_5bf5c588c698c6cc_configs_py.html","file":"src/lyscripts/configs.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":280,"n_excluded":0,"n_missing":41,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a___init___py":{"hash":"f4f0d10f0cbf3c6f765e47a9eb83a994","index":{"url":"z_9b7bcb970ba14d6a___init___py.html","file":"src/lyscripts/data/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":6,"n_excluded":0,"n_missing":1,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a___main___py":{"hash":"3c6f736c0cc0f5d3969598e0fd308385","index":{"url":"z_9b7bcb970ba14d6a___main___py.html","file":"src/lyscripts/data/__main__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":18,"n_excluded":0,"n_missing":18,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_f60392fe1c3f3e73___init___py":{"hash":"7bbd08c6a98bbc9b59ee7180b2d8f8d7","index":{"url":"z_f60392fe1c3f3e73___init___py.html","file":"src/lyscripts/data/collect/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":58,"n_excluded":0,"n_missing":27,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a_enhance_py":{"hash":"7eb0de275f05bab
9e742aa7b06f7ded3","index":{"url":"z_9b7bcb970ba14d6a_enhance_py.html","file":"src/lyscripts/data/enhance.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":19,"n_excluded":0,"n_missing":6,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a_fetch_py":{"hash":"996d41c4e8650b42e888527c96338722","index":{"url":"z_9b7bcb970ba14d6a_fetch_py.html","file":"src/lyscripts/data/fetch.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":21,"n_excluded":0,"n_missing":7,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a_filter_py":{"hash":"380c9d36e43c6972fef5ce0322fbe4eb","index":{"url":"z_9b7bcb970ba14d6a_filter_py.html","file":"src/lyscripts/data/filter.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":48,"n_excluded":0,"n_missing":30,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a_generate_py":{"hash":"725be2543bd5c8bf07870ba0e3cdf681","index":{"url":"z_9b7bcb970ba14d6a_generate_py.html","file":"src/lyscripts/data/generate.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":35,"n_excluded":0,"n_missing":4,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a_join_py":{"hash":"13f6699f19198c22acf18ea2d63f6a4b","index":{"url":"z_9b7bcb970ba14d6a_join_py.html","file":"src/lyscripts/data/join.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":22,"n_excluded":0,"n_missing":10,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a_lyproxify_py":{"hash":"cd9f8c7d5ba2c119ccac811c50ac40ed","index":{"url":"z_9b7bcb970ba14d6a_lyproxify_py.html","file":"src/lyscripts/data/lyproxify.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":123,"n_excluded":1,"n_missing":67,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a_split_py":{"hash":"cf5599673d7cf038df372bbac2f34ab2","index":{"url":"z_9b
7bcb970ba14d6a_split_py.html","file":"src/lyscripts/data/split.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":29,"n_excluded":0,"n_missing":14,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a_utils_py":{"hash":"47562fb32ecfa4d4c051afef7ae8f8de","index":{"url":"z_9b7bcb970ba14d6a_utils_py.html","file":"src/lyscripts/data/utils.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":9,"n_excluded":0,"n_missing":0,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_decorators_py":{"hash":"55c95b251b06120a1c97fed481f46634","index":{"url":"z_5bf5c588c698c6cc_decorators_py.html","file":"src/lyscripts/decorators.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":41,"n_excluded":0,"n_missing":4,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_evaluate_py":{"hash":"a2fb3646caa92a86359ea04cd90c86bb","index":{"url":"z_5bf5c588c698c6cc_evaluate_py.html","file":"src/lyscripts/evaluate.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":70,"n_excluded":0,"n_missing":52,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_integrate_py":{"hash":"5987dd3311b377b80648f7faaca4c514","index":{"url":"z_5bf5c588c698c6cc_integrate_py.html","file":"src/lyscripts/integrate.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":46,"n_excluded":0,"n_missing":22,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_plots_py":{"hash":"c06e31173b21aa971d4192b15f685430","index":{"url":"z_5bf5c588c698c6cc_plots_py.html","file":"src/lyscripts/plots.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":160,"n_excluded":9,"n_missing":18,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_sample_py":{"hash":"d104057f40a0ea861e4ab620079418fe","index":{"url":"z_5bf5c588c698c6cc_sample_py.html","file":"src/lysc
ripts/sample.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":137,"n_excluded":5,"n_missing":12,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_schedule_py":{"hash":"90b0e9ebdd566b0ef791c88191d1b955","index":{"url":"z_5bf5c588c698c6cc_schedule_py.html","file":"src/lyscripts/schedule.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":11,"n_excluded":0,"n_missing":5,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_schema_py":{"hash":"233930f0d9cf4fecec0df2270557b97d","index":{"url":"z_5bf5c588c698c6cc_schema_py.html","file":"src/lyscripts/schema.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":22,"n_excluded":0,"n_missing":3,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_utils_py":{"hash":"85633c7219eedea5024025cbba2ee1a8","index":{"url":"z_5bf5c588c698c6cc_utils_py.html","file":"src/lyscripts/utils.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":84,"n_excluded":0,"n_missing":5,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}}}} \ No newline at end of file diff --git a/htmlcov/style_cb_9ff733b0.css b/htmlcov/style_cb_9ff733b0.css new file mode 100644 index 0000000..5e304ce --- /dev/null +++ b/htmlcov/style_cb_9ff733b0.css @@ -0,0 +1,389 @@ +@charset "UTF-8"; +/* Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 */ +/* For details: https://github.com/coveragepy/coveragepy/blob/main/NOTICE.txt */ +/* Don't edit this .css file. Edit the .scss file instead! 
*/ +html, body, h1, h2, h3, p, table, td, th { margin: 0; padding: 0; border: 0; font-weight: inherit; font-style: inherit; font-size: 100%; font-family: inherit; vertical-align: baseline; } + +body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; font-size: 1em; background: #fff; color: #000; } + +@media (prefers-color-scheme: dark) { body { background: #1e1e1e; } } + +@media (prefers-color-scheme: dark) { body { color: #eee; } } + +html > body { font-size: 16px; } + +a:active, a:focus { outline: 2px dashed #007acc; } + +p { font-size: .875em; line-height: 1.4em; } + +table { border-collapse: collapse; } + +td { vertical-align: top; } + +table tr.hidden { display: none !important; } + +p#no_rows { display: none; font-size: 1.15em; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; } + +a.nav { text-decoration: none; color: inherit; } + +a.nav:hover { text-decoration: underline; color: inherit; } + +.hidden { display: none; } + +header { background: #f8f8f8; width: 100%; z-index: 2; border-bottom: 1px solid #ccc; } + +@media (prefers-color-scheme: dark) { header { background: black; } } + +@media (prefers-color-scheme: dark) { header { border-color: #333; } } + +header .content { padding: 1rem 3.5rem; } + +header h2 { margin-top: .5em; font-size: 1em; } + +header h2 a.button { font-family: inherit; font-size: inherit; border: 1px solid; border-radius: .2em; background: #eee; color: inherit; text-decoration: none; padding: .1em .5em; margin: 1px calc(.1em + 1px); cursor: pointer; border-color: #ccc; } + +@media (prefers-color-scheme: dark) { header h2 a.button { background: #333; } } + +@media (prefers-color-scheme: dark) { header h2 a.button { border-color: #444; } } + +header h2 a.button.current { border: 2px solid; background: #fff; border-color: #999; cursor: default; } + +@media (prefers-color-scheme: dark) { header h2 
a.button.current { background: #1e1e1e; } } + +@media (prefers-color-scheme: dark) { header h2 a.button.current { border-color: #777; } } + +header p.text { margin: .5em 0 -.5em; color: #666; font-style: italic; } + +@media (prefers-color-scheme: dark) { header p.text { color: #aaa; } } + +header.sticky { position: fixed; left: 0; right: 0; height: 2.5em; } + +header.sticky .text { display: none; } + +header.sticky h1, header.sticky h2 { font-size: 1em; margin-top: 0; display: inline-block; } + +header.sticky .content { padding: 0.5rem 3.5rem; } + +header.sticky .content p { font-size: 1em; } + +header.sticky ~ #source { padding-top: 6.5em; } + +main { position: relative; z-index: 1; } + +footer { margin: 1rem 3.5rem; } + +footer .content { padding: 0; color: #666; font-style: italic; } + +@media (prefers-color-scheme: dark) { footer .content { color: #aaa; } } + +#index { margin: 1rem 0 0 3.5rem; } + +h1 { font-size: 1.25em; display: inline-block; } + +#filter_container { float: right; margin: 0 2em 0 0; line-height: 1.66em; } + +#filter_container #filter { width: 10em; padding: 0.2em 0.5em; border: 2px solid #ccc; background: #fff; color: #000; } + +@media (prefers-color-scheme: dark) { #filter_container #filter { border-color: #444; } } + +@media (prefers-color-scheme: dark) { #filter_container #filter { background: #1e1e1e; } } + +@media (prefers-color-scheme: dark) { #filter_container #filter { color: #eee; } } + +#filter_container #filter:focus { border-color: #007acc; } + +#filter_container :disabled ~ label { color: #ccc; } + +@media (prefers-color-scheme: dark) { #filter_container :disabled ~ label { color: #444; } } + +#filter_container label { font-size: .875em; color: #666; } + +@media (prefers-color-scheme: dark) { #filter_container label { color: #aaa; } } + +header button { font-family: inherit; font-size: inherit; border: 1px solid; border-radius: .2em; background: #eee; color: inherit; text-decoration: none; padding: .1em .5em; margin: 1px 
calc(.1em + 1px); cursor: pointer; border-color: #ccc; } + +@media (prefers-color-scheme: dark) { header button { background: #333; } } + +@media (prefers-color-scheme: dark) { header button { border-color: #444; } } + +header button:active, header button:focus { outline: 2px dashed #007acc; } + +header button.run { background: #eeffee; } + +@media (prefers-color-scheme: dark) { header button.run { background: #373d29; } } + +header button.run.show_run { background: #dfd; border: 2px solid #00dd00; margin: 0 .1em; } + +@media (prefers-color-scheme: dark) { header button.run.show_run { background: #373d29; } } + +header button.mis { background: #ffeeee; } + +@media (prefers-color-scheme: dark) { header button.mis { background: #4b1818; } } + +header button.mis.show_mis { background: #fdd; border: 2px solid #ff0000; margin: 0 .1em; } + +@media (prefers-color-scheme: dark) { header button.mis.show_mis { background: #4b1818; } } + +header button.exc { background: #f7f7f7; } + +@media (prefers-color-scheme: dark) { header button.exc { background: #333; } } + +header button.exc.show_exc { background: #eee; border: 2px solid #808080; margin: 0 .1em; } + +@media (prefers-color-scheme: dark) { header button.exc.show_exc { background: #333; } } + +header button.par { background: #ffffd5; } + +@media (prefers-color-scheme: dark) { header button.par { background: #650; } } + +header button.par.show_par { background: #ffa; border: 2px solid #bbbb00; margin: 0 .1em; } + +@media (prefers-color-scheme: dark) { header button.par.show_par { background: #650; } } + +#help_panel, #source p .annotate.long { display: none; position: absolute; z-index: 999; background: #ffffcc; border: 1px solid #888; border-radius: .2em; color: #333; padding: .25em .5em; } + +#source p .annotate.long { white-space: normal; float: right; top: 1.75em; right: 1em; height: auto; } + +#help_panel_wrapper { float: right; position: relative; } + +#keyboard_icon { margin: 5px; } + +#help_panel_state { display: 
none; } + +#help_panel { top: 25px; right: 0; padding: .75em; border: 1px solid #883; color: #333; } + +#help_panel .keyhelp p { margin-top: .75em; } + +#help_panel .legend { font-style: italic; margin-bottom: 1em; } + +.indexfile #help_panel { width: 25em; } + +.pyfile #help_panel { width: 18em; } + +#help_panel_state:checked ~ #help_panel { display: block; } + +kbd { border: 1px solid black; border-color: #888 #333 #333 #888; padding: .1em .35em; font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace; font-weight: bold; background: #eee; border-radius: 3px; } + +#source { padding: 1em 0 1em 3.5rem; font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace; } + +#source p { position: relative; white-space: pre; } + +#source p * { box-sizing: border-box; } + +#source p .n { float: left; text-align: right; width: 3.5rem; box-sizing: border-box; margin-left: -3.5rem; padding-right: 1em; color: #999; user-select: none; } + +@media (prefers-color-scheme: dark) { #source p .n { color: #777; } } + +#source p .n.highlight { background: #ffdd00; } + +#source p .n a { scroll-margin-top: 6em; text-decoration: none; color: #999; } + +@media (prefers-color-scheme: dark) { #source p .n a { color: #777; } } + +#source p .n a:hover { text-decoration: underline; color: #999; } + +@media (prefers-color-scheme: dark) { #source p .n a:hover { color: #777; } } + +#source p .t { display: inline-block; width: 100%; box-sizing: border-box; margin-left: -.5em; padding-left: 0.3em; border-left: 0.2em solid #fff; } + +@media (prefers-color-scheme: dark) { #source p .t { border-color: #1e1e1e; } } + +#source p .t:hover { background: #f2f2f2; } + +@media (prefers-color-scheme: dark) { #source p .t:hover { background: #282828; } } + +#source p .t:hover ~ .r .annotate.long { display: block; } + +#source p .t .com { color: #008000; font-style: italic; line-height: 1px; } + +@media (prefers-color-scheme: dark) { #source p .t .com { color: #6a9955; } } + +#source p .t .key { 
font-weight: bold; line-height: 1px; } + +#source p .t .str, #source p .t .fst { color: #0451a5; } + +@media (prefers-color-scheme: dark) { #source p .t .str, #source p .t .fst { color: #9cdcfe; } } + +#source p.mis .t { border-left: 0.2em solid #ff0000; } + +#source p.mis.show_mis .t { background: #fdd; } + +@media (prefers-color-scheme: dark) { #source p.mis.show_mis .t { background: #4b1818; } } + +#source p.mis.show_mis .t:hover { background: #f2d2d2; } + +@media (prefers-color-scheme: dark) { #source p.mis.show_mis .t:hover { background: #532323; } } + +#source p.mis.mis2 .t { border-left: 0.2em dotted #ff0000; } + +#source p.mis.mis2.show_mis .t { background: #ffeeee; } + +@media (prefers-color-scheme: dark) { #source p.mis.mis2.show_mis .t { background: #351b1b; } } + +#source p.mis.mis2.show_mis .t:hover { background: #f2d2d2; } + +@media (prefers-color-scheme: dark) { #source p.mis.mis2.show_mis .t:hover { background: #532323; } } + +#source p.run .t { border-left: 0.2em solid #00dd00; } + +#source p.run.show_run .t { background: #dfd; } + +@media (prefers-color-scheme: dark) { #source p.run.show_run .t { background: #373d29; } } + +#source p.run.show_run .t:hover { background: #d2f2d2; } + +@media (prefers-color-scheme: dark) { #source p.run.show_run .t:hover { background: #404633; } } + +#source p.run.run2 .t { border-left: 0.2em dotted #00dd00; } + +#source p.run.run2.show_run .t { background: #eeffee; } + +@media (prefers-color-scheme: dark) { #source p.run.run2.show_run .t { background: #2b2e24; } } + +#source p.run.run2.show_run .t:hover { background: #d2f2d2; } + +@media (prefers-color-scheme: dark) { #source p.run.run2.show_run .t:hover { background: #404633; } } + +#source p.exc .t { border-left: 0.2em solid #808080; } + +#source p.exc.show_exc .t { background: #eee; } + +@media (prefers-color-scheme: dark) { #source p.exc.show_exc .t { background: #333; } } + +#source p.exc.show_exc .t:hover { background: #e2e2e2; } + +@media 
(prefers-color-scheme: dark) { #source p.exc.show_exc .t:hover { background: #3c3c3c; } } + +#source p.exc.exc2 .t { border-left: 0.2em dotted #808080; } + +#source p.exc.exc2.show_exc .t { background: #f7f7f7; } + +@media (prefers-color-scheme: dark) { #source p.exc.exc2.show_exc .t { background: #292929; } } + +#source p.exc.exc2.show_exc .t:hover { background: #e2e2e2; } + +@media (prefers-color-scheme: dark) { #source p.exc.exc2.show_exc .t:hover { background: #3c3c3c; } } + +#source p.par .t { border-left: 0.2em solid #bbbb00; } + +#source p.par.show_par .t { background: #ffa; } + +@media (prefers-color-scheme: dark) { #source p.par.show_par .t { background: #650; } } + +#source p.par.show_par .t:hover { background: #f2f2a2; } + +@media (prefers-color-scheme: dark) { #source p.par.show_par .t:hover { background: #6d5d0c; } } + +#source p.par.par2 .t { border-left: 0.2em dotted #bbbb00; } + +#source p.par.par2.show_par .t { background: #ffffd5; } + +@media (prefers-color-scheme: dark) { #source p.par.par2.show_par .t { background: #423a0f; } } + +#source p.par.par2.show_par .t:hover { background: #f2f2a2; } + +@media (prefers-color-scheme: dark) { #source p.par.par2.show_par .t:hover { background: #6d5d0c; } } + +#source p .r { position: absolute; top: 0; right: 2.5em; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; } + +#source p .annotate { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; color: #666; padding-right: .5em; } + +@media (prefers-color-scheme: dark) { #source p .annotate { color: #ddd; } } + +#source p .annotate.short:hover ~ .long { display: block; } + +#source p .annotate.long { width: 30em; right: 2.5em; } + +#source p input { display: none; } + +#source p input ~ .r label.ctx { cursor: pointer; border-radius: .25em; } + +#source p input ~ .r label.ctx::before { content: "▶ "; } + +#source p input ~ .r 
label.ctx:hover { background: #e8f4ff; color: #666; } + +@media (prefers-color-scheme: dark) { #source p input ~ .r label.ctx:hover { background: #0f3a42; } } + +@media (prefers-color-scheme: dark) { #source p input ~ .r label.ctx:hover { color: #aaa; } } + +#source p input:checked ~ .r label.ctx { background: #d0e8ff; color: #666; border-radius: .75em .75em 0 0; padding: 0 .5em; margin: -.25em 0; } + +@media (prefers-color-scheme: dark) { #source p input:checked ~ .r label.ctx { background: #056; } } + +@media (prefers-color-scheme: dark) { #source p input:checked ~ .r label.ctx { color: #aaa; } } + +#source p input:checked ~ .r label.ctx::before { content: "▼ "; } + +#source p input:checked ~ .ctxs { padding: .25em .5em; overflow-y: scroll; max-height: 10.5em; } + +#source p label.ctx { color: #999; display: inline-block; padding: 0 .5em; font-size: .8333em; } + +@media (prefers-color-scheme: dark) { #source p label.ctx { color: #777; } } + +#source p .ctxs { display: block; max-height: 0; overflow-y: hidden; transition: all .2s; padding: 0 .5em; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; white-space: nowrap; background: #d0e8ff; border-radius: .25em; margin-right: 1.75em; text-align: right; } + +@media (prefers-color-scheme: dark) { #source p .ctxs { background: #056; } } + +#index { font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace; font-size: 0.875em; } + +#index table.index { margin-left: -.5em; } + +#index td, #index th { text-align: right; vertical-align: baseline; padding: .25em .5em; border-bottom: 1px solid #eee; } + +@media (prefers-color-scheme: dark) { #index td, #index th { border-color: #333; } } + +#index td.name, #index th.name { text-align: left; width: auto; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; min-width: 15em; } + +#index td.left, #index th.left { text-align: left; } + +#index 
td.spacer, #index th.spacer { border: none; padding: 0; } + +#index td.spacer:hover, #index th.spacer:hover { background: inherit; } + +#index th { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; font-style: italic; color: #333; border-color: #ccc; cursor: pointer; } + +@media (prefers-color-scheme: dark) { #index th { color: #ddd; } } + +@media (prefers-color-scheme: dark) { #index th { border-color: #444; } } + +#index th:hover { background: #eee; } + +@media (prefers-color-scheme: dark) { #index th:hover { background: #333; } } + +#index th .arrows { color: #666; font-size: 85%; font-family: sans-serif; font-style: normal; pointer-events: none; } + +#index th[aria-sort="ascending"], #index th[aria-sort="descending"] { white-space: nowrap; background: #eee; padding-left: .5em; } + +@media (prefers-color-scheme: dark) { #index th[aria-sort="ascending"], #index th[aria-sort="descending"] { background: #333; } } + +#index th[aria-sort="ascending"] .arrows::after { content: " ▲"; } + +#index th[aria-sort="descending"] .arrows::after { content: " ▼"; } + +#index tr.grouphead th { cursor: default; font-style: normal; border-color: #999; } + +@media (prefers-color-scheme: dark) { #index tr.grouphead th { border-color: #777; } } + +#index td.name { font-size: 1.15em; } + +#index td.name a { text-decoration: none; color: inherit; } + +#index td.name .no-noun { font-style: italic; } + +#index tr.total td, #index tr.total_dynamic td { font-weight: bold; border-bottom: none; } + +#index tr.region:hover { background: #eee; } + +@media (prefers-color-scheme: dark) { #index tr.region:hover { background: #333; } } + +#index tr.region:hover td.name { text-decoration: underline; color: inherit; } + +#scroll_marker { position: fixed; z-index: 3; right: 0; top: 0; width: 16px; height: 100%; background: #fff; border-left: 1px solid #eee; will-change: transform; } + +@media (prefers-color-scheme: dark) { 
#scroll_marker { background: #1e1e1e; } } + +@media (prefers-color-scheme: dark) { #scroll_marker { border-color: #333; } } + +#scroll_marker .marker { background: #ccc; position: absolute; min-height: 3px; width: 100%; } + +@media (prefers-color-scheme: dark) { #scroll_marker .marker { background: #444; } } diff --git a/htmlcov/z_055061514423972c___init___py.html b/htmlcov/z_055061514423972c___init___py.html new file mode 100644 index 0000000..c199605 --- /dev/null +++ b/htmlcov/z_055061514423972c___init___py.html @@ -0,0 +1,119 @@ + + + + + Coverage for src/lyscripts/compute/__init__.py: 80% + + + + + +

+ Coverage for src / lyscripts / compute / init.py: + 80% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 5 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Commands to compute prior and posterior state distributions from model samples.

3This can in turn speed up the computation of risks and prevalences.

4"""

6from pydantic_settings import BaseSettings, CliApp, CliSubCommand

8from lyscripts.compute import posteriors, prevalences, priors, risks, evidence

11class ComputeCLI(BaseSettings):

12 """Compute priors, posteriors, risks, prevalences and model evidence from model samples."""

14 priors: CliSubCommand[priors.PriorsCLI]

15 posteriors: CliSubCommand[posteriors.PosteriorsCLI]

16 risks: CliSubCommand[risks.RisksCLI]

17 prevalences: CliSubCommand[prevalences.PrevalencesCLI]

18 evidence: CliSubCommand[evidence.EvidenceCLI]

20 def cli_cmd(self) -> None:

21 """Start the ``compute`` subcommand."""

22 CliApp.run_subcommand(self)

+ + + diff --git a/htmlcov/z_055061514423972c___main___py.html b/htmlcov/z_055061514423972c___main___py.html new file mode 100644 index 0000000..95e3718 --- /dev/null +++ b/htmlcov/z_055061514423972c___main___py.html @@ -0,0 +1,105 @@ + + + + + Coverage for src/lyscripts/compute/__main__.py: 0% + + + + + +

+ Coverage for src / lyscripts / compute / main.py: + 0% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 5 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Run the compute module as a script."""

3from lyscripts.cli import assemble_main

4from lyscripts.compute import ComputeCLI

6if __name__ == "__main__":

7 main = assemble_main(settings_cls=ComputeCLI, prog_name="compute")

8 main()

+ + + diff --git a/htmlcov/z_055061514423972c_evidence_py.html b/htmlcov/z_055061514423972c_evidence_py.html new file mode 100644 index 0000000..77e8409 --- /dev/null +++ b/htmlcov/z_055061514423972c_evidence_py.html @@ -0,0 +1,294 @@ + + + + + Coverage for src/lyscripts/compute/evidence.py: 37% + + + + + +

+ Coverage for src / lyscripts / compute / evidence.py: + 37% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 65 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Compute the model evidence from MCMC samples.

3Given the samples drawn during thermodynamic integration and their respective log

4likelihoods, compute the model log evidence and the Bayesian Information Criterion.

5"""

7from __future__ import annotations

9import json

10from pathlib import Path

12import emcee

13import h5py

14import numpy as np

15import pandas as pd

16from loguru import logger

17from pydantic import Field

18from scipy.integrate import trapezoid

20from lyscripts.cli import assemble_main

21from lyscripts.configs import (

22 BaseCLI,

23 DataConfig,

24 SamplingConfig,

25 ScheduleConfig,

26)

28RNG = np.random.default_rng()

31def comp_bic(log_probs: np.ndarray, num_params: int, num_data: int) -> float:

32 r"""Compute the negative one half of the Bayesian Information Criterion (BIC).

34 The BIC is defined as [^1]

35 $$ BIC = k \\ln{n} - 2 \\ln{\\hat{L}} $$

36 where $k$ is the number of parameters ``num_params``, $n$ the number of datapoints

37 ``num_data`` and $\\hat{L}$ the maximum likelihood estimate of the ``log_prob``.

38 It is constructed such that the following is an

39 approximation of the model evidence:

40 $$ p(D \\mid m) \\approx \\exp{\\left( - BIC / 2 \\right)} $$

41 which is why this function returns the negative one half of it.

43 [^1]: https://en.wikipedia.org/wiki/Bayesian_information_criterion

44 """

45 return np.max(log_probs) - num_params * np.log(num_data) / 2.0

48def compute_evidence(

49 temp_schedule: np.ndarray,

50 log_probs: np.ndarray,

51) -> float:

52 """Compute the evidence.

54 Given a ``temp_schedule`` of inverse temperatures and corresponding sets of

55 ``log_probs``, we calculate the mean ``log_prob`` over all samples to approximate

56 the expectation value under the corresponding power posterior for each step in the

57 ``temp_schedule``. The evidence is evaluated using trapezoidal integration of the

58 expectation values over the ``temp_schedule``.

59 """

60 a_mc = np.mean(log_probs, axis=1)

61 return trapezoid(y=a_mc, x=temp_schedule)

64def compute_ti_results(

65 settings: EvidenceCLI,

66 temp_schedule: np.ndarray,

67 metrics: dict,

68 ndim: int,

69 h5_file: Path,

70) -> tuple[np.ndarray, np.ndarray]:

71 """Compute the results in case of a thermodynamic integration run."""

72 num_temps = len(temp_schedule)

74 if num_temps != len(h5_file["ti"]):

75 raise RuntimeError(

76 f"Parameters suggest temp schedule of length {num_temps}, "

77 f"but stored are {len(h5_file['ti'])}",

78 )

80 nwalker = ndim * settings.sampling.walkers_per_dim

81 nsteps = settings.sampling.num_steps

82 ti_log_probs = np.zeros(shape=(num_temps, nsteps * nwalker))

84 for i, run in enumerate(h5_file["ti"]):

85 reader = emcee.backends.HDFBackend(

86 settings.sampling.storage_file,

87 name=f"ti/{run}",

88 read_only=True,

89 )

90 ti_log_probs[i] = reader.get_blobs(flat=True)["log_prob"]

92 evidence = compute_evidence(temp_schedule, ti_log_probs)

93 metrics["evidence"] = evidence

95 return temp_schedule, ti_log_probs

98class EvidenceCLI(BaseCLI):

99 """Compute model evidence from thermodynamic integration samples."""

100

101 data: DataConfig

102 sampling: SamplingConfig

103 schedule: ScheduleConfig = Field(

104 description="Configuration for generating inverse temperature schedule.",

105 )

106 plots: Path = Field(

107 default="./plots",

108 description="Directory for storing plots.",

109 )

110 metrics: Path = Field(

111 default="./metrics.json",

112 description="Path to metrics file.",

113 )

114

115 def cli_cmd(self) -> None:

116 """Start the ``evidence`` subcommand.

117

118 Given the MCMC samples from thermodynamic integration provided by the

119 ``sampling`` argument and the corresponding inverse temperature schedule,

120 specified in the ``schedule`` argument, the model evidence is computed using

121 the functions :py:func:`compute_ti_results` and :py:func`compute_evidence`.

122 Further the BIC is evaluated.

123 """

124 data = self.data.load()

125

126 metrics = {}

127

128 temp_schedule = self.schedule.get_schedule()

129

130 with h5py.File(self.sampling.storage_file, mode="r") as h5_file:

131 # Get ndim from the HDF5 backend

132 backend = emcee.backends.HDFBackend(

133 self.sampling.storage_file,

134 read_only=True,

135 name=self.sampling.dataset,

136 )

137 ndim = backend.shape[1]

138 logger.info(f"Inferred {ndim} parameters from stored samples")

139

140 # if TI has been performed, compute the evidence

141 if "ti" in h5_file:

142 temp_schedule, ti_log_probs = compute_ti_results(

143 settings=self,

144 temp_schedule=temp_schedule,

145 metrics=metrics,

146 ndim=ndim,

147 h5_file=h5_file,

148 )

149

150 logger.info(

151 "Computed results of thermodynamic integration with "

152 f"{len(temp_schedule)} steps",

153 )

154

155 # store inverse temperatures and log-probs in CSV file

156 self.plots.parent.mkdir(parents=True, exist_ok=True)

157

158 beta_vs_accuracy = pd.DataFrame(

159 np.array(

160 [

161 temp_schedule,

162 np.mean(ti_log_probs, axis=1),

163 np.std(ti_log_probs, axis=1),

164 ],

165 ).T,

166 columns=["β", "accuracy", "std"],

167 )

168 beta_vs_accuracy.to_csv(self.plots, index=False)

169 logger.info(f"Plotted β vs accuracy at {self.plots}")

170

171 # use blobs, because also for TI, this is the unscaled log-prob

172 final_log_probs = backend.get_blobs()["log_prob"]

173 logger.info(

174 f"Opened samples from emcee backend from {self.sampling.storage_file}",

175 )

176

177 # store metrics in JSON file

178 self.metrics.parent.mkdir(parents=True, exist_ok=True)

179 self.metrics.touch(exist_ok=True)

180

181 metrics["BIC"] = comp_bic(

182 log_probs=final_log_probs,

183 num_params=ndim,

184 num_data=len(data),

185 )

186 metrics["max_llh"] = np.max(final_log_probs)

187 metrics["mean_llh"] = np.mean(final_log_probs)

188

189 with open(self.metrics, mode="w", encoding="utf-8") as metrics_file:

190 json.dump(metrics, metrics_file)

191

192 logger.info(f"Wrote out metrics to {self.metrics}")

193

194

195if __name__ == "__main__":

196 main = assemble_main(settings_cls=EvidenceCLI, prog_name="compute evidence")

197 main()

+ + + diff --git a/htmlcov/z_055061514423972c_posteriors_py.html b/htmlcov/z_055061514423972c_posteriors_py.html new file mode 100644 index 0000000..8deb162 --- /dev/null +++ b/htmlcov/z_055061514423972c_posteriors_py.html @@ -0,0 +1,239 @@ + + + + + Coverage for src/lyscripts/compute/posteriors.py: 59% + + + + + +

+ Coverage for src / lyscripts / compute / posteriors.py: + 59% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 46 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Compute posterior state distributions.

3The posteriors are computed from drawn samples for a list of defined scenarios. If

4priors have already been computed from the samples and the ``--cache_dir`` argument

5is the same as during that computation, the priors will automatically be loaded from

6the cache.

7"""

9from typing import Literal

11import numpy as np

12from loguru import logger

13from lymph import models

14from pydantic import Field

15from rich import progress

17from lyscripts.cli import assemble_main

18from lyscripts.compute.priors import compute_priors

19from lyscripts.compute.utils import BaseComputeCLI, HDF5FileStorage, get_cached

20from lyscripts.configs import (

21 DistributionConfig,

22 GraphConfig,

23 ModalityConfig,

24 ModelConfig,

25 add_distributions,

26 add_modalities,

27 construct_model,

28)

29from lyscripts.utils import console

32def compute_posteriors(

33 model_config: ModelConfig,

34 graph_config: GraphConfig,

35 dist_configs: dict[str, DistributionConfig],

36 modality_configs: dict[str, ModalityConfig],

37 priors: np.ndarray,

38 diagnosis: dict[Literal["ipsi", "contra"], dict],

39 midext: bool | None = None,

40 mode: Literal["HMM", "BN"] = "HMM",

41 progress_desc: str = "Computing posteriors from priors",

42) -> np.ndarray:

43 """Compute posterior state distributions from ``priors``.

45 This calls the ``model`` method :py:meth:`~lymph.types.Model.posterior_state_dist`

46 for each of the pre-computed ``priors``, given the specified ``diagnosis`` pattern.

48 For the :py:class:`~lymph.models.Midline` model, the ``midext`` argument can be

49 used to specify whether the midline extension is present or not.

50 """

51 model = construct_model(model_config, graph_config)

52 model = add_distributions(model, dist_configs)

53 model = add_modalities(model, modality_configs)

54 posteriors = []

55 kwargs = {"midext": midext} if isinstance(model, models.Midline) else {}

57 if isinstance(model, models.Unilateral | models.HPVUnilateral):

58 diagnosis = diagnosis.get("ipsi")

60 for prior in progress.track(

61 sequence=priors,

62 description=progress_desc,

63 total=len(priors),

64 console=console,

65 ):

66 posteriors.append(

67 model.posterior_state_dist(

68 given_state_dist=prior,

69 given_diagnosis=diagnosis,

70 mode=mode,

71 **kwargs,

72 ),

73 )

75 return np.stack(posteriors)

78class PosteriorsCLI(BaseComputeCLI):

79 """Compute posterior state distributions for different diagnosis scenarios."""

81 modalities: dict[str, ModalityConfig] = Field(

82 default={},

83 description=(

84 "Maps names of diagnostic modalities to their specificity/sensitivity."

85 ),

86 )

87 posteriors: HDF5FileStorage = Field(

88 description="Storage for the computed posteriors.",

89 )

91 def cli_cmd(self) -> None:

92 """Start the ``posteriors`` subcommand.

94 This will compute the posterior state distributions, given a personalized

95 diagnosis pattern, for each of the scenarios provided to the command.

96 """

97 logger.debug(self.model_dump_json(indent=2))

99 global_attrs = self.model_dump(

100 include={"model", "graph", "distributions", "modalities"},

101 )

102 self.posteriors.set_attrs(attrs=global_attrs, dataset="/")

103

104 samples = self.sampling.load()

105 cached_compute_priors = get_cached(compute_priors, self.cache_dir)

106 cached_compute_posteriors = get_cached(compute_posteriors, self.cache_dir)

107 num_scens = len(self.scenarios)

108

109 for i, scenario in enumerate(self.scenarios):

110 _fields = {"t_stages", "t_stages_dist", "mode"}

111 prior_kwargs = scenario.model_dump(include=_fields)

112

113 _priors = cached_compute_priors(

114 model_config=self.model,

115 graph_config=self.graph,

116 dist_configs=self.distributions,

117 samples=samples,

118 progress_desc=f"Computing priors for scenario {i + 1}/{num_scens}",

119 **prior_kwargs,

120 )

121

122 _fields = {"diagnosis", "midext", "mode"}

123 posterior_kwargs = scenario.model_dump(include=_fields)

124

125 posteriors = cached_compute_posteriors(

126 model_config=self.model,

127 graph_config=self.graph,

128 dist_configs=self.distributions,

129 modality_configs=self.modalities,

130 priors=_priors,

131 progress_desc=f"Computing posteriors for scenario {i + 1}/{num_scens}",

132 **posterior_kwargs,

133 )

134

135 self.posteriors.save(values=posteriors, dataset=f"{i:03d}")

136 self.posteriors.set_attrs(attrs=prior_kwargs, dataset=f"{i:03d}")

137 self.posteriors.set_attrs(attrs=posterior_kwargs, dataset=f"{i:03d}")

138

139

140if __name__ == "__main__":

141 main = assemble_main(settings_cls=PosteriorsCLI, prog_name="compute posteriors")

142 main()

+ + + diff --git a/htmlcov/z_055061514423972c_prevalences_py.html b/htmlcov/z_055061514423972c_prevalences_py.html new file mode 100644 index 0000000..fd439b4 --- /dev/null +++ b/htmlcov/z_055061514423972c_prevalences_py.html @@ -0,0 +1,333 @@ + + + + + Coverage for src/lyscripts/compute/prevalences.py: 91% + + + + + +

+ Coverage for src / lyscripts / compute / prevalences.py: + 91% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 82 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Prevalence prediction module.

3This computes the prevalence of an observed involvement pattern, given a trained model.

4It can also compare this prediction to the observed prevalence in the data. As for the

5risk prediction, this uses caching and computes the priors first.

6"""

8from collections.abc import Callable

9from typing import Literal

11import lydata # noqa: F401

12import numpy as np

13import pandas as pd

14from loguru import logger

15from lydata import C, Q

16from lydata.accessor import QueryPortion

17from lydata.querier import NoneQ

18from lydata.utils import is_old

19from lymph import models

20from pydantic import Field

21from rich import progress

23from lyscripts.cli import assemble_main

24from lyscripts.compute.priors import compute_priors

25from lyscripts.compute.utils import (

26 BaseComputeCLI,

27 HDF5FileStorage,

28 get_cached,

29)

30from lyscripts.configs import (

31 DataConfig,

32 DiagnosisConfig,

33 DistributionConfig,

34 GraphConfig,

35 ModalityConfig,

36 ModelConfig,

37 ScenarioConfig,

38 add_distributions,

39 add_modalities,

40 construct_model,

41)

42from lyscripts.utils import console

45def compute_prevalences(

46 model_config: ModelConfig,

47 graph_config: GraphConfig,

48 dist_configs: dict[str, DistributionConfig],

49 modality_configs: dict[str, ModalityConfig],

50 priors: np.ndarray,

51 diagnosis: dict[Literal["ipsi", "contra"], dict],

52 midext: bool | None = None,

53 progress_desc: str = "Computing prevalences from priors",

54) -> np.ndarray:

55 """Compute the prevalence of a diagnosis given the priors and the model."""

56 model = construct_model(model_config, graph_config)

57 model = add_distributions(model, dist_configs)

59 if len(modality_configs) != 1:

60 msg = "Only one modality is supported for prevalence prediction."

61 logger.error(msg)

62 raise ValueError(msg)

64 model = add_modalities(model, modality_configs)

65 prevalences = []

66 kwargs = {"midext": midext} if isinstance(model, models.Midline) else {}

68 for prior in progress.track(

69 sequence=priors,

70 description=progress_desc,

71 total=len(priors),

72 console=console,

73 ):

74 obs_dist = model.obs_dist(given_state_dist=prior)

75 involvement = {

76 side: diagnosis.get(side).get(next(iter(modality_configs)))

77 for side in ["ipsi", "contra"]

78 }

80 if isinstance(model, models.Unilateral | models.HPVUnilateral):

81 involvement = involvement.get("ipsi")

83 prevalence = model.marginalize(

84 given_state_dist=obs_dist,

85 involvement=involvement,

86 **kwargs,

87 )

89 if isinstance(model, models.Midline):

90 # In this case, we need to renormalize the prevalence by the marginalized

91 # probability of all states with midline extension. We must do this, because

92 # we compute the analogous quantity for the data. In principle, we could

93 # also compute the prevalence of the diagnosis *and* midline extension, but

94 # we have decided to compute the diagnosis *given* midline extension.

95 # https://github.com/lycosystem/lyscripts/blob/ea49ec/lyscripts/compute/prevalences.py#L217-L225

96 midext_prob = model.marginalize(

97 involvement=None,

98 given_state_dist=obs_dist,

99 **kwargs,

100 )

101 prevalence /= midext_prob

102

103 prevalences.append(prevalence)

104

105 return np.stack(prevalences)

106

107

108def generate_query_from_diagnosis(diagnosis: DiagnosisConfig) -> Q:

109 """Transform a diagnosis into a query for the data."""

110 result = NoneQ()

111 for side in ["ipsi", "contra"]:

112 for modality, pattern in getattr(diagnosis, side, {}).items():

113 for lnl, value in pattern.items():

114 column = (modality, side, lnl)

115 result &= C(column) == value

116 return result

117

118

119def observe_prevalence(

120 data: pd.DataFrame,

121 scenario_config: ScenarioConfig,

122 mapping: dict[int, str] | Callable[[int], str] | None = None,

123) -> QueryPortion:

124 """Extract prevalence defined in a ``scenario`` from the ``data``.

125

126 ``mapping`` defines how the T-stages in the data are supposed to be mapped to the

127 T-stages defined in the ``scenario``.

128

129 It returns the number of patients that match the given scenario and the total

130 number of patients that are considered. E.g., in the example below we 79 patients

131 are of late T-stage and have a tumor extending over the midline. Of those, 30 were

132 diagnosed with contralateral involvement in LNL II based on a CT scan.

133

134 >>> data = next(lydata.load_datasets(year=2021, institution="usz"))

135 >>> scenario_config = ScenarioConfig(

136 ... t_stages=["late"],

137 ... midext=True,

138 ... diagnosis=DiagnosisConfig(contra={"CT": {"II": True}}),

139 ... )

140 >>> observe_prevalence(data, scenario_config)

141 QueryPortion(match=np.int64(7), total=np.int64(79))

142 """

143 mapping = mapping or DataConfig.model_fields["mapping"].default_factory()

144 key = ("tumor", "1", "t_stage") if is_old(data) else ("tumor", "core", "t_stage")

145 data[key] = data.ly.t_stage.map(mapping)

146

147 has_t_stage = C("t_stage").isin(scenario_config.t_stages)

148 if scenario_config.midext is None:

149 has_midext = NoneQ()

150 else:

151 has_midext = C("midext") == scenario_config.midext

152

153 # Note that below we compute the prevalence of the diagnosis *given* midline

154 # extension. This means, that when computing the prevalence of the diagnosis in

155 # the model, we need to renormalize by diving by the probability of midline

156 # extension. For an older - but pretty surely correct - implementation see

157 # https://github.com/lycosystem/lyscripts/blob/ea49ec/lyscripts/compute/prevalences.py#L217-L225

158 return data.ly.portion(

159 query=generate_query_from_diagnosis(scenario_config.diagnosis),

160 given=has_t_stage & has_midext,

161 )

162

163

164class PrevalencesCLI(BaseComputeCLI):

165 """Predict the prevalence of an involvement pattern from model samples."""

166

167 modalities: dict[str, ModalityConfig] = Field(

168 default={},

169 description=(

170 "Maps names of diagnostic modalities to their specificity/sensitivity."

171 ),

172 )

173 prevalences: HDF5FileStorage = Field(

174 description="Storage for the computed prevalences.",

175 )

176 data: DataConfig

177

178 def cli_cmd(self) -> None:

179 """Start the ``prevalences`` subcommand."""

180 logger.debug(self.model_dump_json(indent=2))

181 global_attrs = self.model_dump(

182 include={"model", "graph", "distributions", "modalities"},

183 )

184 self.prevalences.set_attrs(attrs=global_attrs, dataset="/")

185

186 samples = self.sampling.load()

187 cached_compute_priors = get_cached(compute_priors, self.cache_dir)

188 cached_compute_prevalences = get_cached(compute_prevalences, self.cache_dir)

189 num_scens = len(self.scenarios)

190

191 for i, scenario in enumerate(self.scenarios):

192 _fields = {"t_stages", "t_stages_dist", "mode"}

193 prior_kwargs = scenario.model_dump(include=_fields)

194

195 _priors = cached_compute_priors(

196 model_config=self.model,

197 graph_config=self.graph,

198 dist_configs=self.distributions,

199 samples=samples,

200 progress_desc=f"Computing priors for scenario {i + 1}/{num_scens}",

201 **prior_kwargs,

202 )

203

204 _fields = {"diagnosis", "midext"}

205 prevalence_kwargs = scenario.model_dump(include=_fields)

206

207 prevalences = cached_compute_prevalences(

208 model_config=self.model,

209 graph_config=self.graph,

210 dist_configs=self.distributions,

211 modality_configs=self.modalities,

212 priors=_priors,

213 progress_desc=f"Computing prevalences for scenario {i + 1}/{num_scens}",

214 **prevalence_kwargs,

215 )

216

217 portion = observe_prevalence(

218 data=self.data.load(),

219 scenario_config=scenario,

220 mapping=self.data.mapping,

221 )

222 self.prevalences.save(values=prevalences, dataset=f"{i:03d}")

223 self.prevalences.set_attrs(attrs=prior_kwargs, dataset=f"{i:03d}")

224 self.prevalences.set_attrs(attrs=prevalence_kwargs, dataset=f"{i:03d}")

225 self.prevalences.set_attrs(

226 attrs={

227 "num_match": portion.match,

228 "num_total": portion.total,

229 },

230 dataset=f"{i:03d}",

231 )

232

233

234if __name__ == "__main__":

235 main = assemble_main(settings_cls=PrevalencesCLI, prog_name="compute prevalences")

236 main()

+ + + diff --git a/htmlcov/z_055061514423972c_priors_py.html b/htmlcov/z_055061514423972c_priors_py.html new file mode 100644 index 0000000..0ed3cff --- /dev/null +++ b/htmlcov/z_055061514423972c_priors_py.html @@ -0,0 +1,208 @@ + + + + + Coverage for src/lyscripts/compute/priors.py: 94% + + + + + +

+ Coverage for src / lyscripts / compute / priors.py: + 94% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 35 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Given samples drawn during an MCMC round, compute the (prior) state distributions.

3This is done for each sample and for a list of specified scenarios. The computation is

4cached at a location specified by the ``--cache_dir`` argument using ``joblib``.

5"""

7from typing import Literal

9import numpy as np

10from loguru import logger

11from pydantic import Field

12from rich import progress

14from lyscripts.cli import assemble_main

15from lyscripts.compute.utils import BaseComputeCLI, HDF5FileStorage, get_cached

16from lyscripts.configs import (

17 DistributionConfig,

18 GraphConfig,

19 ModelConfig,

20 add_distributions,

21 construct_model,

22)

23from lyscripts.utils import console

26def compute_priors(

27 model_config: ModelConfig,

28 graph_config: GraphConfig,

29 dist_configs: dict[str, DistributionConfig],

30 samples: np.ndarray,

31 t_stages: list[int | str],

32 t_stages_dist: list[float],

33 mode: Literal["HMM", "BN"] = "HMM",

34 progress_desc: str = "Computing priors from samples",

35) -> np.ndarray:

36 """Compute prior state distributions from the ``samples`` for the ``model``.

38 This will call the ``model`` method :py:meth:`~lymph.types.Model.state_dist`

39 for each of the ``samples``. The prior state distributions are computed for

40 each of the ``t_stages`` and marginalized over using the ``t_stages_dist``.

41 """

42 model = construct_model(model_config, graph_config)

43 model = add_distributions(model, dist_configs)

44 priors = []

46 for sample in progress.track(

47 sequence=samples,

48 description=progress_desc,

49 total=len(samples),

50 console=console,

51 ):

52 model.set_params(*sample)

53 priors.append(

54 sum(

55 model.state_dist(t_stage=t, mode=mode) * p

56 for t, p in zip(t_stages, t_stages_dist, strict=False)

57 ),

58 )

60 return np.stack(priors)

63class PriorsCLI(BaseComputeCLI):

64 """Compute the prior state distributions from MCMC samples."""

66 priors: HDF5FileStorage = Field(description="Storage for the computed priors.")

68 def cli_cmd(self) -> None:

69 """Start the ``priors`` subcommand.

71 Given a ``graph``, ``model``, ``distributions`` over diagnosis times, and

72 MCMC samples loaded from the ``sampling`` argument, this command computes the

73 prior state distributions for each of the specified ``scenarios``.

75 Precomputing these state distributions is useful, because they largely only

76 depend on T-stage and not on the diagnosis or involvement of interest. Hence,

77 computing the :py:mod:`~lyscripts.compute.posteriors` and

78 :py:mod:`~lyscripts.compute.risks` can be sped up.

80 Note that this command will use `joblib`_ to cache its computations.

82 .. _joblib: https://joblib.readthedocs.io/

83 """

84 logger.debug(self.model_dump_json(indent=2))

85 global_attrs = self.model_dump(include={"model", "graph", "distributions"})

86 self.priors.set_attrs(attrs=global_attrs, dataset="/")

88 samples = self.sampling.load()

89 cached_compute_priors = get_cached(compute_priors, self.cache_dir)

90 num_scenarios = len(self.scenarios)

92 for i, scenario in enumerate(self.scenarios):

93 _fields = {"t_stages", "t_stages_dist", "mode"}

94 prior_kwargs = scenario.model_dump(include=_fields)

96 priors = cached_compute_priors(

97 model_config=self.model,

98 graph_config=self.graph,

99 dist_configs=self.distributions,

100 samples=samples,

101 progress_desc=f"Computing priors for scenario {i + 1}/{num_scenarios}",

102 **prior_kwargs,

103 )

104

105 self.priors.save(values=priors, dataset=f"{i:03d}")

106 self.priors.set_attrs(attrs=prior_kwargs, dataset=f"{i:03d}")

107

108

109if __name__ == "__main__":

110 main = assemble_main(settings_cls=PriorsCLI, prog_name="compute priors")

111 main()

+ + + diff --git a/htmlcov/z_055061514423972c_risks_py.html b/htmlcov/z_055061514423972c_risks_py.html new file mode 100644 index 0000000..8386873 --- /dev/null +++ b/htmlcov/z_055061514423972c_risks_py.html @@ -0,0 +1,237 @@ + + + + + Coverage for src/lyscripts/compute/risks.py: 35% + + + + + +

+ Coverage for src / lyscripts / compute / risks.py: + 35% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 51 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Predict risks of involvements for scenarios using drawn MCMC samples.

3As the priors and posteriors, this computation, too, uses caching and may skip the

4computation of these two initial steps if the cache directory is the same as during

5their computation.

6"""

8from typing import Literal

10import numpy as np

11from loguru import logger

12from lymph import models

13from pydantic import Field

14from rich import progress

16from lyscripts.cli import assemble_main

17from lyscripts.compute.posteriors import compute_posteriors

18from lyscripts.compute.priors import compute_priors

19from lyscripts.compute.utils import BaseComputeCLI, HDF5FileStorage, get_cached

20from lyscripts.configs import (

21 DistributionConfig,

22 GraphConfig,

23 ModalityConfig,

24 ModelConfig,

25 add_distributions,

26 add_modalities,

27 construct_model,

28)

29from lyscripts.utils import console

32def compute_risks(

33 model_config: ModelConfig,

34 graph_config: GraphConfig,

35 dist_configs: dict[str, DistributionConfig],

36 modality_configs: dict[str, ModalityConfig],

37 posteriors: np.ndarray,

38 involvement: dict[Literal["ipsi", "contra"], dict],

39 progress_desc: str = "Computing risks from posteriors",

40) -> np.ndarray:

41 """Compute the risk of ``involvement`` from each of the ``posteriors``.

43 Essentially, this only calls the model's :py:meth:`lymph.models.Model.marginalize`

44 method, as nothing more is necessary than to marginalize the full posterior state

45 distribution over the states that correspond to the involvement of interest.

46 """

47 model = construct_model(model_config, graph_config)

48 model = add_distributions(model, dist_configs)

49 model = add_modalities(model, modality_configs)

50 risks = []

52 if isinstance(model, models.Unilateral | models.HPVUnilateral):

53 involvement = involvement.get("ipsi")

55 for posterior in progress.track(

56 sequence=posteriors,

57 description=progress_desc,

58 total=len(posteriors),

59 console=console,

60 ):

61 risks.append(

62 model.marginalize(involvement=involvement, given_state_dist=posterior),

63 )

65 return np.stack(risks)

class RisksCLI(BaseComputeCLI):
    """Predict the risk of involvement scenarios from model samples given diagnoses."""

    modalities: dict[str, ModalityConfig] = Field(
        description=(
            "Maps names of diagnostic modalities to their specificity/sensitivity."
        ),
        default={},
    )
    risks: HDF5FileStorage = Field(description="Storage for the computed risks.")

    def cli_cmd(self) -> None:
        """Start the ``risks`` subcommand.

        For every configured scenario, priors, posteriors, and finally risks are
        computed through cached versions of the respective functions. The risks
        are saved in a dataset named after the zero-padded scenario index, and the
        scenario settings used for each step are attached to it as attributes.
        """
        logger.debug(self.model_dump_json(indent=2))
        # The overall configuration is stored as attributes of the file's root.
        self.risks.set_attrs(
            attrs=self.model_dump(
                include={"model", "graph", "distributions", "modalities"},
            ),
            dataset="/",
        )

        samples = self.sampling.load()
        cached_compute_priors = get_cached(compute_priors, self.cache_dir)
        cached_compute_posteriors = get_cached(compute_posteriors, self.cache_dir)
        cached_compute_risks = get_cached(compute_risks, self.cache_dir)
        num_scens = len(self.scenarios)

        # Keyword arguments that all three computations have in common.
        shared_configs = {
            "model_config": self.model,
            "graph_config": self.graph,
            "dist_configs": self.distributions,
        }

        for idx, scenario in enumerate(self.scenarios):
            position = f"{idx + 1}/{num_scens}"
            dataset_name = f"{idx:03d}"

            prior_kwargs = scenario.model_dump(
                include={"t_stages", "t_stages_dist", "mode"},
            )
            posterior_kwargs = scenario.model_dump(
                include={"diagnosis", "midext", "mode"},
            )
            risk_kwargs = scenario.model_dump(include={"involvement"})

            _priors = cached_compute_priors(
                **shared_configs,
                samples=samples,
                progress_desc=f"Computing priors for scenario {position}",
                **prior_kwargs,
            )
            _posteriors = cached_compute_posteriors(
                **shared_configs,
                modality_configs=self.modalities,
                priors=_priors,
                progress_desc=f"Computing posteriors for scenario {position}",
                **posterior_kwargs,
            )
            risks = cached_compute_risks(
                **shared_configs,
                modality_configs=self.modalities,
                posteriors=_posteriors,
                progress_desc=f"Computing risks for scenario {position}",
                **risk_kwargs,
            )

            self.risks.save(values=risks, dataset=dataset_name)
            for attrs in (prior_kwargs, posterior_kwargs, risk_kwargs):
                self.risks.set_attrs(attrs=attrs, dataset=dataset_name)

136

137

138if __name__ == "__main__":

139 main = assemble_main(settings_cls=RisksCLI, prog_name="compute risks")

140 main()

+ + + diff --git a/htmlcov/z_055061514423972c_utils_py.html b/htmlcov/z_055061514423972c_utils_py.html new file mode 100644 index 0000000..e842741 --- /dev/null +++ b/htmlcov/z_055061514423972c_utils_py.html @@ -0,0 +1,373 @@ + + + + + Coverage for src/lyscripts/compute/utils.py: 95% + + + + + +

+ Coverage for src / lyscripts / compute / utils.py: + 95% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 118 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Utilities for precomputing the priors and posteriors."""

3import ast

4import functools

5from pathlib import Path

6from typing import Annotated, Any

8import h5py

9import numpy as np

10from joblib import Memory

11from loguru import logger

12from pydantic import AfterValidator, BaseModel, Field

14from lyscripts.configs import (

15 BaseCLI,

16 DistributionConfig,

17 GraphConfig,

18 ModelConfig,

19 SamplingConfig,

20 ScenarioConfig,

21)

class BaseComputeCLI(BaseCLI):
    """Common command line settings for the submodule ``compute``."""

    # No default: the graph always has to be provided.
    graph: GraphConfig
    model: ModelConfig = ModelConfig()
    distributions: dict[str, DistributionConfig] = Field(
        description=(
            "Mapping of model T-categories to predefined distributions "
            "over diagnose times."
        ),
        default={},
    )
    # The default is resolved against the working directory at class creation.
    cache_dir: Path = Field(
        description="Cache directory for storing function calls.",
        default=Path.cwd().joinpath(".cache"),
    )
    scenarios: list[ScenarioConfig] = Field(
        description="List of scenarios to compute risks for.",
        default=[],
    )
    # No default: the sampling settings always have to be provided.
    sampling: SamplingConfig

47def is_hdf5_compatible(value: Any) -> bool:

48 """Check if the given ``value`` can be stored in an HDF5 file."""

49 return isinstance(

50 value,

52 )

def to_hdf5_attrs(mapping: dict[str, Any]) -> dict[str, str]:
    """Make the values of ``mapping`` storable as HDF5 attributes.

    Values that :py:func:`is_hdf5_compatible` accepts are kept as they are, all
    other values are replaced by their ``str()`` representation.
    """
    return {
        key: val if is_hdf5_compatible(val) else str(val)
        for key, val in mapping.items()
    }

def from_hdf5_attrs(mapping: h5py.AttributeManager) -> dict[str, Any]:
    """Convert the HDF5 attributes to a dictionary of Python objects.

    Every value is passed to :py:func:`ast.literal_eval`. If it can be parsed as a
    Python literal (e.g. the string ``"[1, 2]"``), the parsed object is returned
    for that key. Otherwise the value is kept unchanged.
    """
    attrs = {}
    for key, value in mapping.items():
        try:
            attrs[key] = ast.literal_eval(value)
        except (ValueError, SyntaxError, TypeError):
            # ``literal_eval`` signals "not a literal" in different ways: a
            # ``ValueError`` for malformed nodes and non-string values, a
            # ``SyntaxError`` for text that is not a Python expression (such as
            # "late stage"), and a ``TypeError`` for e.g. unhashable dict keys.
            attrs[key] = value
    return attrs

def extract_modalities(diagnosis: dict[str, Any]) -> set[str]:
    """Collect the names of all modalities that appear in the ``diagnosis``.

    If the ``diagnosis`` has neither an ``"ipsi"`` nor a ``"contra"`` key, its own
    keys are taken to be the modalities. Otherwise, the keys found under the
    present sides are combined.

    This is not used in the main apps anymore, but since it may be useful, I keep it.

    >>> diagnosis = {
    ...     "ipsi": {
    ...         "MRI": {"II": True, "III": False},
    ...         "PET": {"II": False, "III": True},
    ...     },
    ...     "contra": {"MRI": {"II": False, "III": None}},
    ... }
    >>> sorted(extract_modalities(diagnosis))
    ['MRI', 'PET']
    """
    per_side = [diagnosis[side] for side in ("ipsi", "contra") if side in diagnosis]

    if not per_side:
        return set(diagnosis.keys())

    return set().union(*(side_diagnosis.keys() for side_diagnosis in per_side))

102

103

def ensure_parent_dir(path: Path) -> Path:
    """Make sure the directory containing ``path`` exists and return ``path``.

    Missing intermediate directories are created as well. An already existing
    directory is not an error.
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    logger.debug(f"Ensured parent directory of {path}")
    return path


HasParentPath = Annotated[Path, AfterValidator(ensure_parent_dir)]
"""Type hint for path whose parent dir is created if it doesn't exist."""

113

114

class HDF5FileStorage(BaseModel):
    """HDF5 file storage for in- and outputs of computations."""

    file: HasParentPath = Field(
        description="Path to the HDF5 file. Parent directories are created if needed.",
    )
    dataset: str | None = Field(
        default=None,
        description=(
            "Name of the dataset in the HDF5 file. Save/load methods can override this."
        ),
    )

    def _get_dataset(self) -> str:
        """Get attribute ``dataset`` or the first dataset in the file.

        Raises a ``ValueError`` if no ``dataset`` is configured and the file does
        not contain any entry that could be used instead.

        >>> from tempfile import TemporaryDirectory
        >>> tmp_path = Path(TemporaryDirectory().name) / "test.hdf5"
        >>> storage = HDF5FileStorage(file=tmp_path)
        >>> rand_data = np.random.rand(100, 100)
        >>> storage.save(values=rand_data, dataset="test")
        >>> np.all(storage.load(dataset="test") == rand_data)
        np.True_
        >>> np.all(storage.load() == rand_data)     # loads first dataset
        np.True_
        >>> some_attrs = {"key": "value"}
        >>> storage.set_attrs(attrs=some_attrs, dataset="test")
        >>> storage.get_attrs(dataset="test")
        {'key': 'value'}
        """
        if self.dataset is not None:
            return self.dataset

        with h5py.File(self.file, "r") as file:
            # A default avoids leaking a bare ``StopIteration`` for empty files.
            first_key = next(iter(file.keys()), None)

        if first_key is None:
            raise ValueError(f"No dataset found in {self.file}")

        return first_key

    def load(self, dataset: str | None = None) -> np.ndarray:
        """Load the dataset with the name ``dataset``.

        Without a ``dataset`` name, the one from :py:meth:`_get_dataset` is used.
        """
        dataset = dataset or self._get_dataset()

        with h5py.File(self.file, "r") as file:
            # Indexing with ``()`` reads the whole dataset before the file closes.
            array = file[dataset][()]

        logger.debug(f"Loaded dataset {dataset} from {self.file}")
        return array

    def get_attrs(self, dataset: str | None = None) -> dict[str, Any]:
        """Get the attributes of the dataset ``dataset``.

        Without a ``dataset`` name, the one from :py:meth:`_get_dataset` is used.
        The stored attributes are converted by :py:func:`from_hdf5_attrs`.
        """
        dataset = dataset or self._get_dataset()

        with h5py.File(self.file, "r") as file:
            attrs = from_hdf5_attrs(file[dataset].attrs)

        logger.debug(f"Loaded attrs for dataset '{dataset}' from {self.file}")
        return attrs

    def save(self, values: np.ndarray, dataset: str | None = None) -> None:
        """Set the ``values`` for the ``dataset`` dataset.

        Without a ``dataset`` name, the one from :py:meth:`_get_dataset` is used.
        An existing dataset of the same name is deleted and written anew.
        """
        dataset = dataset or self._get_dataset()

        with h5py.File(self.file, "a") as file:
            if dataset in file:
                del file[dataset]
            file[dataset] = values

        logger.debug(f"Stored dataset {dataset} in {self.file}")

    def set_attrs(self, attrs: dict[str, Any], dataset: str | None = None) -> None:
        """Update the ``attrs`` for the ``dataset`` dataset.

        Without a ``dataset`` name, the one from :py:meth:`_get_dataset` is used.
        The ``attrs`` are converted by :py:func:`to_hdf5_attrs` before they are
        stored. Raises a ``ValueError`` if the ``dataset`` is not in the file.
        """
        dataset = dataset or self._get_dataset()

        with h5py.File(self.file, "a") as file:
            if dataset not in file:
                raise ValueError(f"Dataset '{dataset}' not found in {self.file}")
            file[dataset].attrs.update(to_hdf5_attrs(attrs))

        logger.debug(f"Stored attrs {attrs} for dataset '{dataset}' in {self.file}")

192

193

def reduce_pattern(pattern: dict[str, dict[str, bool]]) -> dict[str, dict[str, bool]]:
    """Reduce a ``pattern`` by removing all entries that are ``None``.

    This way, it should be completely recoverable by the ``complete_pattern`` function
    but be shorter to store. Sides without any entry other than ``None`` are dropped
    entirely. A side that is missing from the ``pattern`` is treated like a side
    without entries, so an already reduced pattern can be passed in again. The
    input is not modified.

    Unused but maybe useful for some cases. Keeping it in here for now.

    >>> full = {
    ...     "ipsi": {"I": None, "II": True, "III": None},
    ...     "contra": {"I": None, "II": None, "III": None},
    ... }
    >>> reduce_pattern(full)
    {'ipsi': {'II': True}}
    >>> reduce_pattern(reduce_pattern(full))
    {'ipsi': {'II': True}}

    """
    reduced_pattern = {}
    for side in ("ipsi", "contra"):
        side_pattern = pattern.get(side) or {}
        known = {lnl: val for lnl, val in side_pattern.items() if val is not None}
        # ``False`` is a real value, so test for emptiness of the dict itself.
        if known:
            reduced_pattern[side] = known

    return reduced_pattern

220

221

def complete_pattern(
    pattern: dict[str, dict[str, bool]] | None,
    lnls: list[str],
) -> dict[str, dict[str, bool]]:
    """Make sure the provided involvement ``pattern`` is correct.

    For each side of the neck, and for each of the ``lnls`` this should in the end
    contain ``True``, ``False`` or ``None``. Missing sides and missing LNLs are
    filled with ``None``, and every other value given for one of the ``lnls`` is
    converted with ``bool()``. Passing ``None`` as ``pattern`` is the same as
    passing an empty dictionary.

    The completed pattern is returned as a new dictionary. The ``pattern`` that was
    passed in is left untouched.

    Unused but maybe useful for some cases. Keeping it in here for now.

    >>> pattern = {"ipsi": {"II": True}}
    >>> lnls = ["II", "III"]
    >>> complete_pattern(pattern, lnls)
    {'ipsi': {'II': True, 'III': None}, 'contra': {'II': None, 'III': None}}
    >>> pattern
    {'ipsi': {'II': True}}

    """
    # Copy both nesting levels, so that filling in values cannot alter the input.
    completed = {side: dict(entries) for side, entries in (pattern or {}).items()}

    for side in ("ipsi", "contra"):
        side_pattern = completed.setdefault(side, {})
        for lnl in lnls:
            value = side_pattern.get(lnl)
            side_pattern[lnl] = None if value is None else bool(value)

    return completed

255

256

def get_cached(func: callable, cache_dir: Path) -> callable:
    """Return cached ``func`` with a cache at ``cache_dir``.

    The argument ``progress_desc`` is ignored by the cache. The returned wrapper
    logs every call and whether its result was already in the cache before it
    delegates to the cached function. The cached function itself is available
    as the attribute ``_cached_func`` of the wrapper.
    """
    cached_func = Memory(location=cache_dir, verbose=0).cache(
        func,
        ignore=["progress_desc"],
    )
    logger.info(f"Initialized cache for {func.__name__} at {cache_dir}")

    @functools.wraps(func)
    def log_cache_info_wrapper(*args, **kwargs):
        logger.debug(f"Calling {func.__name__}({args}, {kwargs})")

        if cached_func.check_call_in_cache(*args, **kwargs):
            cache_status = f"Cache hit for {func.__name__}, returning stored result"
        else:
            cache_status = f"Cache miss for {func.__name__}, computing result"
        logger.info(cache_status)

        result = cached_func(*args, **kwargs)
        logger.debug(f"Computed {result = }")
        return result

    log_cache_info_wrapper._cached_func = cached_func
    return log_cache_info_wrapper

+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc___init___py.html b/htmlcov/z_5bf5c588c698c6cc___init___py.html new file mode 100644 index 0000000..fbb5bfa --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc___init___py.html @@ -0,0 +1,173 @@ + + + + + Coverage for src/lyscripts/__init__.py: 77% + + + + + +

+ Coverage for src / lyscripts / __init__.py: + 77% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 30 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Initial entry point for the lyscripts package and CLIs.

3This top-level module configures and provides the top-level CLI through which all

4subcommands can be accessed.

5"""

7import sys

8from typing import Literal

10import pandas as pd

11from loguru import logger

12from pydantic import Field

13from pydantic_settings import (

14 BaseSettings,

15 CliApp,

16 CliImplicitFlag,

17 CliSubCommand,

18)

20from lyscripts import compute, data, integrate, sample, schedule # noqa: F401

21from lyscripts._version import version

22from lyscripts.cli import assemble_main, configure_logging

23from lyscripts.utils import console

25__version__ = version

26__description__ = "Package to interact with lymphatic progression data and models."

27__author__ = "Roman Ludwig"

28__email__ = "gygqdstu3@mozmail.com"

29__uri__ = "https://github.com/lycosystem/lyscripts"

31# activate copy on write in pandas.

32# See https://pandas.pydata.org/docs/user_guide/copy_on_write.html

33pd.options.mode.copy_on_write = True

35logger.disable("lyscripts")

class LyscriptsCLI(BaseSettings):
    """A CLI to interact with lymphatic progression data and models."""

    version: CliImplicitFlag[bool] = Field(
        description="Display the version of lyscripts and exit.",
        default=False,
    )
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(
        description="Set the log level of the lyscripts CLI.",
        default="INFO",
    )

    # One subcommand per submodule of the package.
    data: CliSubCommand[data.DataCLI]
    sample: CliSubCommand[sample.SampleCLI]
    compute: CliSubCommand[compute.ComputeCLI]
    schedule: CliSubCommand[schedule.ScheduleCLI]
    integrate: CliSubCommand[integrate.IntegrateCLI]

    def __init__(self, **kwargs):
        """Add logging configuration to the lyscripts CLI."""
        # Logging is configured from ``sys.argv`` before the settings are parsed.
        configure_logging(argv=sys.argv, console=console)
        super().__init__(**kwargs)

    def cli_cmd(self) -> None:
        """Start the main lyscripts CLI.

        With the ``version`` flag set, only the version of lyscripts is logged.
        Without it, the selected subcommand is run.
        """
        logger.debug("Starting lyscripts CLI.")

        if not self.version:
            CliApp.run_subcommand(self)
            return

        logger.info(f"lyscripts {__version__}")

76main = assemble_main(settings_cls=LyscriptsCLI, prog_name="lyscripts")

+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc___main___py.html b/htmlcov/z_5bf5c588c698c6cc___main___py.html new file mode 100644 index 0000000..b0ade06 --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc___main___py.html @@ -0,0 +1,103 @@ + + + + + Coverage for src/lyscripts/__main__.py: 0% + + + + + +

+ Coverage for src / lyscripts / __main__.py: + 0% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 3 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Utility for common tasks w.r.t. inference & prediction using `lymph` package."""

3from lyscripts import main

5if __name__ == "__main__":

6 main()

+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc__version_py.html b/htmlcov/z_5bf5c588c698c6cc__version_py.html new file mode 100644 index 0000000..d0d4a1a --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc__version_py.html @@ -0,0 +1,121 @@ + + + + + Coverage for src/lyscripts/_version.py: 100% + + + + + +

+ Coverage for src / lyscripts / _version.py: + 100% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 11 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1# file generated by vcs-versioning

2# don't change, don't track in version control

3from __future__ import annotations

5__all__ = [

6 "__version__",

7 "__version_tuple__",

8 "version",

9 "version_tuple",

10 "__commit_id__",

11 "commit_id",

12]

14version: str

15__version__: str

16__version_tuple__: tuple[int | str, ...]

17version_tuple: tuple[int | str, ...]

18commit_id: str | None

19__commit_id__: str | None

21__version__ = version = '0.1.dev1'

22__version_tuple__ = version_tuple = (0, 1, 'dev1')

24__commit_id__ = commit_id = 'g77f1c18c7'

+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_cli_py.html b/htmlcov/z_5bf5c588c698c6cc_cli_py.html new file mode 100644 index 0000000..ab2f74f --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_cli_py.html @@ -0,0 +1,220 @@ + + + + + Coverage for src/lyscripts/cli.py: 42% + + + + + +

+ Coverage for src / lyscripts / cli.py: + 42% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 45 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Utilities for configuring and running CLIs app.

3In this module, we define and configure a :py:class:`RichDefaultHelpFormatter` that

4nicely displays the CLI's ``--help`` text. We also provide a function to

5:py:func:`assemble a main function <assemble_main>` for the different CLI apps to save

6some boilerplate code. Lastly, we have two functions related to the `loguru`_ setup.

8.. _loguru: https://loguru.readthedocs.io/en/stable

9"""

11import inspect

12import logging

13from collections.abc import Callable

14from typing import Literal

16from loguru import logger

17from pydantic_settings import BaseSettings, CliApp, CliSettingsSource

18from rich.console import Console

19from rich.logging import RichHandler

20from rich_argparse import ArgumentDefaultsRichHelpFormatter

22_current_log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"

def assemble_main(
    settings_cls: type[BaseSettings],
    prog_name: str,
) -> Callable[[], None]:
    """Assemble a ``main()`` function for a CLI app.

    The returned function builds a :py:class:`~pydantic_settings.CliSettingsSource`
    for ``settings_cls`` under the program name ``prog_name``, with kebab-case
    arguments, class docstrings used for groups, and a rich help formatter. It then
    runs the CLI app with that source.

    Assembling a ``main()`` function for all subcommands like this saves some
    boilerplate code.
    """

    def main() -> None:
        """Start the main CLI app."""
        CliApp.run(
            settings_cls,
            cli_settings_source=CliSettingsSource(
                settings_cls=settings_cls,
                cli_prog_name=prog_name,
                cli_kebab_case=True,
                cli_use_class_docs_for_groups=True,
                formatter_class=ArgumentDefaultsRichHelpFormatter,
            ),
        )

    return main

def somewhat_safely_get_loglevel(
    argv: list[str],
) -> Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:
    """Set the log level of the lyscripts CLI.

    This is a bit of a hack, since the :py:class:`~lyscripts.LyscriptsCLI` class is not
    yet initialized when we need to set the log level. In case the provided log-level is
    not valid, :py:class:`~lyscripts.LyscriptsCLI` will raise an exception at a later
    point.

    Only the value that belongs to the ``--log-level`` option is considered, given
    either as the next argument or as ``--log-level=VALUE``. Level names that merely
    occur somewhere else in ``argv`` (e.g. inside a file name) are ignored. If the
    option is given more than once, the last valid value is used.

    Return ``"INFO"`` by default.
    """
    valid_levels = ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
    option = "--log-level"
    log_level = "INFO"

    for position, arg in enumerate(argv):
        if arg == option:
            # The value is the following argument, if there is one.
            has_value = position + 1 < len(argv)
            candidate = argv[position + 1] if has_value else None
        elif arg.startswith(f"{option}="):
            candidate = arg.partition("=")[2]
        else:
            continue

        if candidate in valid_levels:
            log_level = candidate

    return log_level

def configure_logging(
    argv: list[str],
    console: Console,
) -> None:
    """Configure the `loguru`_ logging system of the lyscripts CLI.

    Logging is enabled for ``lyscripts`` and ``lydata``. The log level is read from
    ``argv`` and stored in the module-level ``_current_log_level``. All existing
    handlers are removed and replaced by a single rich handler that writes to the
    given ``console`` at that level.

    .. _loguru: https://loguru.readthedocs.io/en/stable
    """
    global _current_log_level

    for package in ("lyscripts", "lydata"):
        logger.enable(package)

    _current_log_level = somewhat_safely_get_loglevel(argv=argv)

    logger.remove()
    logger.add(
        sink=RichHandler(console=console),
        level=_current_log_level,
        format="<lvl>{message}</>",
    )

class InterceptHandler(logging.Handler):
    """Intercept logging messages and redirect them to Loguru."""

    def emit(self, record: logging.LogRecord) -> None:
        """Intercept the log record and redirect it to Loguru."""
        # Use the Loguru level of the same name, or else the numeric level.
        try:
            level: str | int = logger.level(record.levelname).name
        except ValueError:
            level = record.levelno

        # Walk up the stack until the first frame after this one that belongs
        # neither to the ``logging`` module nor to the import machinery.
        frame = inspect.currentframe()
        depth = 0
        while frame:
            source_file = frame.f_code.co_filename
            from_logging = source_file == logging.__file__
            from_importlib = "importlib" in source_file and "_bootstrap" in source_file
            if depth > 0 and not from_logging and not from_importlib:
                break
            frame, depth = frame.f_back, depth + 1

        logger.opt(depth=depth, exception=record.exc_info).log(
            level,
            record.getMessage(),
        )

+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_configs_py.html b/htmlcov/z_5bf5c588c698c6cc_configs_py.html new file mode 100644 index 0000000..3dda220 --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_configs_py.html @@ -0,0 +1,922 @@ + + + + + Coverage for src/lyscripts/configs.py: 85% + + + + + +

+ Coverage for src / lyscripts / configs.py: + 85% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 280 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Using `pydantic`_, we define configurations for the package.

3Most importantly, these configurations are part of the CLIs that the package provides.

4but they also help with programmatically validating and constructing various objects.

5Maybe most importantly, the :py:class:`GraphConfig` and :py:class:`ModelConfig` may be

6used to precisely and reproducibly define how the function :py:func:`construct_model`

7should create lymphatic progression :py:mod:`~lymph.models`.

9.. _pydantic: https://docs.pydantic.dev/latest/

10"""

12from __future__ import annotations

14import importlib

15import importlib.util

16import os

17import warnings

18from collections.abc import Callable, Sequence

19from copy import deepcopy

20from pathlib import Path

21from typing import Annotated, Any, Literal

23import numpy as np

24import pandas as pd

25import yaml

26from loguru import logger

27from lydata.loader import LyDataset

28from lydata.utils import ModalityConfig

29from lymph import graph, models

30from lymph.modalities import Pathological

31from lymph.types import Model, PatternType

32from pydantic import (

33 AfterValidator,

34 BaseModel,

35 ConfigDict,

36 Field,

37 FilePath,

38)

39from pydantic_settings import (

40 BaseSettings,

41 PydanticBaseSettingsSource,

42 YamlConfigSettingsSource,

43)

44from pydantic_settings.sources import DEFAULT_PATH

46from lyscripts.utils import binom_pmf, flatten, load_model_samples, load_patient_data

48FuncNameType = Literal["binomial"]

51DIST_MAP: dict[FuncNameType, Callable] = {

52 "binomial": binom_pmf,

53}

class CrossValidationConfig(BaseModel):
    """Configs for splitting a dataset into cross-validation folds."""

    seed: int = Field(
        description="Seed for the random number generator.",
        default=42,
    )
    folds: int = Field(
        description="Number of folds to split the dataset into.",
        default=5,
    )

class DataConfig(BaseModel):
    """Where to load lymphatic progression data from and how to feed it into a model."""

    source: FilePath | LyDataset = Field(
        description=(
            "Either a path to a CSV file or a config that specifies how and where "
            "to fetch the data from."
        ),
    )
    side: Literal["ipsi", "contra"] | None = Field(
        description="Side of the neck to load data for. Only for Unilateral models.",
        default=None,
    )
    # By default, the values 0 to 2 map to "early" and 3 and 4 map to "late".
    mapping: dict[Literal[0, 1, 2, 3, 4] | str, int | str] = Field(
        description="Optional mapping of numeric T-stages to model T-stages.",
        default_factory=lambda: {t: "late" if t > 2 else "early" for t in range(5)},
    )

    def load(self, **get_dataframe_kwargs) -> pd.DataFrame:
        """Load data from path or the :py:class:`~lydata.loader.LyDataset`.

        The keyword arguments are forwarded to whichever loader is used.
        """
        source = self.source
        if not isinstance(source, LyDataset):
            return load_patient_data(source, **get_dataframe_kwargs)

        return source.get_dataframe(**get_dataframe_kwargs)

    def get_load_kwargs(self, **read_csv_kwargs: dict[str, Any]) -> dict[str, Any]:
        """Get kwargs for :py:meth:`~lymph.types.Model.load_patient_data`.

        The result holds the loaded data under ``"patient_data"`` and all other
        fields of this config that are not ``None``, except for ``source``.
        """
        patient_data = self.load(**read_csv_kwargs)
        remaining_fields = self.model_dump(exclude={"source"}, exclude_none=True)
        return {"patient_data": patient_data, **remaining_fields}

100

101

def check_pattern(value: PatternType) -> Any:
    """Pass every entry of the pattern through ``map_to_optional_bool``.

    Returns a new dictionary with the same keys and the converted values.
    """
    checked = {}
    for lnl, state in value.items():
        checked[lnl] = map_to_optional_bool(state)
    return checked

105

106

class DiagnosisConfig(BaseModel):
    """Defines an ipsi- and contralateral diagnosis pattern."""

    # Both fields map a modality name to the pattern that modality observed.
    ipsi: dict[str, Annotated[PatternType, AfterValidator(check_pattern)]] = Field(
        description="Observed diagnoses by different modalities on the ipsi neck.",
        examples=[{"CT": {"II": True, "III": False}}],
        default={},
    )
    contra: dict[str, Annotated[PatternType, AfterValidator(check_pattern)]] = Field(
        description="Observed diagnoses by different modalities on the contra neck.",
        default={},
    )

    def to_involvement(self, modality: str) -> InvolvementConfig:
        """Convert the diagnosis pattern to an involvement pattern for ``modality``.

        A side for which the ``modality`` has no entry gets an empty pattern.
        """
        per_side = {
            "ipsi": self.ipsi.get(modality, {}),
            "contra": self.contra.get(modality, {}),
        }
        return InvolvementConfig(**per_side)

126

127

class DistributionConfig(BaseModel):
    """Configuration defining a distribution over diagnose times."""

    kind: Literal["frozen", "parametric"] = Field(
        description="Parametric distributions may be updated.",
        default="frozen",
    )
    func: FuncNameType = Field(
        description="Name of predefined function to use as distribution.",
        default="binomial",
    )
    params: dict[str, int | float] = Field(
        description="Parameters to pass to the predefined function.",
        default={},
    )

143

144

class InvolvementConfig(BaseModel):
    """Config that defines an ipsi- and contralateral involvement pattern."""

    # Both patterns are run through ``check_pattern`` after validation.
    ipsi: Annotated[PatternType, AfterValidator(check_pattern)] = Field(
        description="Involvement pattern for the ipsilateral side of the neck.",
        examples=[{"II": True, "III": False}],
        default={},
    )
    contra: Annotated[PatternType, AfterValidator(check_pattern)] = Field(
        description="Involvement pattern for the contralateral side of the neck.",
        default={},
    )

157

158

def retrieve_graph_representation(model: Model) -> graph.Representation:
    """Retrieve the graph representation from a model.

    A model with a ``graph`` attribute yields that attribute directly. Otherwise
    the search continues recursively in the first attribute the model has out of
    ``hpv``, ``ipsi``, and ``ext`` (checked in this order). A ``ValueError`` is
    raised when the model has none of these attributes.
    """
    if hasattr(model, "graph"):
        return model.graph

    for wrapped_name in ("hpv", "ipsi", "ext"):
        if hasattr(model, wrapped_name):
            return retrieve_graph_representation(getattr(model, wrapped_name))

    raise ValueError("Model does not have a graph representation.")

174

175

class GraphConfig(BaseModel):
    """Specifies how the tumor(s) and LNLs are connected in a DAG."""

    # Both fields map a node name to the names of the LNLs it drains to.
    tumor: dict[str, list[str]] = Field(
        description="Define the name of the tumor(s) and which LNLs it/they drain to.",
    )
    lnl: dict[str, list[str]] = Field(
        description="Define the name of the LNL(s) and which LNLs it/they drain to.",
    )

    @classmethod
    def from_model(cls: type, model: Model) -> GraphConfig:
        """Create a ``GraphConfig`` from a ``Model``.

        For every tumor and every LNL in the model's graph representation, the
        names of the child nodes of its outgoing edges are collected.
        """
        representation = retrieve_graph_representation(model)

        def child_names(node) -> list[str]:
            return [edge.child.name for edge in node.out]

        return cls(
            tumor={
                name: child_names(node)
                for name, node in representation.tumors.items()
            },
            lnl={
                name: child_names(node)
                for name, node in representation.lnls.items()
            },
        )

200

201

def has_model_symbol(path: Path) -> Path:
    """Check if the Python file at ``path`` defines a symbol named ``model``.

    The file is imported under the name of its stem, which means that its
    top-level code is executed. Returns the unchanged ``path`` on success.

    Raises a ``ValueError`` if the file cannot be loaded as a Python module (e.g.
    because of its file extension) or if it does not define ``model``.
    """
    spec = importlib.util.spec_from_file_location(path.stem, path)

    # No spec (or a spec without loader) is returned for files that Python does
    # not know how to import. Report that as an invalid value, too.
    if spec is None or spec.loader is None:
        raise ValueError(f"File at {path} cannot be loaded as a Python module.")

    module = importlib.util.module_from_spec(spec)
    # NOTE: This runs the code in the file. Only use it with trusted files.
    spec.loader.exec_module(module)

    if not hasattr(module, "model"):
        raise ValueError(f"Python file at {path} does not define a symbol 'model'.")

    return path

212

213

def get_symmetry_kwargs(model: Model) -> dict[str, Any]:
    """Get the symmetry kwargs from a model.

    A ``TypeError`` is raised for ``Unilateral`` and ``HPVUnilateral`` models. For
    a model with an ``ext`` attribute, the kwargs of that attribute are returned.
    Any other model yields its ``is_symmetric`` attribute, or an empty dictionary
    if it does not have one.
    """
    if isinstance(model, (models.Unilateral, models.HPVUnilateral)):
        raise TypeError("Unilateral models do not have symmetry kwargs.")

    if not hasattr(model, "ext"):
        return getattr(model, "is_symmetric", {})

    return get_symmetry_kwargs(model.ext)

223

224

class ModelConfig(BaseModel):
    """Define which of the ``lymph`` models to use and how to set them up."""

    external_file: Annotated[FilePath, AfterValidator(has_model_symbol)] | None = Field(
        description="Path to a Python file that defines a model.",
        default=None,
    )
    class_name: Literal["Unilateral", "Bilateral", "Midline"] = Field(
        description="Name of the model class to use.",
        default="Unilateral",
    )
    constructor: Literal["binary", "trinary"] = Field(
        description="Trinary models differentiate btw. micro- and macroscopic disease.",
        default="binary",
    )
    max_time: int = Field(
        description="Max. number of time-steps to evolve the model over.",
        default=10,
    )
    named_params: Sequence[str] = Field(
        description=(
            "Subset of valid model parameters a sampler may provide in the form of a "
            "dictionary to the model instead of as an array. Or, after sampling, with "
            "this list, one may safely recover which parameter corresponds to which "
            "index in the sample."
        ),
        default=None,
    )
    kwargs: dict[str, Any] = Field(
        description="Additional keyword arguments to pass to the model constructor.",
        default={},
    )

    @classmethod
    def from_model(cls: type, model: Model) -> ModelConfig:
        """Create a ``ModelConfig`` from a ``Model``.

        Always emits a ``UserWarning``, because not all constructor arguments can
        be recovered from a model instance. What can be recovered is collected in
        the ``kwargs`` field of the returned config.
        """
        warnings.warn(
            message=(
                "Not all kwargs passed at initialization can be recovered into a "
                "config. Make sure to manually double-check the config."
            ),
            category=UserWarning,
            stacklevel=2,
        )

        additional_kwargs = {}
        if getattr(model, "_named_params", None):
            additional_kwargs["named_params"] = list(model.named_params)

        # Unilateral models raise a ``TypeError`` here and get no symmetry entry.
        try:
            additional_kwargs["is_symmetric"] = get_symmetry_kwargs(model)
        except TypeError:
            pass

        if isinstance(model, models.Midline):
            additional_kwargs.update(
                use_midext_evo=model.use_midext_evo,
                use_central=hasattr(model, "_central"),
                use_mixing=hasattr(model, "mixing_param"),
            )
            # NOTE(review): this check is assumed to belong to the Midline branch.
            # Confirm the nesting against the original file.
            if not hasattr(model, "_unknown"):
                additional_kwargs["marginalize_unknown"] = False

        return cls(
            class_name=type(model).__name__,
            constructor="trinary" if model.is_trinary else "binary",
            max_time=model.max_time,
            kwargs=additional_kwargs,
        )

294

295

def modalityconfig_from_model(model: Model, modality_name: str) -> ModalityConfig:
    """Build a ``ModalityConfig`` for the modality ``modality_name`` of ``model``.

    Specificity and sensitivity are copied from the modality object returned by
    ``model.get_modality``; the kind is ``"pathological"`` for ``Pathological``
    instances and ``"clinical"`` for everything else.
    """
    modality = model.get_modality(modality_name)
    spec, sens = modality.spec, modality.sens

    if isinstance(modality, Pathological):
        kind = "pathological"
    else:
        kind = "clinical"

    return ModalityConfig(spec=spec, sens=sens, kind=kind)

304

305

class DeprecatedModelConfig(BaseModel):
    """Model configuration prior to ``lyscripts`` major version 1.

    This is implemented for backwards compatibility. Its sole job is to translate
    the outdated settings format into the new one. Note that the only stuff that needs
    to be translated is the model configuration itself and the distributions for
    marginalization over diagnosis times. The :py:class:`~GraphConfig` is still
    compatible.
    """

    first_binom_prob: float = Field(
        description="Fixed parameter for first binomial dist over diagnosis times.",
        ge=0.0,
        le=1.0,
    )
    max_t: int = Field(
        description="Max. number of time-steps to evolve the model over.",
        gt=0,
    )
    t_stages: list[int | str] = Field(
        description=(
            "List of T-stages to marginalize over in the scenario. The old format "
            "assumed all T-stages except the first one to be parametric. Only binomial "
            "distributions are supported."
        ),
    )
    class_: Literal["Unilateral", "Bilateral", "Midline", "MidlineBilateral"] = Field(
        description="Name of the model class. Only binary models are supported.",
        alias="class",
    )
    kwargs: dict[str, Any] = Field(
        default={},
        description="Additional keyword arguments to pass to the model constructor.",
    )

    def model_post_init(self, __context):
        """Issue a deprecation warning and normalize midline class names.

        Any class name containing ``"Midline"`` (i.e. also ``"MidlineBilateral"``)
        is replaced by ``"Midline"``, accompanied by a second warning.
        """
        warnings.warn(
            message="The 'DeprecatedModelConfig' is deprecated.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        if "Midline" in self.class_:
            self.class_ = "Midline"
            warnings.warn(
                "Model may not be recreated as expected due to extra parameter "
                "`midext_prob`. Make sure to manually handle edge cases.",
                stacklevel=2,
            )
        return super().model_post_init(__context)

    def translate(self) -> tuple[ModelConfig, dict[int | str, DistributionConfig]]:
        """Translate the deprecated model config to the new format.

        Returns:
            A ``ModelConfig`` (always with the ``"binary"`` constructor) and a
            mapping from every entry in ``t_stages`` to a binomial
            ``DistributionConfig``. The first T-stage gets a ``"frozen"``
            distribution, all others a ``"parametric"`` one; each is given
            ``first_binom_prob`` as its ``p`` parameter.
        """
        old_kwargs = self.kwargs.copy()
        new_kwargs = {"use_midext_evo": False} if "Midline" in self.class_ else {}

        # The old symmetry flags are optional. Popping with a ``None`` default
        # avoids a ``KeyError`` for configs that never specified them, which is
        # what the ``is not None`` checks were evidently written for.
        renamed_symmetry_keys = (
            ("base_symmetric", "tumor_spread"),
            ("trans_symmetric", "lnl_spread"),
        )
        for old_key, new_key in renamed_symmetry_keys:
            value = old_kwargs.pop(old_key, None)
            if value is not None:
                new_kwargs.setdefault("is_symmetric", {})[new_key] = value

        # Everything else is passed through unchanged.
        new_kwargs.update(old_kwargs)

        model_config = ModelConfig(
            class_name=self.class_,
            constructor="binary",
            max_time=self.max_t,
            kwargs=new_kwargs,
        )

        distribution_configs = {
            t_stage: DistributionConfig(
                kind="frozen" if i == 0 else "parametric",
                func="binomial",
                params={"p": self.first_binom_prob},
            )
            for i, t_stage in enumerate(self.t_stages)
        }

        return model_config, distribution_configs

388

389

class SamplingConfig(BaseModel):
    """Settings to configure the MCMC sampling."""

    # --- storage ---------------------------------------------------------------
    storage_file: Path = Field(
        description="Path to HDF5 file store results or load last state.",
    )
    history_file: Path | None = Field(
        description="Path to store the burn-in metrics (as CSV file).",
        default=None,
    )
    dataset: str = Field(
        description="Name of the dataset in the HDF5 file.",
        default="mcmc",
    )

    # --- execution -------------------------------------------------------------
    # The default is evaluated once, when the class is defined.
    cores: int | None = Field(
        default=os.cpu_count(),
        gt=0,
        description=(
            "Number of cores to use for parallel sampling. If `None`, no parallel "
            "processing is used."
        ),
    )
    seed: int = Field(
        description="Seed for the random number generator.",
        default=42,
    )
    walkers_per_dim: int = Field(
        description="Number of walkers per parameter space dimension.",
        default=20,
    )

    # --- convergence -----------------------------------------------------------
    check_interval: int = Field(
        description="Check for convergence each time after this many steps.",
        default=50,
    )
    trust_factor: float = Field(
        description=(
            "Trust the autocorrelation time only when it's smaller than this factor "
            "times the length of the chain."
        ),
        default=50.0,
    )
    relative_thresh: float = Field(
        description="Relative threshold for convergence.",
        default=0.05,
    )

    # --- chain length ----------------------------------------------------------
    burnin_steps: int | None = Field(
        description=(
            "Number of burn-in steps to take. If None, burn-in runs until convergence."
        ),
        default=None,
    )
    num_steps: int | None = Field(
        description=("Number of steps to take in the MCMC sampling."),
        default=100,
    )
    thin_by: int = Field(
        description="How many samples to draw before for saving one.",
        default=10,
    )
    inverse_temp: float = Field(
        description=(
            "Inverse temperature for thermodynamic integration. Note that this is not "
            "yet fully implemented."
        ),
        default=1.0,
    )

    def load(self, thin: int = 1) -> np.ndarray:
        """Read the stored samples back from ``storage_file``.

        The samples are read from the dataset named ``dataset``. ``thin`` is
        forwarded to ``load_model_samples`` and thus applies on top of whatever
        thinning already happened while sampling, so it is rarely needed.
        """
        source, name = self.storage_file, self.dataset
        return load_model_samples(file_path=source, name=name, thin=thin)

468

469

def geometric_schedule(num: int, *_a) -> np.ndarray:
    """Return ``num`` geometrically spaced values that run from 0 to 1.

    The values are ``num`` log-spaced points between 1 and 10, shifted by -1 and
    divided by 9. Extra positional arguments are accepted and ignored so all
    schedule functions can be called the same way.
    """
    return (np.logspace(0.0, 1.0, num) - 1.0) / 9.0

475

476

def linear_schedule(num: int, *_a) -> np.ndarray:
    """Return ``num`` evenly spaced values from 0 to 1 (both included).

    This gives the same result as :py:func:`power_schedule` with ``power=1``.
    Extra positional arguments are accepted and ignored.
    """
    lower, upper = 0.0, 1.0
    return np.linspace(lower, upper, num)

483

484

def power_schedule(num: int, power: float, *_a) -> np.ndarray:
    """Return ``num`` values from 0 to 1 following a power law.

    The result is an evenly spaced sequence from 0 to 1 (as produced by
    :py:func:`linear_schedule`) with every element raised to ``power``. Extra
    positional arguments are accepted and ignored.
    """
    return np.linspace(0.0, 1.0, num) ** power

493

494

# Lookup table from schedule name to the function generating that schedule.
# ``ScheduleConfig.get_schedule`` picks the entry for its ``method`` and calls it
# as ``func(num, power)``, which is why every schedule function tolerates extra
# positional arguments.
SCHEDULES = {
    "geometric": geometric_schedule,
    "linear": linear_schedule,
    "power": power_schedule,
}

500

501

class ScheduleConfig(BaseModel):
    """Configuration for generating a schedule of inverse temperatures."""

    method: Literal["geometric", "linear", "power"] = Field(
        description="Method to generate the inverse temperature schedule.",
        default="power",
    )
    num: int = Field(
        description="Number of inverse temperatures in the schedule.",
        default=32,
    )
    power: float = Field(
        description="If a power schedule is chosen, use this as power.",
        default=4.0,
    )
    values: list[float] | None = Field(
        description=(
            "List of inverse temperatures to use instead of generating a schedule. "
            "If a list is provided, the other parameters are ignored."
        ),
        default=None,
    )

    def get_schedule(self) -> np.ndarray:
        """Return the inverse temperature schedule as a numpy array.

        Explicitly configured ``values`` take precedence. Otherwise the function
        registered in ``SCHEDULES`` under ``method`` is called with ``num`` and
        ``power``. The resulting schedule is logged at INFO level in both cases.
        """
        if self.values is None:
            logger.debug(f"Generating inverse temperature schedule with {self.method}.")
            generate = SCHEDULES[self.method]
            schedule = generate(self.num, self.power)
        else:
            logger.debug("Using provided inverse temperature values.")
            schedule = np.array(self.values)

        logger.info(f"Generated inverse temperature schedule: {schedule}")
        return schedule

537

538

def map_to_optional_bool(value: Any) -> Any:
    """Translate the known involvement spellings into a boolean.

    ``True``, ``"involved"`` and ``1`` become ``True``; ``False``, ``"healthy"``
    and ``0`` become ``False``. Any other value is handed back untouched.
    """
    translations = (
        (True, (True, "involved", 1)),
        (False, (False, "healthy", 0)),
    )
    for result, spellings in translations:
        if value in spellings:
            return result

    return value

548

549

class ScenarioConfig(BaseModel):
    """Define a scenario for which e.g. prevalences and risks may be computed."""

    t_stages: list[int | str] = Field(
        examples=[["early"], [3, 4]],
        description="List of T-stages to marginalize over in the scenario.",
    )
    t_stages_dist: list[float] = Field(
        default=[1.0],
        examples=[[1.0], [0.6, 0.4]],
        description="Distribution over T-stages to use for marginalization.",
    )
    midext: bool | None = Field(
        default=None,
        description="Whether the patient's tumor extends over the midline.",
    )
    mode: Literal["HMM", "BN"] = Field(
        default="HMM",
        description="Which underlying model architecture to use.",
    )
    involvement: InvolvementConfig = InvolvementConfig()
    diagnosis: DiagnosisConfig = DiagnosisConfig()

    def model_post_init(self, __context: Any) -> None:
        """Bring ``t_stages_dist`` into shape: first interpolate, then normalize."""
        self.interpolate()
        self.normalize()

    def interpolate(self):
        """Resample ``t_stages_dist`` so it has one entry per T-stage.

        Nothing happens when the lengths already match. Otherwise the existing
        values are linearly interpolated over the unit interval.
        """
        num_stages = len(self.t_stages)
        if num_stages == len(self.t_stages_dist):
            return

        target_x = np.linspace(0.0, 1.0, num_stages)
        source_x = np.linspace(0.0, 1.0, len(self.t_stages_dist))
        resampled = np.interp(target_x, source_x, self.t_stages_dist)
        # Stored as a plain list so that ``__eq__`` keeps working.
        self.t_stages_dist = resampled.tolist()

    def normalize(self):
        """Rescale ``t_stages_dist`` to sum to 1 unless it (almost) does already."""
        total = np.sum(self.t_stages_dist)
        if np.isclose(total, 1.0):
            return

        rescaled = np.array(self.t_stages_dist) / total
        # Stored as a plain list so that ``__eq__`` keeps working.
        self.t_stages_dist = rescaled.tolist()

592

593

def _construct_model_from_external(path: Path) -> Model:
    """Construct a model by executing a Python file and taking its ``model`` symbol.

    The file at ``path`` is imported as a module named after the file's stem and
    the module-level attribute ``model`` is returned.

    NOTE(review): this executes arbitrary code from ``path``; only point it at
    trusted files.

    Raises:
        ImportError: If no import spec/loader can be created for ``path``
            (``spec_from_file_location`` may return ``None``, e.g. for a file
            suffix that no Python loader handles).
        AttributeError: If the executed module does not define ``model``.
    """
    module_name = path.stem
    spec = importlib.util.spec_from_file_location(module_name, path)
    if spec is None or spec.loader is None:
        # Fail with a clear message instead of an opaque
        # "'NoneType' object has no attribute ..." further down.
        raise ImportError(f"Cannot create an import spec to load a model from {path}.")

    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    logger.info(f"Loaded model from {path}. This ignores model and graph configs.")
    return module.model

602

603

def construct_model(
    model_config: ModelConfig,
    graph_config: GraphConfig,
) -> Model:
    """Build a model as described by ``model_config`` and ``graph_config``.

    Usually, ``model_config`` names a model class of the `lymph`_ package and
    one of its constructors, which is then called with the flattened graph
    definition, ``max_time``, ``named_params`` and any extra ``kwargs``.

    Alternatively, when ``model_config.external_file`` is set, the model is
    loaded from that Python file instead, which must define a symbol called
    ``model``. The graph config and all other model settings are ignored then.

    .. note::

        No check is performed on the model's compatibility with the command/pipeline
        it is used in. It is assumed the model complies with the
        :py:class:`model type <lymph.types.Model>` specifications of the `lymph`_
        package.

    .. _lymph: https://lymph-model.readthedocs.io/stable/
    """
    external_file = model_config.external_file
    if external_file is not None:
        return _construct_model_from_external(external_file)

    model_cls = getattr(models, model_config.class_name)
    build = getattr(model_cls, model_config.constructor)
    graph_dict = flatten(graph_config.model_dump())

    # Keep the explicit keywords next to ``**kwargs`` so that a clashing key in
    # ``kwargs`` still fails loudly instead of silently overriding a value.
    model = build(
        graph_dict=graph_dict,
        max_time=model_config.max_time,
        named_params=model_config.named_params,
        **model_config.kwargs,
    )
    logger.info(f"Constructed model: {model}")
    return model

638

639

def add_distributions(
    model: Model,
    configs: dict[str | int, DistributionConfig],
    mapping: dict[FuncNameType, Callable] | None = None,
    inplace: bool = False,
) -> Model:
    """Attach distributions over diagnose times to ``model``.

    For every T-stage in ``configs`` the function named by the config is looked
    up in ``mapping`` (falling back to ``DIST_MAP``). A ``"frozen"`` distribution
    is evaluated right away over the support ``0..max_time``, whereas a
    ``"parametric"`` one is passed on as a callable and its ``params`` (if any)
    are set on the model, prefixed with the T-stage.

    Unless ``inplace`` is true, a deep copy of ``model`` is modified and
    returned.

    Raises:
        ValueError: If a config has a kind other than frozen or parametric.
    """
    if not inplace:
        model = deepcopy(model)
        logger.debug("Created deepcopy of model.")

    dist_funcs = mapping or DIST_MAP

    for t_stage, dist_config in configs.items():
        kind = dist_config.kind
        if kind not in ("frozen", "parametric"):
            raise ValueError(f"Unknown distribution kind: {dist_config.kind}")

        is_parametric = kind == "parametric"
        if is_parametric:
            dist = dist_funcs[dist_config.func]
        else:
            support = np.arange(model.max_time + 1)
            dist = dist_funcs[dist_config.func](support, **dist_config.params)

        model.set_distribution(t_stage, dist)

        if is_parametric and dist_config.params:
            prefixed = {
                f"{t_stage}_{name}": value
                for name, value in dist_config.params.items()
            }
            model.set_params(**prefixed)

        logger.debug(f"Set {dist_config.kind} distribution for '{t_stage}': {dist}")

    logger.info(f"Added {len(configs)} distributions to model: {model}")
    return model

671

672

def add_modalities(
    model: Model,
    modalities: dict[str, ModalityConfig],
    inplace: bool = False,
) -> Model:
    """Register every entry of ``modalities`` on ``model``.

    Each config is dumped to a dict and passed as keyword arguments to
    ``model.set_modality`` under its name. Unless ``inplace`` is true, a deep
    copy of ``model`` is modified and returned.
    """
    target = model if inplace else deepcopy(model)
    if not inplace:
        logger.debug("Created deepcopy of model.")

    for modality, modality_config in modalities.items():
        target.set_modality(modality, **modality_config.model_dump())
        logger.debug(f"Added modality {modality} to model: {modality_config}")

    logger.info(f"Added {len(modalities)} modalities to model: {target}")
    return target

689

690

def add_data(
    model: Model,
    path: Path,
    side: Literal["ipsi", "contra"],
    mapping: dict[Literal[0, 1, 2, 3, 4], int | str] | None = None,
    inplace: bool = False,
) -> Model:
    """Load the patient table at ``path`` into ``model``.

    The CSV file is read with a three-level column header and passed to
    ``model.load_patient_data`` together with ``mapping``. The ``side`` is only
    forwarded when ``model`` is a ``Unilateral`` model. Unless ``inplace`` is
    true, a deep copy of ``model`` receives the data and is returned.
    """
    data = pd.read_csv(path, header=[0, 1, 2])
    logger.debug(f"Loaded data from {path}: Shape: {data.shape}")

    load_kwargs = {"patient_data": data, "mapping": mapping}
    if isinstance(model, models.Unilateral):
        load_kwargs.update(side=side)

    if not inplace:
        model = deepcopy(model)
        logger.debug("Created deepcopy of model.")

    model.load_patient_data(**load_kwargs)
    logger.info(f"Added data to model: {model}")
    return model

713

714

# Type alias: either a single path (``Path`` or ``str``) or a sequence of such
# paths. Used to annotate the ``yaml_file`` argument of
# ``DynamicYamlConfigSettingsSource.__init__``.
PathType = Path | str | Sequence[Path | str]

716

717

class DynamicYamlConfigSettingsSource(YamlConfigSettingsSource):
    """YAML settings source whose file path(s) can be chosen at runtime.

    Out of the box, `pydantic-settings`_ only supports hard-coded locations for
    YAML config files. This source instead looks up the path(s) in a field of
    the settings that were already collected (by default ``configs``), so a user
    may specify one or multiple YAML files, e.g. via the CLI.

    The approach is heavily inspired by `this comment`_ in the discussion on a
    related issue of the `pydantic-settings`_ GitHub repository.

    .. _this comment: https://github.com/pydantic/pydantic-settings/issues/259#issuecomment-2549444286
    .. _pydantic-settings: https://github.com/pydantic/pydantic-settings
    """

    def __init__(
        self,
        settings_cls,
        yaml_file: PathType | None = DEFAULT_PATH,
        yaml_file_encoding: str | None = None,
        yaml_file_path_field: str = "configs",
    ) -> None:
        """Remember which settings field holds the YAML file path(s).

        ``yaml_file_path_field`` is the name of the :py:class:`BaseSettings`
        field that contains the path(s) to the YAML file(s). The remaining
        arguments are forwarded to the parent class.

        Note that all config files must have a ``version: 1`` key in them to be
        recognized as valid config files.
        """
        # Stored before delegating to the parent initializer (same order as
        # before; the attribute is needed by ``__call__`` and ``__repr__``).
        self.yaml_file_path_field = yaml_file_path_field
        super().__init__(settings_cls, yaml_file, yaml_file_encoding)

    def _read_file(self, file_path: Path) -> dict[str, Any]:
        """Parse one YAML file and insist on a top-level ``version: 1`` entry.

        An empty file is treated like an empty mapping, and therefore rejected.

        Raises:
            ValueError: If the ``version`` key is missing or not equal to 1.
        """
        with open(file_path, encoding=self.yaml_file_encoding) as yaml_file:
            data = yaml.safe_load(yaml_file) or {}

        if data.get("version") == 1:
            return data

        raise ValueError(
            f"Config file {file_path} does not have a 'version: 1' key. "
            "For compatibility reasons, all config files must have this key.",
        )

    def __call__(self) -> dict[str, Any]:
        """Re-read the config files named in the current state, then delegate.

        The path(s) are taken from ``yaml_file_path_field`` in the current
        state, falling back to the path this source was set up with. The source
        re-initializes itself with them before the parent's ``__call__`` runs.
        """
        yaml_file_to_reload = self.current_state.get(
            self.yaml_file_path_field,
            self.yaml_file_path,
        )
        logger.debug(f"Reloading YAML files from {yaml_file_to_reload} (if it exists).")
        # Deliberate re-initialization: this makes the parent class load the
        # newly determined file(s).
        self.__init__(
            settings_cls=self.settings_cls,
            yaml_file=yaml_file_to_reload,
            yaml_file_encoding=self.yaml_file_encoding,
            yaml_file_path_field=self.yaml_file_path_field,
        )
        return super().__call__()

    def __repr__(self) -> str:
        """Return a string representation of the source."""
        shown = ", ".join(
            (
                f"yaml_file={self.yaml_file_path!r}",
                f"yaml_file_encoding={self.yaml_file_encoding!r}",
                f"yaml_file_path_field={self.yaml_file_path_field!r}",
            ),
        )
        return f"{self.__class__.__name__}({shown})"

786

787

class BaseCLI(BaseSettings):
    """Common base for the settings classes of all CLI scripts."""

    model_config = ConfigDict(extra="ignore", yaml_file="config.yaml")

    configs: list[Path] = Field(
        description=(
            "Path to the YAML file(s) that contain the configuration(s). Configs from "
            "YAML files may be overwritten by command line arguments. When multiple "
            "files are specified, the configs are merged in the order they are given. "
            "Note that every config file must have a `version: 1` key in it."
        ),
        default=["config.yaml"],
    )

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Append the dynamic YAML source to the standard settings sources.

        The YAML source reads its file path(s) from the ``configs`` field and is
        placed last in the returned tuple, after the init, environment, dotenv
        and file-secret sources.
        """
        # The variable name is part of the debug message below (``{... = }``).
        dynamic_yaml_config_source = DynamicYamlConfigSettingsSource(
            settings_cls=settings_cls,
            yaml_file_encoding="utf-8",
            yaml_file_path_field="configs",
        )
        logger.debug(f"Created {dynamic_yaml_config_source = }")

        standard_sources = (
            init_settings,
            env_settings,
            dotenv_settings,
            file_secret_settings,
        )
        return (*standard_sources, dynamic_yaml_config_source)

+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_decorators_py.html b/htmlcov/z_5bf5c588c698c6cc_decorators_py.html new file mode 100644 index 0000000..126c178 --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_decorators_py.html @@ -0,0 +1,185 @@ + + + + + Coverage for src/lyscripts/decorators.py: 90% + + + + + +

+ Coverage for src / lyscripts / decorators.py: + 90% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 41 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Decorators to avoid repetitive snippets of code.

3E.g. safely opening files or logging the state of a function call.

5This is *not* a command line tool.

6"""

8import functools

9import logging

10from collections.abc import Callable

11from functools import wraps

12from pathlib import Path

13from typing import Any

def assemble_signature(*args, **kwargs) -> str:
    """Assemble the signature of the function call.

    Positional arguments are rendered with ``str()``, keyword arguments as
    ``key=value``, and everything is joined with ``", "``. When there are no
    positional or no keyword arguments, no dangling separator is produced;
    a call without any arguments yields an empty string.
    """
    parts = [str(arg) for arg in args]
    parts.extend(f"{key}={value}" for key, value in kwargs.items())
    return ", ".join(parts)

def log_state(log_level: int = logging.INFO) -> Callable:
    """Create a decorator that logs each call of the decorated function.

    The message logged at ``log_level`` is derived from the function's name:
    underscores become spaces, the first letter is capitalized and a period is
    appended. In addition, the call signature is logged at DEBUG level and any
    exception is logged at ERROR level before being re-raised.
    """

    def log_decorator(func: Callable):
        """Wrap ``func`` with the logging behaviour."""

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            """Log the call, run ``func`` and report failures."""
            name, module = func.__name__, func.__module__
            logger = logging.getLogger(module)
            signature = assemble_signature(*args, **kwargs)
            logger.debug(f"Executing {name}({signature}).")
            log_msg_from_func = name.replace("_", " ").capitalize() + "."

            # Extra attributes attached to the emitted log record.
            record_extras = {
                "func_filepath": f"{module.replace('.', '/')}.py",
                "func_name": name,
                "module_name": module,
            }

            try:
                logger.log(log_level, log_msg_from_func, extra=record_extras)
                result = func(*args, **kwargs)
            except Exception as exc:
                logger.error(f"Error calling {func.__name__}().", exc_info=exc)
                raise exc

            return result

        return wrapper

    return log_decorator

def check_input_file_exists(loading_func: Callable) -> Callable:
    """Decorate ``loading_func`` so it refuses paths that are not existing files.

    The first argument is converted to a ``Path`` and handed to
    ``loading_func`` in that form; all further arguments are passed through.
    """

    @wraps(loading_func)
    def inner(file_path: str, *args, **kwargs) -> Any:
        """Validate the path, then call the wrapped loading function."""
        path = Path(file_path)
        if path.is_file():
            return loading_func(path, *args, **kwargs)

        raise FileNotFoundError(f"File {path} does not exist.")

    return inner

def check_output_dir_exists(saving_func: Callable) -> Callable:
    """Decorate ``saving_func`` so the target's parent directory always exists.

    The first argument is converted to a ``Path``; its parent directory is
    created (including missing ancestors) before ``saving_func`` is called with
    the ``Path`` and all remaining arguments.
    """

    @wraps(saving_func)
    def inner(file_path: str, *args, **kwargs) -> Any:
        """Create the parent directory, then call the wrapped saving function."""
        target = Path(file_path)
        target.parent.mkdir(exist_ok=True, parents=True)
        return saving_func(target, *args, **kwargs)

    return inner

+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_evaluate_py.html b/htmlcov/z_5bf5c588c698c6cc_evaluate_py.html new file mode 100644 index 0000000..731bf0b --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_evaluate_py.html @@ -0,0 +1,302 @@ + + + + + Coverage for src/lyscripts/evaluate.py: 26% + + + + + +

+ Coverage for src / lyscripts / evaluate.py: + 26% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 70 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Evaluate the performance of the trained model.

3This is done by computing quantities like the Bayesian information criterion (BIC) or

4(if thermodynamic integration was performed) the actual evidence (with error) of the

5model.

6"""

8import argparse

9import json

10from pathlib import Path

12import emcee

13import h5py

14import numpy as np

15import pandas as pd

16from loguru import logger

17from scipy.integrate import trapezoid

19from lyscripts.utils import load_patient_data, load_yaml_params

# Module-level NumPy random number generator, created without an explicit seed.
RNG = np.random.default_rng()

def _add_parser(
    subparsers: argparse._SubParsersAction,
    help_formatter,
):
    """Register this module as a subcommand on ``subparsers``.

    The subcommand is named after this file (without the ``.py`` suffix) and
    uses the module docstring as both its description and its help text.
    """
    command_name = Path(__file__).name.replace(".py", "")
    parser = subparsers.add_parser(
        command_name,
        help=__doc__,
        description=__doc__,
        formatter_class=help_formatter,
    )
    _add_arguments(parser)

def _add_arguments(parser: argparse.ArgumentParser):
    """Define the command's arguments on ``parser`` and register ``main``.

    Two positional paths (``data`` and ``model``) and three optional paths
    (``--params``, ``--plots``, ``--metrics``) are added. ``main`` is stored as
    the ``run_main`` default, so the caller can dispatch to it after parsing.
    """
    parser.add_argument(
        "data",
        type=Path,
        help="Path to the tables of patient data (CSV).",
    )
    parser.add_argument("model", type=Path, help="Path to model output files (HDF5).")

    # (flags, default, help) for the optional path arguments, in display order.
    optional_paths = (
        (("-p", "--params"), "./params.yaml", "Path to parameter file"),
        (("--plots",), "./plots", "Directory for storing plots"),
        (("--metrics",), "./metrics.json", "Path to metrics file"),
    )
    for flags, default, help_text in optional_paths:
        parser.add_argument(*flags, default=default, type=Path, help=help_text)

    parser.set_defaults(run_main=main)

def comp_bic(log_probs: np.ndarray, num_params: int, num_data: int) -> float:
    r"""Return minus one half of the Bayesian Information Criterion (BIC).

    The BIC is defined as [^1]
    $$ BIC = k \ln{n} - 2 \ln{\hat{L}} $$
    with $k$ being the number of parameters ``num_params``, $n$ the number of
    datapoints ``num_data`` and $\ln{\hat{L}}$ the maximum of the ``log_probs``.
    The definition is such that
    $$ p(D \mid m) \approx \exp{\left( - BIC / 2 \right)} $$
    approximates the model evidence, which is why $- BIC / 2$ is returned here
    rather than the BIC itself.

    [^1]: https://en.wikipedia.org/wiki/Bayesian_information_criterion
    """
    max_log_likelihood = np.max(log_probs)
    complexity_penalty = num_params * np.log(num_data) / 2.0
    return max_log_likelihood - complexity_penalty

def compute_evidence(
    temp_schedule: np.ndarray,
    log_probs: np.ndarray,
) -> float:
    """Integrate the mean log-probabilities over the inverse temperatures.

    ``log_probs`` holds one row of sampled log-probabilities per entry of
    ``temp_schedule``. Each row is averaged to approximate the expectation value
    under the corresponding power posterior, and these averages are integrated
    over ``temp_schedule`` with the trapezoidal rule.
    """
    expectation_per_temp = np.mean(log_probs, axis=1)
    return trapezoid(y=expectation_per_temp, x=temp_schedule)

104

105

def compute_ti_results(
    metrics: dict,
    params: dict,
    ndim: int,
    h5_file: Path,
    model: Path,
) -> tuple[np.ndarray, np.ndarray]:
    """Collect the log-probabilities of a thermodynamic integration run.

    The temperature schedule and sampling settings are read from
    ``params["sampling"]``. For every run stored under ``h5_file["ti"]``, the
    flattened blobs are read via an emcee HDF5 backend opened on ``model``. The
    resulting evidence is written to ``metrics["evidence"]``.

    NOTE(review): ``h5_file`` is annotated as ``Path`` but is indexed and
    iterated like an open HDF5 file -- confirm the intended type.

    Returns:
        The temperature schedule and the array of log-probabilities with one
        row per temperature.

    Raises:
        RuntimeError: If the number of stored runs differs from the length of
            the temperature schedule.
    """
    sampling = params["sampling"]
    temp_schedule = sampling["temp_schedule"]
    num_temps = len(temp_schedule)

    stored_runs = h5_file["ti"]
    if num_temps != len(stored_runs):
        raise RuntimeError(
            f"Parameters suggest temp schedule of length {num_temps}, "
            f"but stored are {len(stored_runs)}",
        )

    nwalker = ndim * sampling["walkers_per_dim"]
    nsteps = sampling["nsteps"]
    ti_log_probs = np.zeros(shape=(num_temps, nsteps * nwalker))

    for row, run in enumerate(stored_runs):
        reader = emcee.backends.HDFBackend(model, name=f"ti/{run}", read_only=True)
        ti_log_probs[row] = reader.get_blobs(flat=True)

    metrics["evidence"] = compute_evidence(temp_schedule, ti_log_probs)

    return temp_schedule, ti_log_probs

135

136

def main(args: argparse.Namespace):
    """Run main script.

    Loads the parameters and patient data, computes the thermodynamic
    integration results if the HDF5 file at ``args.model`` contains a ``ti``
    group, and writes the BIC as well as the maximum and mean log-likelihood
    of the ``mcmc`` samples to the JSON file at ``args.metrics``.

    Raises:
        NotImplementedError: Always, for now -- the model construction is
            disabled in this script (see the FIXME below).
    """
    metrics = {}

    params = load_yaml_params(args.params)
    # FIXME: model creation is commented out, so ``model`` is always ``None`` and
    # the number of parameters cannot be determined. Fail with a clear message
    # instead of an ``AttributeError`` on ``None`` in the next statement.
    model = None  # create_model(params)
    if model is None:
        raise NotImplementedError(
            "Model construction is currently disabled in the evaluate script.",
        )
    ndim = len(model.get_params())
    data = load_patient_data(args.data)

    # Open the HDF5 file in a context manager so the handle is always released.
    with h5py.File(args.model, mode="r") as h5_file:
        # if TI has been performed, compute the accuracy for every step
        if "ti" in h5_file:
            temp_schedule, ti_log_probs = compute_ti_results(
                metrics=metrics,
                params=params,
                ndim=ndim,
                h5_file=h5_file,
                model=args.model,
            )
            logger.info(
                "Computed results of thermodynamic integration with "
                f"{len(temp_schedule)} steps",
            )

            # store inverse temperatures and log-probs in CSV file
            # NOTE(review): ``--plots`` is described as a directory, but it is
            # used as the path of the CSV file here -- confirm the intent.
            args.plots.parent.mkdir(parents=True, exist_ok=True)

            beta_vs_accuracy = pd.DataFrame(
                np.array(
                    [
                        temp_schedule,
                        np.mean(ti_log_probs, axis=1),
                        np.std(ti_log_probs, axis=1),
                    ],
                ).T,
                columns=["β", "accuracy", "std"],
            )
            beta_vs_accuracy.to_csv(args.plots, index=False)
            logger.info(f"Plotted β vs accuracy at {args.plots}")

    # use blobs, because also for TI, this is the unscaled log-prob
    backend = emcee.backends.HDFBackend(args.model, read_only=True, name="mcmc")
    final_log_probs = backend.get_blobs()
    logger.info(f"Opened samples from emcee backend from {args.model}")

    # store metrics in JSON file
    args.metrics.parent.mkdir(parents=True, exist_ok=True)
    args.metrics.touch(exist_ok=True)

    metrics["BIC"] = comp_bic(
        final_log_probs,
        ndim,
        len(data),
    )
    metrics["max_llh"] = np.max(final_log_probs)
    metrics["mean_llh"] = np.mean(final_log_probs)

    with open(args.metrics, mode="w", encoding="utf-8") as metrics_file:
        json.dump(metrics, metrics_file)

    logger.info(f"Wrote out metrics to {args.metrics}")

198

199

# Allow running this module directly as a script: build a standalone parser with
# the same arguments as the subcommand, then dispatch to the function that
# ``_add_arguments`` registered under ``run_main``.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    _add_arguments(parser)

    args = parser.parse_args()
    args.run_main(args)

+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_integrate_py.html b/htmlcov/z_5bf5c588c698c6cc_integrate_py.html new file mode 100644 index 0000000..4430407 --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_integrate_py.html @@ -0,0 +1,260 @@ + + + + + Coverage for src/lyscripts/integrate.py: 52% + + + + + +

+ Coverage for src / lyscripts / integrate.py: + 52% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 46 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Perform thermodynamic integration to evaluate the model evidence.

3Using the functions provided by the `sample` module, this script implements

4thermodynamic integration (TI) in order to compute the model evidence.

5This is done by sampling the model parameters at different inverse temperatures

6following a specified schedule.

7"""

9from __future__ import annotations

11import os

12from typing import Any

14import emcee

15import h5py

16import numpy as np

17from loguru import logger

18from lydata.utils import ModalityConfig

19from pydantic import Field

21import lyscripts.sample as sample_module # Import the module to set its global MODEL

22from lyscripts.cli import assemble_main

23from lyscripts.configs import (

24 BaseCLI,

25 DataConfig,

26 DistributionConfig,

27 GraphConfig,

28 ModelConfig,

29 SamplingConfig,

30 ScheduleConfig,

31 add_distributions,

32 add_modalities,

33 construct_model,

34)

35from lyscripts.utils import get_hdf5_backend

def init_ti_sampler(
    settings: IntegrateCLI,
    temp_idx: int,
    ndim: int,
    inv_temp: float,
    pool: Any,
) -> emcee.EnsembleSampler:
    """Set up the ``emcee.EnsembleSampler`` for one step of the TI schedule.

    The number of walkers is ``ndim`` times ``settings.sampling.walkers_per_dim``.
    Samples go to the dataset ``ti/NN`` of the configured storage file, where
    ``NN`` is the one-based, zero-padded ``temp_idx``. ``inv_temp`` is handed to
    the log-probability function as the ``inverse_temp`` keyword argument.
    """
    nwalkers = ndim * settings.sampling.walkers_per_dim
    dataset_name = f"ti/{temp_idx + 1:0>2d}"
    backend = get_hdf5_backend(
        file_path=settings.sampling.storage_file,
        dataset=dataset_name,
        nwalkers=nwalkers,
        ndim=ndim,
    )

    # Weighted mixture of proposal moves: 80% DEMove, 20% DESnookerMove.
    weighted_moves = [
        (emcee.moves.DEMove(), 0.8),
        (emcee.moves.DESnookerMove(), 0.2),
    ]

    # NOTE(review): ``MODEL`` is neither defined nor imported in the part of this
    # module visible here (only ``sample_module`` is imported) -- confirm that a
    # module-level ``MODEL`` exists, or whether ``sample_module.MODEL`` is meant.
    parameter_names = list(MODEL.get_named_params().keys())

    return emcee.EnsembleSampler(
        nwalkers=nwalkers,
        ndim=ndim,
        log_prob_fn=sample_module.log_prob_fn,
        kwargs={"inverse_temp": inv_temp},
        moves=weighted_moves,
        backend=backend,
        pool=pool,
        blobs_dtype=[("log_prob", np.float64)],
        parameter_names=parameter_names,
    )

class IntegrateCLI(BaseCLI):
    """Perform thermodynamic integration to compute the model evidence."""

    graph: GraphConfig
    model: ModelConfig = ModelConfig()
    distributions: dict[str, DistributionConfig] = Field(
        default={},
        description=(
            "Mapping of model T-categories to predefined distributions over "
            "diagnose times."
        ),
    )
    modalities: dict[str, ModalityConfig] = Field(
        default={},
        description=(
            "Maps names of diagnostic modalities to their specificity/sensitivity."
        ),
    )
    data: DataConfig
    sampling: SamplingConfig
    schedule: ScheduleConfig = Field(
        description="Configuration for generating inverse temperature schedule.",
    )

    def cli_cmd(self) -> None:
        """Start the ``integrate`` subcommand.

        The model construction and setup is done analogously to the ``sample``
        command. Afterwards, an :py:class:`emcee.EnsembleSampler` is initialized
        for every TI step (see :py:func:`init_ti_sampler`) and
        :py:func:`~lyscripts.sample.run_sampling` is executed twice per step:
        once for the burn-in phase and once for the actual sampling phase.
        Thereby, the log likelihood is scaled by the respective inverse
        temperature of that step. All necessary settings for the sampling are
        passed by the ``sampling`` argument, except for the inverse temperatures,
        which are provided by the ``schedule`` argument.

        Finally, the samples of the last TI step are copied to the dataset named
        by ``sampling.dataset``, replacing a dataset of that name if one exists.
        """
        # as recommended in https://emcee.readthedocs.io/en/stable/tutorials/parallel/#
        os.environ["OMP_NUM_THREADS"] = "1"

        logger.debug(self.model_dump_json(indent=2))

        # ugly, but necessary for pickling
        global MODEL
        MODEL = construct_model(self.model, self.graph)
        MODEL = add_distributions(MODEL, self.distributions)
        MODEL = add_modalities(MODEL, self.modalities)
        MODEL.load_patient_data(**self.data.get_load_kwargs())
        ndim = MODEL.get_num_dims()

        # set MODEL in the sample module's namespace so log_prob_fn can access it
        sample_module.MODEL = MODEL

        schedule = self.schedule.get_schedule()

        # emcee does not support numpy's new random number generator yet.
        np.random.seed(self.sampling.seed)  # noqa: NPY002

        with sample_module.get_pool(self.sampling.cores) as pool:
            for idx, inv_temp in enumerate(schedule):
                sampler = init_ti_sampler(
                    settings=self,
                    temp_idx=idx,
                    ndim=ndim,
                    inv_temp=inv_temp,
                    pool=pool,
                )

                sample_module.run_sampling(
                    description=f"Burn-in phase: TI step {idx + 1}/{len(schedule)}",
                    sampler=sampler,
                    num_steps=self.sampling.burnin_steps,
                    check_interval=self.sampling.check_interval,
                    trust_factor=self.sampling.trust_factor,
                    relative_thresh=self.sampling.relative_thresh,
                    history_file=self.sampling.history_file,
                )

                sample_module.run_sampling(
                    description=f"Sampling phase: TI step {idx + 1}/{len(schedule)}",
                    sampler=sampler,
                    num_steps=self.sampling.num_steps,
                    reset_backend=True,
                    check_interval=self.sampling.num_steps,
                    thin_by=self.sampling.thin_by,
                )

        # copy last sampling round over to the configured dataset of the HDF5 file
        source = f"ti/{len(schedule):0>2d}"
        target = str(self.sampling.dataset)
        with h5py.File(self.sampling.storage_file, mode="r+") as h5_file:
            # Nothing to do when the target *is* the last TI step; deleting it
            # below would otherwise destroy the samples that were just drawn.
            if target.strip("/") != source:
                # The copy is expected to fail on an existing destination (e.g.
                # a re-run on the same storage file), so drop the stale copy.
                if target in h5_file:
                    del h5_file[target]
                h5_file.copy(source, h5_file, name=target)

159

160

if __name__ == "__main__":
    # Assemble the command line entry point for ``IntegrateCLI`` and invoke it.
    main = assemble_main(prog_name="integrate", settings_cls=IntegrateCLI)
    main()

+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_plots_py.html b/htmlcov/z_5bf5c588c698c6cc_plots_py.html new file mode 100644 index 0000000..957c370 --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_plots_py.html @@ -0,0 +1,508 @@ + + + + + Coverage for src/lyscripts/plots.py: 89% + + + + + +

+ Coverage for src / lyscripts / plots.py: + 89% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 160 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Utility functions for the plotting commands."""

3from __future__ import annotations

5from abc import abstractmethod

6from collections.abc import Mapping

7from dataclasses import field

8from itertools import cycle

9from pathlib import Path

10from typing import TYPE_CHECKING, Any, TypeVar

12import h5py

13import matplotlib.pyplot as plt

14import numpy as np

15import scipy as sp

16from numpydantic import NDArray, Shape

17from pydantic import BaseModel

19from lyscripts.decorators import (

20 check_input_file_exists,

21 check_output_dir_exists,

22 log_state,

23)

25if TYPE_CHECKING:

26 from matplotlib.axes._axes import Axes as MPLAxes

27 from matplotlib.figure import Figure

# USZ colors by name; ``COLOR_CYCLE`` iterates over their hex codes endlessly.
COLORS = dict(
    blue="#005ea8",
    orange="#f17900",
    green="#00afa5",
    red="#ae0060",
    gray="#c5d5db",
)
COLOR_CYCLE = cycle(COLORS.values())
# number of centimeters in one inch
CM_PER_INCH = 2.54

def floor_at_decimal(value: float, decimal: int) -> float:
    """Round ``value`` down at the given ``decimal`` place.

    ``decimal`` counts the digits to the right of the decimal point that are kept.
    It may be negative, in which case digits left of the point are floored, too.
    """
    factor = 10**decimal
    shifted = factor * value
    return np.floor(shifted) / factor

def ceil_at_decimal(value: float, decimal: int) -> float:
    """Round ``value`` up at the given ``decimal`` place.

    Counterpart of :py:func:`.floor_at_decimal`: ``decimal`` counts the digits to
    the right of the decimal point that are kept. It may be negative.
    """
    factor = 10**decimal
    # ceil(x) is the same as -floor(-x)
    return np.ceil(factor * value) / factor

def floor_to_step(value: float, step: float) -> float:
    """Return the largest multiple of ``step`` that does not exceed ``value``."""
    num_whole_steps = value // step
    return num_whole_steps * step

def ceil_to_step(value: float, step: float) -> float:
    """Return the multiple of ``step`` one step above the floor of ``value``.

    Note that a ``value`` lying exactly on the ladder yields ``value + step``.
    """
    ladder_below = (value // step) * step
    return ladder_below + step

def clean_and_check(filename: str | Path) -> Path:
    """Convert ``filename`` to a :py:class:`~pathlib.Path` and ensure it exists.

    Raises a :py:class:`FileNotFoundError` if nothing exists at that location.
    """
    candidate = Path(filename)
    if candidate.exists():
        return candidate

    msg = f"File with the name {filename} does not exist at {candidate.resolve()}"
    raise FileNotFoundError(msg)

81AbstractDistributionT = TypeVar("AbstractDistributionT", bound="AbstractDistribution")

class AbstractDistribution(BaseModel):
    """Base class for distributions that can be drawn into a plot.

    Subclasses implement the drawing itself and both percentile methods.
    """

    # ``scale`` and ``offset`` are applied by the subclasses to their values;
    # ``kwargs`` holds per-distribution plotting options (e.g. a ``label``).
    scale: float = 100.0
    offset: float = 0.0
    kwargs: dict[str, Any] = field(default_factory=dict)

    @abstractmethod
    def draw(self, axes: MPLAxes) -> MPLAxes:
        """Draw the distribution into the provided ``axes``."""
        ...

    @abstractmethod
    def left_percentile(self, percent: float) -> float:
        """Compute the point where ``percent`` of the values are to the left."""
        ...

    @abstractmethod
    def right_percentile(self, percent: float) -> float:
        """Compute the point where ``percent`` of the values are to the right."""
        ...

    def _get_label(self) -> str:
        """Compute the fallback label; this base implementation provides none."""
        return None

    @property
    def label(self) -> str:
        """Return the ``label`` stored in ``kwargs`` or else the fallback label."""
        fallback = self._get_label()
        return self.kwargs.get("label", fallback)

113

114

class Histogram(AbstractDistribution):
    """Container for the samples that are plotted as a histogram."""

    raw_values: NDArray[Shape["*"], float]  # noqa: F722

    @property
    def values(self) -> np.ndarray:
        """Return the raw values multiplied by ``scale`` and shifted by ``offset``."""
        scaled = self.raw_values * self.scale
        return scaled + self.offset

    @classmethod
    def from_hdf5(
        cls: type[Histogram],
        filename: str | Path,
        dataname: str,
        scale: float = 100.0,
        offset: float = 0.0,
        **kwargs,
    ) -> Histogram:
        """Load the dataset ``dataname`` of the HDF5 file ``filename`` as histogram.

        Unless ``kwargs`` already contains a ``label``, one is derived from the
        attributes of the dataset via :py:func:`get_label`.
        """
        checked_path = clean_and_check(filename)
        with h5py.File(checked_path, mode="r") as h5file:
            dataset = h5file[dataname]
            if "label" not in kwargs:
                kwargs["label"] = get_label(dataset.attrs)
            # read the data while the file is still open
            raw_values = dataset[:]

        return cls(raw_values=raw_values, scale=scale, offset=offset, kwargs=kwargs)

    def left_percentile(self, percent: float) -> float:
        """Compute the point where `percent` of the values are to the left."""
        return np.percentile(self.values, percent)

    def right_percentile(self, percent: float) -> float:
        """Compute the point where `percent` of the values are to the right."""
        complement = 100.0 - percent
        return np.percentile(self.values, complement)

    def draw(self, axes: MPLAxes, **defaults) -> Any:
        """Draw the histogram into the provided ``axes``.

        The ``"hist"`` entry of ``defaults`` provides general settings, which are
        overridden by this histogram's own ``kwargs``. The bins span the current
        x-axis limits of ``axes``.
        """
        bin_range = axes.get_xlim()
        options = {**defaults.get("hist", {}), **self.kwargs}

        if self.label is not None:
            options["label"] = self.label

        return axes.hist(self.values, range=bin_range, **options)

161

162

class BetaPosterior(AbstractDistribution):
    """Class for storing plot configs for a Beta posterior.

    The posterior is a Beta distribution with the parameters ``num_success + 1``
    and ``num_fail + 1``, shifted by ``offset`` and stretched by ``scale``.
    """

    num_success: int
    num_total: int

    @classmethod
    def from_hdf5(
        cls: type[BetaPosterior],
        filename: str | Path,
        dataname: str,
        scale: float = 100.0,
        offset: float = 0.0,
        **kwargs,
    ) -> BetaPosterior:
        """Initialize data container for Beta posteriors from HDF5 file.

        The attributes ``num_match`` and ``num_total`` of the dataset ``dataname``
        provide the number of successes and the total, respectively. A
        :py:class:`KeyError` is raised if one of them is missing.
        """
        filename = clean_and_check(filename)
        with h5py.File(filename, mode="r") as h5file:
            dataset = h5file[dataname]
            try:
                num_success = int(dataset.attrs["num_match"])
                num_total = int(dataset.attrs["num_total"])
            except KeyError as key_err:
                raise KeyError(
                    "Dataset does not contain observed prevalence data",
                ) from key_err

        return cls(
            num_success=num_success,
            num_total=num_total,
            scale=scale,
            offset=offset,
            kwargs=kwargs,
        )

    def _get_label(self) -> str:
        """Build the fallback label from the observed counts."""
        return f"data: {self.num_success} of {self.num_total}"

    @property
    def num_fail(self) -> int:
        """Return the number of failures, i.e. the totals minus the successes."""
        return self.num_total - self.num_success

    def pdf(self, x: np.ndarray) -> np.ndarray:
        """Compute the probability density function."""
        return sp.stats.beta.pdf(
            x,
            a=self.num_success + 1,
            b=self.num_fail + 1,
            loc=self.offset,
            scale=self.scale,
        )

    def left_percentile(self, percent: float) -> float:
        """Return the point where the CDF reaches ``percent``."""
        # ``loc`` must match the one used in ``pdf``, otherwise the percentiles
        # of a shifted posterior would be those of the unshifted one.
        return sp.stats.beta.ppf(
            percent / 100.0,
            a=self.num_success + 1,
            b=self.num_fail + 1,
            loc=self.offset,
            scale=self.scale,
        )

    def right_percentile(self, percent: float) -> float:
        """Return the point where 100% minus the CDF equals ``percent``."""
        return sp.stats.beta.ppf(
            1.0 - (percent / 100.0),
            a=self.num_success + 1,
            b=self.num_fail + 1,
            loc=self.offset,
            scale=self.scale,
        )

    def draw(self, axes: MPLAxes, resolution: int = 300, **defaults) -> Any:
        """Draw the Beta posterior into the provided ``axes``.

        The density is evaluated at ``resolution`` evenly spaced points between
        the current x-axis limits. The ``"plot"`` entry of ``defaults`` provides
        general settings, which are overridden by this posterior's own ``kwargs``.

        Returns whatever the ``plot`` method of ``axes`` returns.
        """
        left, right = axes.get_xlim()
        x = np.linspace(left, right, resolution)
        y = self.pdf(x)

        plot_kwargs = defaults.get("plot", {}).copy()
        plot_kwargs.update(self.kwargs)

        if self.label is not None:
            plot_kwargs["label"] = self.label

        return axes.plot(x, y, **plot_kwargs)

250

251

def get_size(width="single", unit="cm", ratio="golden"):
    """Return a tuple of figure sizes in inches.

    This is the format in which ``matplotlib`` expects its ``figsize`` keyword
    argument. The size is derived from a ``width`` (``"single"`` means 10 and
    ``"full"`` means 16), which is converted from centimeters if ``unit`` is
    ``"cm"``, and from a width to height ``ratio`` (``"golden"`` means 1.618).

    >>> get_size(width="single", ratio="golden")
    (3.937007874015748, 2.4332557935820445)
    >>> get_size(width="full", ratio=2.)
    (6.299212598425196, 3.149606299212598)
    >>> get_size(width=10., ratio=1.)
    (3.937007874015748, 3.937007874015748)
    >>> get_size(width=5, unit="inches", ratio=2./3.)
    (5, 7.5)
    """
    if ratio == "golden":
        ratio = 1.618

    # resolve the named widths, anything else is taken as is
    width = 10 if width == "single" else 16 if width == "full" else width

    if unit == "cm":
        width = width / CM_PER_INCH

    return (width, width / ratio)

277

278

def get_label(attrs: Mapping) -> str:
    """Assemble a histogram label from the HDF5 ``attrs`` object of the dataset.

    The attributes ``label``, ``modality``, ``t_stage``, and ``midline_ext`` are
    used in this order, if present and not ``None``, and joined by ``" | "``.
    """
    formatters = {
        "label": str,
        "modality": str,
        "t_stage": str,
        "midline_ext": lambda has_ext: "ext" if has_ext else "noext",
    }
    parts = [
        formatter(attrs[name])
        for name, formatter in formatters.items()
        if name in attrs and attrs[name] is not None
    ]
    return " | ".join(parts)

292

293

def get_xlims(
    contents: list[AbstractDistribution],
    percent_lims: tuple[float, float] = (10.0, 10.0),
) -> tuple[float, float]:
    """Get the x-axis limits for a plot containing multiple distributions.

    The left limit is the smallest ``percent_lims[0]`` left percentile and the
    right limit is the largest ``percent_lims[1]`` right percentile of all the
    histograms and probability density functions in ``contents``.
    """
    left_percent, right_percent = percent_lims[0], percent_lims[1]
    left_percentiles = np.array(
        [c.left_percentile(left_percent) for c in contents],
    )
    left_lim = np.min(left_percentiles)
    # the right limit has its own percentage: the second entry of the tuple
    right_percentiles = np.array(
        [c.right_percentile(right_percent) for c in contents],
    )
    right_lim = np.max(right_percentiles)
    return left_lim, right_lim

312

313

def draw(
    axes: MPLAxes,
    contents: list[AbstractDistribution],
    percent_lims: tuple[float, float] = (10.0, 10.0),
    xlims: tuple[float] | None = None,
    hist_kwargs: dict[str, Any] | None = None,
    plot_kwargs: dict[str, Any] | None = None,
) -> MPLAxes:
    """Draw all distributions in ``contents`` into ``axes``.

    Unless ``xlims`` are given, the limits of the x-axis are computed from the
    percentiles of the ``contents`` via :py:func:`get_xlims` and ``percent_lims``.

    The ``hist_kwargs`` update the default settings handed to every content under
    the name ``hist``, and the ``plot_kwargs`` make up the settings handed over
    under the name ``plot``. How these are used - and possibly overridden - is up
    to the ``draw`` method of the respective content.

    Raises a :py:class:`TypeError` if something in ``contents`` is not an
    :py:class:`AbstractDistribution`, and a :py:class:`ValueError` if the x-axis
    limits are not two values of which the first does not exceed the second.
    """
    for content in contents:
        if not isinstance(content, AbstractDistribution):
            raise TypeError("Contents must be subclasses of `AbstractDistribution`")

    if not xlims:
        xlims = get_xlims(contents, percent_lims)

    if len(xlims) != 2 or xlims[0] > xlims[-1]:
        raise ValueError("`xlims` must be tuple of two increasing values")

    axes.set_xlim(*xlims)

    hist_defaults = {
        "density": True,
        "histtype": "stepfilled",
        "alpha": 0.7,
        "bins": 50,
    }
    hist_defaults.update(hist_kwargs or {})
    plot_defaults = dict(plot_kwargs or {})

    for content in contents:
        content.draw(axes, hist=hist_defaults, plot=plot_defaults)

    return axes

361

362

363def split_legends(

364 axes: MPLAxes,

365 titles: list[str],

366 locs: list[tuple[float, float]],

367 **kwargs,

368) -> None:

369 """Separate labels in ``axes`` into separate legends with ``titles`` at ``locs``."""

370 legend_kwargs = {

371 "title_fontsize": "small",

372 "labelspacing": 0.1,

373 "loc": "upper left",

374 }

375 legend_kwargs.update(kwargs)

376

377 handles, labels = axes.get_legend_handles_labels()

378 labels_per_legend = len(labels) // len(titles)

379

380 for i, (title, loc) in enumerate(zip(titles, locs, strict=True)):

381 start = i * labels_per_legend

382 stop = (i + 1) * labels_per_legend if i < len(titles) - 1 else None

383 idx = slice(start, stop)

384

385 legend = axes.legend(

386 handles[idx],

387 labels[idx],

388 bbox_to_anchor=loc,

389 title=title,

390 **legend_kwargs,

391 )

392 axes.add_artist(legend)

393

394

@log_state()
@check_input_file_exists
def use_mpl_stylesheet(file_path: str | Path):
    """Make ``matplotlib`` use the ``.mplstyle`` stylesheet at ``file_path``."""
    stylesheet = file_path
    plt.style.use(stylesheet)

400

401

@log_state()
@check_output_dir_exists
def save_figure(
    output_path: str | Path,
    figure: Figure,
    formats: list[str] | None,
):
    """Save a ``figure`` to ``output_path`` in every one of the provided ``formats``.

    The suffix of ``output_path`` is replaced by each of the ``formats`` in turn.
    Nothing is saved if ``formats`` is ``None`` or empty.
    """
    # a plain string has no ``with_suffix`` method, so normalize to a ``Path``
    base_path = Path(output_path)
    for frmt in formats or []:
        figure.savefig(base_path.with_suffix(f".{frmt}"))

+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_sample_py.html b/htmlcov/z_5bf5c588c698c6cc_sample_py.html new file mode 100644 index 0000000..c83c955 --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_sample_py.html @@ -0,0 +1,523 @@ + + + + + Coverage for src/lyscripts/sample.py: 91% + + + + + +

+ Coverage for src / lyscripts / sample.py: + 91% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 137 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Implementation of flexible MCMC sampling for lymphatic progression models.

3This module provides both helpful functions for programmatically building and running

4sampling pipelines, as well as a CLI interface for the most common sampling use cases.

6The core is the :py:func:`run_sampling` function. It has a flexible interface and

7built-in convergence detection, as well as bookkeeping for monitoring and resuming

8interrupted sampling runs. It can be used both during the burn-in phase and the actual

9sampling phase.

11.. warning::

13 We strongly recommend to set the CLI's ``--cores`` argument to ``None`` (or ``null``

14 in the YAML config file) if you are on MacOS or Windows. This is because we haven't

15 yet figured out how we can safely and efficiently use the ``multiprocess(ing)``

16 library on these two platforms.

17"""

19from __future__ import annotations

21import os

22import sys

23from typing import Any

25from loguru import logger

27from lyscripts.cli import assemble_main

29try:

30 import multiprocess as mp

31except ModuleNotFoundError:

32 import multiprocessing as mp

34if sys.platform == "darwin":

35 logger.warning("Detected MacOS. Setting multiprocess(ing) start method to 'fork'.")

36 mp.set_start_method("fork")

38from pathlib import Path

40import emcee

41import numpy as np

42import pandas as pd

43from lydata.utils import ModalityConfig

44from lymph.types import ParamsType

45from pydantic import BaseModel, Field

46from rich.progress import Progress, ProgressColumn, Task, TimeElapsedColumn

47from rich.text import Text

49from lyscripts.configs import (

50 BaseCLI,

51 DataConfig,

52 DistributionConfig,

53 GraphConfig,

54 ModelConfig,

55 SamplingConfig,

56 add_distributions,

57 add_modalities,

58 construct_model,

59)

60from lyscripts.utils import console, get_hdf5_backend

class CompletedItersColumn(ProgressColumn):
    """Progress column showing how many iterations have been completed."""

    def __init__(self, table_column=None, it: int = 0):
        """Initialize the column with ``it`` iterations done before this run."""
        super().__init__(table_column)
        self.it = it

    def render(self, task: Task) -> Text:
        """Render the completed iterations, including the previous ones."""
        completed = task.completed
        if completed is None:
            content = "? it"
        else:
            content = f"{completed + self.it} it"
        return Text(content, style="progress.data.steps")

class ItersPerSecondColumn(ProgressColumn):
    """Progress column showing the sampling speed in iterations per second."""

    def render(self, task: Task) -> Text:
        """Render the speed of the ``task``, preferring its finished speed."""
        speed = task.finished_speed or task.speed
        content = "? it/s" if speed is None else f"{speed:.2f} it/s"
        return Text(content, style="progress.data.speed")

class AcorTime(BaseModel, validate_assignment=True):
    """Pair of the previous and the most recent autocorrelation time."""

    old: float
    new: float

    def update(self, new: float) -> None:
        """Move the current value to ``old`` and store ``new`` as current one."""
        self.old, self.new = self.new, new

    @property
    def relative_diff(self) -> float:
        """Get the absolute change between old and new, relative to the new value."""
        change = np.abs(self.new - self.old)
        return change / self.new

104

105

class NumAccepted(BaseModel, validate_assignment=True):
    """Pair of the previous and the most recent count of accepted proposals."""

    old: int
    new: int

    def update(self, new: int) -> None:
        """Move the current count to ``old`` and store ``new`` as current one."""
        self.old, self.new = self.new, new

    @property
    def newly_accepted(self) -> int:
        """Get how many proposals were accepted since the last update."""
        previous, current = self.old, self.new
        return current - previous

123MODEL = None

def log_prob_fn(theta: ParamsType, inverse_temp: float = 1.0) -> tuple[float, float]:
    """Compute the log-probability of ``theta`` using the global ``MODEL``.

    The model is accessed as a global variable because of pickling. Returned is
    the log-likelihood scaled by ``inverse_temp``, which is what thermodynamic
    integration samples from, followed by the unscaled log-likelihood.
    """
    log_likelihood = MODEL.likelihood(given_params=theta)
    if not np.isinf(log_likelihood):
        return inverse_temp * log_likelihood, log_likelihood

    # returned separately, because scaling would yield 0 * inf = NaN
    return -np.inf, -np.inf

136

137

def ensure_initial_state(sampler: emcee.EnsembleSampler) -> np.ndarray:
    """Return the last stored sample of the ``sampler`` or a random state.

    If accessing the last sample of the backend fails with an
    :py:class:`AttributeError`, a state drawn uniformly from the unit cube is
    returned instead.
    """
    try:
        last_state = sampler.backend.get_last_sample()
        logger.info(
            f"Resuming from {sampler.backend.filename} with {sampler.iteration} "
            "stored iterations.",
        )
        return last_state
    except AttributeError:
        shape = (sampler.nwalkers, sampler.ndim)
        random_state = np.random.uniform(size=shape)  # noqa: NPY002
        logger.debug(
            f"No stored samples found. Starting from random state {random_state}.",
        )
        return random_state

154

155

def ensure_history_table(file: Path | None) -> pd.DataFrame:
    """Load the history of an interrupted run or create an empty history table.

    The history is not looked up at the ``file`` location itself, but at the same
    location with a ``.tmp`` extension, which is where it is stored during
    sampling. If there is no such file (or ``file`` is ``None``), an empty table
    indexed by ``steps`` is returned.
    """
    tmp_file = None if file is None else file.with_suffix(".tmp")
    if tmp_file is not None and tmp_file.exists():
        return pd.read_csv(tmp_file, index_col="steps")

    column_names = ["steps", "acor_times", "accept_fracs", "max_log_probs"]
    empty_table = pd.DataFrame(columns=column_names)
    return empty_table.set_index("steps")

176

177

def update_history_table(
    history: pd.DataFrame,
    history_file: Path | None,
    iteration: int,
    acor_time: float,
    accepted_frac: float,
    max_log_prob: float,
) -> pd.DataFrame:
    """Store the bookkeeping values of the given ``iteration`` in the ``history``.

    If a ``history_file`` is provided, the updated table is also written to that
    location, but with the extension ``.tmp``.
    """
    new_row = [acor_time, accepted_frac, max_log_prob]
    history.loc[iteration] = new_row
    logger.debug(history.iloc[-1].to_dict())

    if history_file is None:
        return history

    history.to_csv(history_file.with_suffix(".tmp"))
    return history

194

195

def is_converged(
    iteration: int,
    acor_time: AcorTime,
    trust_factor: float,
    relative_thresh: float,
) -> bool:
    """Decide from the autocorrelation time whether the chain has converged.

    Two conditions must hold: The autocorrelation estimate must be trustworthy,
    meaning that ``trust_factor`` times the estimate is smaller than the current
    ``iteration``. And its relative change must be below ``relative_thresh``.

    More details can be found in the `emcee documentation`_.

    .. _emcee documentation: https://emcee.readthedocs.io/en/stable/tutorials/autocorr/
    """
    if not (acor_time.new * trust_factor < iteration):
        # chain is still too short to trust the estimate
        return False

    return acor_time.relative_diff < relative_thresh

216

217

def _get_columns(it: int = 0) -> list[ProgressColumn]:
    """Assemble the progress columns that are displayed during MCMC sampling.

    These are the default columns, followed by the speed, the completed
    iterations (counted on top of the ``it`` previous ones), and the elapsed time.
    """
    columns = list(Progress.get_default_columns())
    columns.append(ItersPerSecondColumn())
    columns.append(CompletedItersColumn(it=it))
    columns.append(TimeElapsedColumn())
    return columns

226

227

def run_sampling(
    sampler: emcee.EnsembleSampler,
    initial_state: np.ndarray | None = None,
    num_steps: int | None = None,
    thin_by: int = 1,
    check_interval: int = 100,
    trust_factor: float = 50.0,
    relative_thresh: float = 0.05,
    history_file: Path | None = None,
    reset_backend: bool = False,
    description: str = "Burn-in phase",
) -> None:
    """Run MCMC sampling.

    This will run the ``sampler`` either for ``num_steps`` steps or - if it set to
    ``None`` - until convergence. Convergence is determined once within a
    ``check_interval`` of steps by the :py:func:`is_converged` function. The
    convergence criterion is based on a trustworthy estimate of the autocorrelation
    time. This is elaborated in the `emcee documentation`_.

    Sampling starts from the ``initial_state``. If none is given, the state is
    determined via :py:func:`ensure_initial_state`.

    Some bookkeeping parameters may be stored in a ``history_file``. During sampling,
    the history is stored in a temporary file with the suffix ``.tmp``. If the sampling
    is interrupted, the history and the last state of the ``sampler`` can be recovered
    and the sampling can be continued.

    One may choose to ``reset_backend``, e.g. in case the previous sampling was run
    until convergence and now one wants to store a length of the converged chain. This
    may also be thinned by a factor of ``thin_by`` (directly passed to the
    :py:class:`emcee.EnsembleSampler` class).

    .. _emcee documentation: https://emcee.readthedocs.io/en/stable/tutorials/autocorr/
    """
    # An explicit ``None`` check is required, because the truth value of an array
    # with more than one element is ambiguous and raises a ``ValueError``.
    if initial_state is not None:
        state = initial_state
    else:
        state = ensure_initial_state(sampler)
    history = ensure_history_table(history_file)

    if reset_backend:
        logger.debug("Resetting backend of sampler.")
        sampler.backend.reset(sampler.nwalkers, sampler.ndim)

    acor_time = AcorTime(old=np.inf, new=np.inf)
    accepted = NumAccepted(old=0, new=sampler.backend.accepted.sum())

    # Only ``None`` means "sample until convergence". Zero steps must not be
    # mistaken for it, since such a run would never terminate.
    max_iterations = np.inf if num_steps is None else num_steps

    with Progress(*_get_columns(it=sampler.iteration), console=console) as progress:
        task = progress.add_task(description=description, total=num_steps)
        while sampler.iteration < max_iterations:
            # sample up to the next multiple of ``check_interval``
            for state in sampler.sample(  # noqa: B007, B020
                initial_state=state,
                iterations=check_interval - sampler.iteration % check_interval,
                thin_by=thin_by,
            ):
                progress.update(task, advance=1)

            acor_time.update(new=sampler.get_autocorr_time(tol=0).mean())
            accepted.update(new=sampler.backend.accepted.sum())

            history = update_history_table(
                history=history,
                history_file=history_file,
                iteration=sampler.iteration,
                acor_time=acor_time.new,
                accepted_frac=(
                    accepted.newly_accepted / (check_interval * sampler.nwalkers)
                ),
                max_log_prob=np.max(state.log_prob),
            )

            if num_steps is None and is_converged(
                iteration=sampler.iteration,
                acor_time=acor_time,
                trust_factor=trust_factor,
                relative_thresh=relative_thresh,
            ):
                logger.info(f"Sampling converged after {sampler.iteration} steps.")
                break

    if history_file is not None:
        tmp_file = history_file.with_suffix(".tmp")
        # There is no temporary history if the loop above never ran, e.g. when
        # resuming a run that had already reached ``num_steps``.
        if tmp_file.exists():
            tmp_file.rename(history_file)

305

306

class DummyPool:
    """Stand-in context manager for when no multiprocessing is used.

    Entering it yields ``None`` instead of a pool and leaving it does nothing.
    """

    def __enter__(self) -> None:
        """Enter the context manager."""
        return None

    def __exit__(self, *args) -> None:
        """Exit the context manager."""
        return None

317

318

def get_pool(num_cores: int | None) -> Any | DummyPool:  # type: ignore
    """Get a ``multiprocess(ing)`` pool or ``DummyPool``.

    A ``DummyPool`` is returned if ``num_cores`` is ``None``. Otherwise, the
    result is a ``multiprocess(ing)`` pool created with ``num_cores``.
    """
    if num_cores is None:
        return DummyPool()

    return mp.Pool(num_cores)

326

327

def init_sampler(settings: SampleCLI, ndim: int, pool: Any) -> emcee.EnsembleSampler:
    """Create the ``emcee.EnsembleSampler`` as specified in the ``settings``.

    The number of walkers is ``ndim`` times the configured walkers per dimension.
    Samples are stored in the configured dataset of the HDF5 storage file, and the
    parameter names are taken from the global ``MODEL``.
    """
    sampling = settings.sampling
    num_walkers = ndim * sampling.walkers_per_dim
    hdf5_backend = get_hdf5_backend(
        file_path=sampling.storage_file,
        dataset=sampling.dataset,
        nwalkers=num_walkers,
        ndim=ndim,
    )
    weighted_moves = [
        (emcee.moves.DEMove(), 0.8),
        (emcee.moves.DESnookerMove(), 0.2),
    ]
    param_names = list(MODEL.get_named_params().keys())
    return emcee.EnsembleSampler(
        nwalkers=num_walkers,
        ndim=ndim,
        log_prob_fn=log_prob_fn,
        kwargs={"inverse_temp": sampling.inverse_temp},
        moves=weighted_moves,
        backend=hdf5_backend,
        pool=pool,
        blobs_dtype=[("log_prob", np.float64)],
        parameter_names=param_names,
    )

348

349

class SampleCLI(BaseCLI):
    """Use MCMC to infer distributions over model parameters from data."""

    graph: GraphConfig
    model: ModelConfig = ModelConfig()
    distributions: dict[str, DistributionConfig] = Field(
        default={},
        description=(
            "Mapping of model T-categories to predefined distributions over "
            "diagnose times."
        ),
    )
    modalities: dict[str, ModalityConfig] = Field(
        default={},
        description=(
            "Maps names of diagnostic modalities to their specificity/sensitivity."
        ),
    )
    data: DataConfig
    sampling: SamplingConfig

    def cli_cmd(self) -> None:
        """Start the ``sample`` subcommand.

        The model is built from the ``graph`` and ``model`` arguments. Then, the
        distributions over diagnose times from the ``distributions`` argument and
        the diagnostic modalities from the ``modalities`` argument are added to
        it. Lastly, the patient data specified by the ``data`` argument is loaded.

        With the model in place, an :py:class:`emcee.EnsembleSampler` is created
        (see :py:func:`init_sampler`) and :py:func:`run_sampling` is called twice:
        first for the burn-in phase and then for the actual sampling phase. All
        settings for this are taken from the ``sampling`` argument.
        """
        # as recommended in https://emcee.readthedocs.io/en/stable/tutorials/parallel/#
        os.environ["OMP_NUM_THREADS"] = "1"

        logger.debug(self.model_dump_json(indent=2))

        # ugly, but necessary for pickling
        global MODEL
        MODEL = construct_model(self.model, self.graph)
        MODEL = add_distributions(MODEL, self.distributions)
        MODEL = add_modalities(MODEL, self.modalities)
        MODEL.load_patient_data(**self.data.get_load_kwargs())
        num_dims = MODEL.get_num_dims()

        sampling = self.sampling

        # emcee does not support numpy's new random number generator yet.
        np.random.seed(sampling.seed)  # noqa: NPY002

        with get_pool(sampling.cores) as pool:
            sampler = init_sampler(settings=self, ndim=num_dims, pool=pool)

            # burn-in: convergence is monitored and the history is recorded
            run_sampling(
                sampler=sampler,
                num_steps=sampling.burnin_steps,
                check_interval=sampling.check_interval,
                trust_factor=sampling.trust_factor,
                relative_thresh=sampling.relative_thresh,
                history_file=sampling.history_file,
                description="Burn-in phase",
            )

            # sampling: start with a fresh backend and sample in one interval
            run_sampling(
                sampler=sampler,
                num_steps=sampling.num_steps,
                thin_by=sampling.thin_by,
                check_interval=sampling.num_steps,
                reset_backend=True,
                description="Sampling phase",
            )

422

423

424if __name__ == "__main__":

425 main = assemble_main(settings_cls=SampleCLI, prog_name="sample")

426 main()

+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_schedule_py.html b/htmlcov/z_5bf5c588c698c6cc_schedule_py.html new file mode 100644 index 0000000..9e62d82 --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_schedule_py.html @@ -0,0 +1,130 @@ + + + + + Coverage for src/lyscripts/schedule.py: 55% + + + + + +

+ Coverage for src / lyscripts / schedule.py: + 55% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 11 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1r"""Generate inverse temperature schedules for thermodynamic integration.

3Thermodynamic integration is quite sensitive to the specific schedule which is used.

4I noticed in my models, that within the interval :math:`[0, 0.1]`, the increase in the

5expected log-likelihood is very steep. Hence, the inverse temperature :math:`\beta`

6must be more densely spaced in the beginning.

8This can be achieved by using a power sequence: Generate :math:`n` linearly spaced

9points in the interval :math:`[0, 1]` and then transform each point by computing

10:math:`\beta_i^k` where :math:`k` could e.g. be 5.

11"""

13from loguru import logger

15from lyscripts.cli import assemble_main

16from lyscripts.configs import BaseCLI, ScheduleConfig

class ScheduleCLI(ScheduleConfig, BaseCLI):
    """Generate an inverse temperature schedule for thermodynamic integration."""

    def cli_cmd(self) -> None:
        """Run the ``schedule`` command: write one inverse temperature per line."""
        logger.debug(self.model_dump_json(indent=2))

        # Plain `print` (instead of logging) keeps the output pipeable.
        for beta in self.get_schedule():
            print(beta)  # noqa: T201

31if __name__ == "__main__":

32 main = assemble_main(settings_cls=ScheduleCLI, prog_name="schedule")

33 main()

+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_schema_py.html b/htmlcov/z_5bf5c588c698c6cc_schema_py.html new file mode 100644 index 0000000..045f18e --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_schema_py.html @@ -0,0 +1,163 @@ + + + + + Coverage for src/lyscripts/schema.py: 86% + + + + + +

+ Coverage for src / lyscripts / schema.py: + 86% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 22 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""A fusion of all :py:mod:`configs`, allowing the creation of a JSON schema.

3This command is not intended to be used by the end user. Rather, it exists such that

4the developers and maintainers can create a JSON schema from all the defined

5:py:mod:`configs` and store that in the `source code repository`_. Subsequently, the

6end user can point their IDE to this schema, hosted on GitHub to provide them with

7auto-completion and validation of their YAML configuration files that they feed into

8the lyscripts CLIs when they build pipelines or scripts with it.

10The `URL for the schema`_ can for example be used in the settings of VS Code like this:

12.. code:: json

14 {

15 "yaml.schemas": {

16 "https://raw.githubusercontent.com/lycosystem/lyscripts/main/schemas/ly.json": "*.ly.yaml"

17 },

18 }

20Which would enable auto-completion and validation for all files with the extension

21``.ly.yaml`` in the workspace.

23.. _source code repository: https://github.com/lycosystem/lyscripts

24.. _URL for the schema: https://raw.githubusercontent.com/lycosystem/lyscripts/main/schemas/ly.json

25""" # noqa: E501

27import json

29from lydata.utils import ModalityConfig

30from pydantic import BaseModel, Field

32from lyscripts import configs

class SchemaSettings(BaseModel):
    """Settings for generating a JSON schema for lyscripts configuration files."""

    # Lower and upper bound are both 1, so 1 is the only accepted version.
    version: int = Field(
        description=(
            "For future compatibility reasons, every config file must have a `version: 1` "
            "field at the top level."
        ),
        ge=1,
        le=1,
    )

    # One top-level section per configuration class. Every section has a default,
    # so a config file only needs to contain the sections it actually uses.
    cross_validation: configs.CrossValidationConfig = None
    data: configs.DataConfig = None
    diagnosis: configs.DiagnosisConfig = None
    distributions: dict[str, configs.DistributionConfig] = {}
    graph: configs.GraphConfig = None
    involvement: configs.InvolvementConfig = None
    modalities: dict[str, ModalityConfig] = {}
    model: configs.ModelConfig = None
    sampling: configs.SamplingConfig = None
    scenarios: list[configs.ScenarioConfig] = []
    schedule: configs.ScheduleConfig = None

def main() -> None:
    """Print the JSON schema of :py:class:`SchemaSettings` to stdout."""
    as_text = json.dumps(SchemaSettings.model_json_schema(), indent=2)
    print(as_text)  # noqa: T201

65if __name__ == "__main__":

66 main()

+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_utils_py.html b/htmlcov/z_5bf5c588c698c6cc_utils_py.html new file mode 100644 index 0000000..5b5e176 --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_utils_py.html @@ -0,0 +1,296 @@ + + + + + Coverage for src/lyscripts/utils.py: 94% + + + + + +

+ Coverage for src / lyscripts / utils.py: + 94% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 84 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""General utility functions for the lyscripts package."""

3from pathlib import Path

5import numpy as np

6import pandas as pd

7import yaml

8from emcee.backends import HDFBackend

9from loguru import logger

10from rich.console import Console

11from scipy.special import factorial

13from lyscripts.decorators import (

14 check_input_file_exists,

15 check_output_dir_exists,

16)

18console = Console()

21def binom_pmf(support: list[int] | np.ndarray, p: float = 0.5):

22 """Binomial PMF that is much faster than the one from scipy."""

23 max_time = len(support) - 1

24 if p > 1.0 or p < 0.0:

25 raise ValueError("Binomial prob must be btw. 0 and 1")

26 q = 1.0 - p

27 binom_coeff = factorial(max_time) / (

28 factorial(support) * factorial(max_time - support)

29 )

30 return binom_coeff * p**support * q ** (max_time - support)

def get_dict_depth(nested: dict) -> int:
    """Return how many levels of dictionaries are nested inside ``nested``.

    Anything that is not a dictionary has depth 0, an empty dictionary has depth 1,
    and otherwise the deepest branch determines the result.

    >>> get_dict_depth({"a": {"b": 1}})
    2
    >>> varying_depth = {"a": {"b": 1}, "c": {"d": {"e": 2}}}
    >>> get_dict_depth(varying_depth)
    3
    """
    if isinstance(nested, dict):
        child_depths = (get_dict_depth(child) for child in nested.values())
        return 1 + max(child_depths, default=0)

    return 0

def delete_private_keys(nested: dict) -> dict:
    """Return a copy of ``nested`` without keys that start with an underscore.

    The removal is applied recursively to all nested dictionaries. Values that are
    not dictionaries are returned as they are.

    >>> delete_private_keys({"patient": {"__doc__": "some patient info", "age": 61}})
    {'patient': {'age': 61}}
    >>> delete_private_keys({"patient": {"age": 61}})
    {'patient': {'age': 61}}
    """
    if not isinstance(nested, dict):
        return nested

    return {
        key: delete_private_keys(value)
        for key, value in nested.items()
        if not isinstance(key, str) or not key.startswith("_")
    }

75def flatten(

76 nested: dict,

77 prev_key: tuple = (),

78 max_depth: int | None = None,

79) -> dict:

80 """Flatten ``nested`` dict by creating key tuples for each value at ``max_depth``.

82 >>> nested = {"tumor": {"1": {"t_stage": 1, "size": 12.3}}}

83 >>> flatten(nested)

84 {('tumor', '1', 't_stage'): 1, ('tumor', '1', 'size'): 12.3}

85 >>> mapping = {"patient": {"#": {"age": {"func": int, "columns": ["age"]}}}}

86 >>> flatten(mapping, max_depth=3)

87 {('patient', '#', 'age'): {'func': <class 'int'>, 'columns': ['age']}}

89 Note that flattening an already flat dictionary will yield some weird results.

90 """

91 result = {}

93 for key, value in nested.items():

94 is_dict = isinstance(value, dict)

95 has_reached_max_depth = max_depth is not None and len(prev_key) >= max_depth - 1

97 if is_dict and not has_reached_max_depth:

98 result.update(flatten(value, (*prev_key, key), max_depth))

99 else:

100 result[(*prev_key, key)] = value

101

102 return result

103

104

def unflatten(flat: dict) -> dict:
    """Build a nested dictionary from a flat one that is keyed by tuples.

    Every key tuple describes the path to its value inside the returned dictionary.

    >>> flat = {('tumor', '1', 't_stage'): 1, ('tumor', '1', 'size'): 12.3}
    >>> unflatten(flat)
    {'tumor': {'1': {'t_stage': 1, 'size': 12.3}}}
    >>> mapping = {('patient', '#', 'age'): {'func': int, 'columns': ['age']}}
    >>> unflatten(mapping)
    {'patient': {'#': {'age': {'func': <class 'int'>, 'columns': ['age']}}}}
    """
    nested = {}

    for path, leaf_value in flat.items():
        parents, leaf_key = path[:-1], path[-1]

        # Walk down to the innermost dict, creating missing levels on the way.
        branch = nested
        for part in parents:
            branch = branch.setdefault(part, {})

        branch[leaf_key] = leaf_value

    return nested

125

126

def get_modalities_subset(
    defined_modalities: dict[str, list[float]],
    selection: list[str],
) -> dict[str, list[float]]:
    """Pick the entries named in ``selection`` out of ``defined_modalities``.

    A ``KeyError`` is raised when a selected modality has not been defined.

    >>> modalities = {"CT": [0.76, 0.81], "MRI": [0.63, 0.86]}
    >>> get_modalities_subset(modalities, ["CT"])
    {'CT': [0.76, 0.81]}
    """
    subset = {}

    for name in selection:
        try:
            values = defined_modalities[name]
        except KeyError as key_err:
            raise KeyError(f"Modality {name} has not been defined yet") from key_err
        subset[name] = values

    return subset

144

145

def load_patient_data(
    file_path: Path,
    **read_csv_kwargs: dict,
) -> pd.DataFrame:
    """Read the patient table from the CSV file at ``file_path``.

    All keyword arguments are forwarded to :py:func:`pandas.read_csv`. Unless the
    caller provides ``header``, the first three rows are read as the header.
    """
    read_csv_kwargs.setdefault("header", [0, 1, 2])

    data = pd.read_csv(file_path, **read_csv_kwargs)
    logger.info(f"Loaded {len(data)} patient records from {file_path}")
    return data

157

158

@check_input_file_exists
def load_yaml_params(file_path: Path) -> dict:
    """Parse the YAML file at ``file_path`` and return its content."""
    with open(file_path, encoding="utf-8") as yaml_file:
        params = yaml.safe_load(yaml_file)

    logger.info(f"Loaded YAML parameters from {file_path}")
    return params

166

167

@check_input_file_exists
def load_model_samples(
    file_path: Path,
    name: str = "mcmc",
    flat: bool = True,
    discard: int = 0,
    thin: int = 1,
) -> np.ndarray:
    """Read the MCMC chain stored under ``name`` in the HDF5 file at ``file_path``.

    The file is opened read-only. ``flat``, ``discard`` and ``thin`` are passed on
    to the backend's ``get_chain`` method.
    """
    read_only_backend = HDFBackend(file_path, name=name, read_only=True)
    samples = read_only_backend.get_chain(flat=flat, discard=discard, thin=thin)

    logger.info(f"Loaded samples with shape {samples.shape} from {file_path}")
    return samples

181

182

@check_output_dir_exists
def get_hdf5_backend(
    file_path: Path,
    dataset: str = "mcmc",
    nwalkers: int | None = None,
    ndim: int | None = None,
    reset: bool = False,
) -> HDFBackend:
    """Create an :py:class:`HDFBackend` for the HDF5 file at ``file_path``.

    If ``reset`` is set, the backend is reset to ``nwalkers`` and ``ndim`` before it
    is returned.
    """
    backend = HDFBackend(file_path, name=dataset)
    logger.info(f"Opened HDF5 file at {file_path}")

    if not reset:
        return backend

    logger.info(f"Resetting backend at {file_path} to {nwalkers=} and {ndim=}")
    backend.reset(nwalkers, ndim)
    return backend

+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a___init___py.html b/htmlcov/z_9b7bcb970ba14d6a___init___py.html new file mode 100644 index 0000000..e035c63 --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a___init___py.html @@ -0,0 +1,150 @@ + + + + + Coverage for src/lyscripts/data/__init__.py: 83% + + + + + +

+ Coverage for src / lyscripts / data / init.py: + 83% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 6 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Commands and functions for managing CSV data on patterns of lymphatic progression.

3This contains helpful CLI commands that allow building quick and reproducible workflows

4even when using language-agnostic tools like `Make`_ or `DVC`_.

6Most of these commands can load `LyProX`_ style data from CSV files, but also from

7the installed datasets provided by the `lydata`_ package and directly from the

8associated `GitHub repository`_.

10Another cool feature is the built-in mini web application that allows collecting nodal

11involvement data interactively and in the same standardized format as we have published

12in the past, both on `LyProX`_ and in our `GitHub repository`_. It can be launched by

13running `lyscripts data collect` in the terminal. See the docs for the

14:py:mod:`lyscripts.data.collect` submodule for more information.

16.. _Make: https://www.gnu.org/software/make/

17.. _DVC: https://dvc.org

18.. _LyProX: https://lyprox.org

19.. _lydata: https://lydata.readthedocs.io

20.. _GitHub repository: https://github.com/lycosystem/lydata

21"""

23from pydantic_settings import BaseSettings, CliApp, CliSubCommand

25from lyscripts.data import ( # noqa: F401

26 collect,

27 enhance,

28 fetch,

29 generate,

30 join,

31 lyproxify,

32 split,

33)

35# Avoid conflict with built-in `filter` function

36from lyscripts.data import filter as filter_

class DataCLI(BaseSettings):
    """Work with lymphatic progression data through this CLI."""

    # Each field registers the CLI class of one submodule as a subcommand.
    collect: CliSubCommand[collect.CollectorCLI]
    lyproxify: CliSubCommand[lyproxify.LyproxifyCLI]
    join: CliSubCommand[join.JoinCLI]
    split: CliSubCommand[split.SplitCLI]
    fetch: CliSubCommand[fetch.FetchCLI]
    # The module is imported as `filter_` to avoid shadowing the built-in.
    filter: CliSubCommand[filter_.FilterCLI]
    enhance: CliSubCommand[enhance.EnhanceCLI]
    generate: CliSubCommand[generate.GenerateCLI]

    def cli_cmd(self) -> None:
        """Dispatch to the ``data`` subcommand that was selected."""
        CliApp.run_subcommand(self)

+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a___main___py.html b/htmlcov/z_9b7bcb970ba14d6a___main___py.html new file mode 100644 index 0000000..6590714 --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a___main___py.html @@ -0,0 +1,133 @@ + + + + + Coverage for src/lyscripts/data/__main__.py: 0% + + + + + +

+ Coverage for src / lyscripts / data / main.py: + 0% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 18 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Run the data module as a script."""

3import argparse

5from lyscripts import exit_cli

6from lyscripts.cli import RichDefaultHelpFormatter

7from lyscripts.data import enhance, generate, join, split

9# Avoid conflict with built-in `filter` function

10from lyscripts.data import filter as filter_

def main(args: argparse.Namespace | None = None):
    """Run the main script.

    Builds the ``lyscripts data`` argument parser, lets every data submodule add its
    own subparser, parses the command line and calls the ``run_main`` callback
    stored on the parsed arguments.

    Args:
        args: Not used. The arguments are always parsed from the command line. The
            parameter is kept (now optional) for backward compatibility, so that
            calling ``main()`` without arguments no longer raises a ``TypeError``.
    """
    parser = argparse.ArgumentParser(
        prog="lyscripts data",
        description=__doc__,
        formatter_class=RichDefaultHelpFormatter,
    )
    # Fallback callback if no subcommand sets its own `run_main`.
    parser.set_defaults(run_main=exit_cli)
    subparsers = parser.add_subparsers()

    # the individual scripts add `ArgumentParser` instances and their arguments to
    # this `subparsers` object
    for module in (enhance, generate, join, split, filter_):
        module._add_parser(subparsers, help_formatter=parser.formatter_class)

    args = parser.parse_args()
    args.run_main(args, parser)

35if __name__ == "__main__":

36 main()

+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a_enhance_py.html b/htmlcov/z_9b7bcb970ba14d6a_enhance_py.html new file mode 100644 index 0000000..ce4fdd3 --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a_enhance_py.html @@ -0,0 +1,150 @@ + + + + + Coverage for src/lyscripts/data/enhance.py: 68% + + + + + +

+ Coverage for src / lyscripts / data / enhance.py: + 68% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 19 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Enhance the dataset by inferring additional columns from the data.

3This is a command-line interface to the methods

4:py:meth:`~lydata.accessor.LyDataAccessor.combine` and

5:py:meth:`~lydata.accessor.LyDataAccessor.augment` of the

6:py:class:`~lydata.accessor.LyDataAccessor` class.

7"""

9from typing import Literal

11from loguru import logger

12from lydata.accessor import LyDataFrame

13from lydata.utils import ModalityConfig

15from lyscripts.cli import assemble_main

16from lyscripts.configs import BaseCLI, DataConfig

17from lyscripts.data.utils import save_table_to_csv

class EnhanceCLI(BaseCLI):
    """Enhance the dataset by inferring additional columns from the data."""

    input: DataConfig
    modalities: dict[str, ModalityConfig] | None = None
    method: Literal["max_llh", "rank"] = "max_llh"
    # Passed to the enhancement as its `subdivisions` argument.
    lnl_subdivisions: dict[str, list[str]] = {
        "I": ["a", "b"],
        "II": ["a", "b"],
        "V": ["a", "b"],
    }
    output_file: str

    def cli_cmd(self) -> None:
        """Load the input data, enhance it and write the result to a CSV file.

        The enhancement itself is done by the ``enhance`` method of the ``ly``
        accessor on the loaded table. It receives the ``modalities``, the
        ``method`` and the ``lnl_subdivisions`` of this command.
        """
        logger.debug(self.model_dump_json(indent=2))

        raw: LyDataFrame = self.input.load()
        enhanced = raw.ly.enhance(
            modalities=self.modalities,
            method=self.method,
            subdivisions=self.lnl_subdivisions,
        )
        save_table_to_csv(file_path=self.output_file, table=enhanced)

51if __name__ == "__main__":

52 main = assemble_main(settings_cls=EnhanceCLI, prog_name="enhance")

53 main()

+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a_fetch_py.html b/htmlcov/z_9b7bcb970ba14d6a_fetch_py.html new file mode 100644 index 0000000..97927cf --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a_fetch_py.html @@ -0,0 +1,154 @@ + + + + + Coverage for src/lyscripts/data/fetch.py: 67% + + + + + +

+ Coverage for src / lyscripts / data / fetch.py: + 67% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 21 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Small command to fetch the data from a remote using the lydata package."""

3from pathlib import Path

5import lydata # noqa: F401

6from loguru import logger

7from lydata.loader import LyDataset

8from pydantic import Field

10from lyscripts.cli import assemble_main

11from lyscripts.configs import BaseCLI

class FetchCLI(LyDataset, BaseCLI):
    """Fetch a specific dataset from the lyDATA repository."""

    github_token: str | None = Field(
        default=None,
        description=(
            "GitHub token to access private datasets. "
            "Can also be provided as `GITHUB_TOKEN` environment variable."
        ),
    )
    github_user: str | None = Field(
        default=None,
        description=(
            "GitHub user for non-token login. "
            "Can also be provided as `GITHUB_USER` environment variable."
        ),
    )
    github_password: str | None = Field(
        default=None,
        description=(
            "GitHub password for non-token login. "
            "Can also be provided as `GITHUB_PASSWORD` environment variable."
        ),
    )
    output_file: Path = Field(description="The path to save the dataset to.")

    def cli_cmd(self):
        """Run the ``fetch`` command: download the dataset and store it as CSV."""
        # Turn on the log messages of the `lydata` package for this command.
        logger.enable("lydata")
        logger.debug(self.model_dump_json(indent=2))

        credentials = {
            "token": self.github_token,
            "user": self.github_user,
            "password": self.github_password,
        }
        dataset = self.get_dataframe(use_github=True, **credentials)

        dataset.to_csv(self.output_file, index=False)
        logger.success(f"Fetched dataset and saved to {self.output_file}")

55if __name__ == "__main__":

56 main = assemble_main(settings_cls=FetchCLI, prog_name="fetch")

57 main()

+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a_filter_py.html b/htmlcov/z_9b7bcb970ba14d6a_filter_py.html new file mode 100644 index 0000000..e28ec32 --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a_filter_py.html @@ -0,0 +1,196 @@ + + + + + Coverage for src/lyscripts/data/filter.py: 38% + + + + + +

+ Coverage for src / lyscripts / data / filter.py: + 38% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 48 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Filter a dataset according to some common criteria.

3This is essentially a command line interface to building a

4:py:class:`query object <lydata.querier.Q>` and applying it to the dataset.

5"""

7from pathlib import Path

8from typing import Literal

10from loguru import logger

11from lydata import Q

12from pydantic import Field

13from pydantic_settings import CliImplicitFlag

15from lyscripts.cli import assemble_main

16from lyscripts.configs import BaseCLI, DataConfig

17from lyscripts.data.utils import save_table_to_csv

class FilterCLI(BaseCLI):
    """In- or exclude patients where a certain column fulfills a certain condition."""

    input: DataConfig
    include: CliImplicitFlag[bool] = Field(
        False,
        description="Include patients where the condition is met (default: exclude).",
    )
    column: list[str] | str = Field(
        description=(
            "The column to filter by. May be a tuple of three strings, since data "
            "has a three-level header. If it is only one string, the lydata package "
            "tries to map that to a three-level header."
        ),
    )
    operator: Literal["==", "!=", ">", "<", ">=", "<=", "in", "contains"] = Field(
        description="The operator to use for comparison.",
    )
    value: float | int | str = Field(description="The value to compare against.")
    output_file: Path = Field(description="The path to save the filtered dataset to.")

    def model_post_init(self, __context):
        """Normalize ``column`` and cast ``value`` to ``float`` where possible.

        A ``column`` list with one entry is unpacked to a plain string and a list
        with three entries is turned into a tuple. If ``value`` cannot be
        converted to ``float``, it is kept unchanged.

        Raises:
            ValueError: If ``column`` is a list with neither one nor three entries.
        """
        if isinstance(self.column, list):
            if len(self.column) == 1:
                self.column = self.column[0]
            elif len(self.column) == 3:
                self.column = tuple(self.column)
            else:
                raise ValueError(
                    "The column attribute must be an iterable of three strings or a "
                    f"single string, but it is {self.column}.",
                )

        # Only the conversion is guarded, so that a `ValueError` raised by the
        # parent's `model_post_init` is not swallowed. No `int` fallback is needed:
        # everything `int()` can parse is also accepted by `float()`.
        try:
            self.value = float(self.value)
        except ValueError:
            pass

        return super().model_post_init(__context)

    def cli_cmd(self):
        """Execute the ``filter`` command.

        This command uses the :py:class:`~lydata.querier.Q` objects of the `lydata`_
        library to filter the dataset according to the given criteria. Patients
        matching the query are kept if ``include`` is set and dropped otherwise.
        The result is saved as CSV to ``output_file``.

        .. _lydata: https://lydata.readthedocs.io
        """
        logger.debug(self.model_dump_json(indent=2))

        data = self.input.load()
        query = Q(
            column=self.column,
            operator=self.operator,
            value=self.value,
        )
        logger.debug(f"Created query object: {query}")
        mask = query.execute(data)

        if self.include:
            filtered = data[mask]
            logger.info(f"Keeping {sum(mask)} of {len(data)} patients.")
        else:
            filtered = data[~mask]
            logger.info(f"Excluding {sum(mask)} of {len(data)} patients.")

        save_table_to_csv(file_path=self.output_file, table=filtered)

97if __name__ == "__main__":

98 main = assemble_main(settings_cls=FilterCLI, prog_name="filter")

99 main()

+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a_generate_py.html b/htmlcov/z_9b7bcb970ba14d6a_generate_py.html new file mode 100644 index 0000000..f2a6778 --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a_generate_py.html @@ -0,0 +1,193 @@ + + + + + Coverage for src/lyscripts/data/generate.py: 89% + + + + + +

+ Coverage for src / lyscripts / data / generate.py: + 89% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 35 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Script to generate a synthetic dataset.

3The generation is done by the :py:meth:`~lymph.models.Unilateral.draw_patients` method

4of

5the `lymph`_ package, which is why this requires the specification of a model

6via the :py:class:`~lyscripts.configs.ModelConfig` class.

8.. _lymph: https://lymph-model.readthedocs.io/

9"""

11import numpy as np

12from loguru import logger

13from lydata.utils import ModalityConfig

14from pydantic import Field

16from lyscripts.cli import assemble_main

17from lyscripts.configs import (

18 BaseCLI,

19 DistributionConfig,

20 GraphConfig,

21 ModelConfig,

22 add_distributions,

23 add_modalities,

24 construct_model,

25)

26from lyscripts.data.utils import save_table_to_csv

class GenerateCLI(BaseCLI):
    """Settings for the command-line interface."""

    graph: GraphConfig
    model: ModelConfig = ModelConfig()
    distributions: dict[str, DistributionConfig] = Field(
        default={},
        description=(
            "Mapping of model T-categories to predefined distributions "
            "over diagnose times."
        ),
    )
    t_stages_dist: dict[str, float] = Field(
        description=(
            "Specify what fraction of generated patients should come "
            "from the respective T-Stage."
        ),
    )
    modalities: dict[str, ModalityConfig]
    params: dict[str, float]
    num_patients: int = 200
    output_file: str
    seed: int = 42

    def model_post_init(self, __context) -> None:
        """Check that the T-stage fractions are complete and add up to one.

        Every key of ``distributions`` must also be present in ``t_stages_dist``,
        and the fractions of exactly these T-stages must sum to 1.
        """
        t_stages = list(self.distributions)

        for t_stage in t_stages:
            if t_stage not in self.t_stages_dist:
                raise ValueError(f"Missing distribution for T-stage {t_stage}.")

        total = sum((self.t_stages_dist[t_stage] for t_stage in t_stages), 0.0)
        if not np.isclose(total, 1.0):
            raise ValueError("Sum of T-stage distributions must be 1.")

        return super().model_post_init(__context)

    def cli_cmd(self) -> None:
        """Run the ``generate`` command.

        A model is constructed from the provided settings and its parameters are
        set to ``params``. Then :py:meth:`~lymph.models.Unilateral.draw_patients`
        from the `lymph`_ package draws the synthetic patients, which are written
        to ``output_file`` as CSV.

        .. _lymph: https://lymph-model.readthedocs.io/
        """
        logger.debug(self.model_dump_json(indent=2))

        lymph_model = construct_model(self.model, self.graph)
        lymph_model = add_distributions(lymph_model, self.distributions)
        lymph_model = add_modalities(lymph_model, self.modalities)
        lymph_model.set_params(**self.params)
        logger.info(f"Set parameters: {lymph_model.get_params(as_dict=True)}")

        # The fractions are passed in the order in which they were specified.
        stage_fractions = list(self.t_stages_dist.values())
        synth_data = lymph_model.draw_patients(
            num=self.num_patients,
            stage_dist=stage_fractions,
            seed=self.seed,
        )
        logger.info(f"Generated synthetic data with shape {synth_data.shape}")

        save_table_to_csv(file_path=self.output_file, table=synth_data)

94if __name__ == "__main__":

95 main = assemble_main(settings_cls=GenerateCLI, prog_name="data generate")

96 main()

+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a_join_py.html b/htmlcov/z_9b7bcb970ba14d6a_join_py.html new file mode 100644 index 0000000..39e9975 --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a_join_py.html @@ -0,0 +1,179 @@ + + + + + Coverage for src/lyscripts/data/join.py: 55% + + + + + +

+ Coverage for src / lyscripts / data / join.py: + 55% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 22 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Join multiple lymphatic progression datasets into a single dataset."""

3from pathlib import Path

5import pandas as pd

6from lydata.validator import cast_dtypes

7from pydantic import Field

9from lyscripts.cli import assemble_main

10from lyscripts.configs import BaseCLI, DataConfig

11from lyscripts.data.utils import save_table_to_csv

class JoinCLI(BaseCLI):
    """Join multiple lymphatic progression datasets into a single dataset."""

    inputs: list[DataConfig] = Field(description="The datasets to join.")
    output_file: Path = Field(description="The path to the output dataset.")

    def cli_cmd(self) -> None:
        r"""Run the ``join`` subcommand.

        All datasets listed in ``inputs`` are loaded and stacked on top of each
        other, in the given order, to form one dataset that is saved to
        ``output_file``.

        Because the inputs are `pydantic`_ models, joining files from disk requires
        passing each of them as a small JSON object:

        .. code-block:: bash

            lyscripts data join \
                --inputs '{"source": "file1.csv"}' \
                --inputs '{"source": "file2.csv"}' \
                --output-file "joined.csv"

        In return, datasets fetched directly from the `lydata Github repo`_ can be
        joined as well. Since the command line gets rather complex, we recommend
        listing the datasets in a YAML file:

        .. code-block:: yaml

            inputs:
              - data.year: 2021
                data.institution: "usz"
                data.subsite: "oropharynx"
              - data.year: 2021
                data.institution: "clb"
                data.subsite: "oropharynx"

        The command then becomes:

        .. code-block:: bash

            lyscripts data join --configs datasets.ly.yaml --output-file joined.csv

        .. _pydantic: https://docs.pydantic.dev/latest/
        .. _lydata Github repo: https://github.com/lycosystem/lydata
        """
        # `cast_dtypes()` ensures that e.g. boolean values are not suddenly
        # converted to strings when a dataset with missing values is concatenated.
        # The generator is lazy, so each dataset is loaded right before it is used.
        tables = (cast_dtypes(data_config.load()) for data_config in self.inputs)

        joined = next(tables, None)
        for table in tables:
            joined = pd.concat(
                [joined, table],
                axis="index",
                ignore_index=True,
            )

        save_table_to_csv(file_path=self.output_file, table=joined)

if __name__ == "__main__":
    # Allow running this module directly as a standalone ``join`` command.
    main = assemble_main(prog_name="join", settings_cls=JoinCLI)
    main()

+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a_lyproxify_py.html b/htmlcov/z_9b7bcb970ba14d6a_lyproxify_py.html new file mode 100644 index 0000000..50c278d --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a_lyproxify_py.html @@ -0,0 +1,438 @@ + + + + + Coverage for src/lyscripts/data/lyproxify.py: 46% + + + + + +

+ Coverage for src / lyscripts / data / lyproxify.py: + 46% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 123 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Consumes raw data and transforms it into a CSV that `LyProX`_ understands.

3To do so, it needs a dictionary that defines a mapping from raw columns to the LyProX

4style data format. See the documentation of the :py:func:`.transform_to_lyprox` function

5for more information.

7.. _LyProX: https://lyprox.org

8"""

10import importlib.util

11import warnings

12from pathlib import Path

13from typing import Annotated, Any

15import lydata # noqa: F401

16import pandas as pd

17from loguru import logger

18from lydata import C

19from pydantic import AfterValidator, Field, FilePath

21from lyscripts.cli import assemble_main

22from lyscripts.configs import BaseCLI

23from lyscripts.data.utils import save_table_to_csv

24from lyscripts.utils import delete_private_keys, flatten, load_patient_data

# Silence every ``FutureWarning`` for the whole process once this module is
# imported (this is not limited to warnings raised from this module).
warnings.simplefilter(action="ignore", category=FutureWarning)

def ensure_python_file(file: Path) -> Path:
    """Return ``file`` unchanged if it has a ``.py`` suffix.

    Raises a ``ValueError`` for any other suffix. Only the name is inspected; the
    file is neither opened nor checked for existence.
    """
    is_python_file = file.suffix == ".py"
    if is_python_file:
        return file

    raise ValueError("Mapping file must be a Python file.")

def ensure_column_map(file: Path) -> Path:
    """Ensure the Python file contains a ``COLUMN_MAP`` dictionary.

    The file is imported (i.e. *executed*) as a module named ``map_module`` to
    inspect its attributes, so it must be trusted code. On success, ``file`` is
    returned unchanged.

    A ``ValueError`` is raised if no import spec/loader can be created for the
    file (e.g. because of an unsupported suffix) or if the imported module does not
    define ``COLUMN_MAP``. Exceptions raised while executing the file propagate
    unchanged.
    """
    spec = importlib.util.spec_from_file_location("map_module", file)

    # `spec_from_file_location()` returns `None` when it cannot determine a loader.
    # Report that as a validation error instead of an opaque `AttributeError`.
    if spec is None or spec.loader is None:
        raise ValueError(f"Mapping file {file} cannot be imported as a Python module.")

    mapping = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mapping)

    if not hasattr(mapping, "COLUMN_MAP"):
        raise ValueError("Mapping file must contain a `COLUMN_MAP` dictionary.")

    return file

class LyproxifyCLI(BaseCLI):
    """Map any CSV file to the LyProX format with the help of a Python mapping dict."""

    input_file: FilePath = Field(description="Location of raw CSV data.")
    num_header_rows: int = Field(
        default=1,
        description="Number of rows comprising the header of the raw CSV file.",
    )
    mapping_file: Annotated[
        FilePath,
        AfterValidator(ensure_python_file),
        AfterValidator(ensure_column_map),
    ] = Field(
        description=(
            "Location of Python file containing a `COLUMN_MAP` dictionary. It may also "
            "contain an `EXCLUDE` list of tuples `(column, check)` to exclude patients."
        ),
    )
    drop_rows: list[int] = Field(
        default=[],
        description=(
            "Delete rows of specified indices. Counting of rows start at 0 _after_ "
            "the `header-rows`."
        ),
    )
    drop_cols: list[int] = Field(
        default=[],
        description="Delete columns of specified indices.",
    )
    output_file: Path = Field(description="Location to store the lyproxified CSV file.")

    def cli_cmd(self) -> None:
        """Start the ``lyproxify`` subcommand.

        After reading in the specified file, it will first ``drop_rows`` and
        ``drop_cols``, as specified in the command line arguments. Then, it will
        call :py:func:`.exclude_patients` which will further remove patients based
        on the ``EXCLUDE`` object in the ``mapping_file`` (if the mapping file does
        not define ``EXCLUDE``, no patients are excluded). Finally, it will call
        :py:func:`.transform_to_lyprox` to transform the data into the LyProX format
        given the ``COLUMN_MAP`` object in the ``mapping_file``.

        If the transformed table contains a ``side`` column, the left/right reporting
        is converted to ipsi-/contralateral before the result is written to
        ``output_file``.
        """
        logger.debug(self.model_dump_json(indent=2))

        raw = load_patient_data(
            file_path=self.input_file,
            header=list(range(self.num_header_rows)),
        )
        raw = clean_header(
            table=raw,
            num_cols=raw.shape[1],
            num_header_rows=self.num_header_rows,
        )

        # Resolve positional column indices to labels before dropping them.
        cols_to_drop = raw.columns[self.drop_cols]
        trimmed = raw.drop(cols_to_drop, axis="columns")
        trimmed = trimmed.drop(index=self.drop_rows)
        trimmed = trimmed.dropna(axis="index", how="all")
        logger.info(f"Dropped rows {self.drop_rows} and columns {cols_to_drop}.")

        # Import (and thereby execute) the user-provided mapping file.
        spec = importlib.util.spec_from_file_location("map_module", self.mapping_file)
        mapping = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mapping)
        logger.info(f"Imported mapping instructions from {self.mapping_file}")

        # `EXCLUDE` is documented as optional, so fall back to excluding nobody.
        exclusions = getattr(mapping, "EXCLUDE", [])
        reduced = exclude_patients(trimmed, exclusions)
        processed = transform_to_lyprox(reduced, mapping.COLUMN_MAP)

        if "side" in processed.ly:
            processed = leftright_to_ipsicontra(processed)

        save_table_to_csv(file_path=self.output_file, table=processed)

120

121

class ParsingError(Exception):
    """Raised when the raw CSV data cannot be parsed into the target format."""

124

125

def clean_header(
    table: pd.DataFrame,
    num_cols: int,
    num_header_rows: int,
) -> pd.DataFrame:
    """Return a copy of ``table`` with placeholder header cells renamed.

    Every header label of the form ``Unnamed: {col}_level_{row}`` with ``col`` in
    ``range(num_cols)`` and ``row`` in ``range(num_header_rows)`` is replaced by
    ``{col}_lvl_{row}``. All other labels and the input ``table`` itself are left
    untouched.
    """
    # Collect all placeholder -> clean-name pairs up front.
    renames = {
        f"Unnamed: {col}_level_{row}": f"{col}_lvl_{row}"
        for col in range(num_cols)
        for row in range(num_header_rows)
    }

    cleaned = table.copy()
    cleaned.rename(columns=renames, inplace=True)

    logger.debug("Cleaned headers of the raw data.")
    return cleaned

143

144

def get_instruction_depth(nested_column_map: dict[tuple, dict[str, Any]]) -> int:
    """Get the depth at which the column mapping instructions are nested.

    Instructions are a dictionary that contains either a 'func' or 'default' key.
    Only the *first* entry of each nesting level is followed to determine the depth.

    >>> nested_column_map = {"patient": {"age": {"func": int}}}
    >>> get_instruction_depth(nested_column_map)
    2
    >>> flat_column_map = flatten(nested_column_map, max_depth=2)
    >>> get_instruction_depth(flat_column_map)
    1
    >>> nested_column_map = {"patient": {"__doc__": "some patient info", "age": 61}}
    >>> get_instruction_depth(nested_column_map)
    Traceback (most recent call last):
        ...
    ValueError: Leaf of column map must be a dictionary with 'func' or 'default' key.
    """
    depth = 0
    level = nested_column_map

    while True:
        try:
            first_value = next(iter(level.values()))
        except StopIteration:
            raise ValueError("Empty column map.") from None

        if not isinstance(first_value, dict):
            raise ValueError(
                "Leaf of column map must be a dictionary with 'func' or 'default' key.",
            )

        depth += 1
        if "func" in first_value or "default" in first_value:
            return depth

        # Not an instruction yet: descend one level and look again.
        level = first_value

174

175

def generate_markdown_docs(
    nested_column_map: dict[tuple, dict[str, Any]],
    depth: int = 0,
    indent_len: int = 4,
) -> str:
    r"""Generate a markdown nested, ordered list as documentation for the column map.

    A key in the dictionary is documented when its value is a dictionary containing
    a ``"__doc__"`` key. Nested dictionaries are always descended into, and each
    nesting level is indented by ``indent_len`` spaces. The numbering restarts at
    every level and only counts documented keys.

    >>> nested_column_map = {
    ...     "patient": {
    ...         "__doc__": "some patient info",
    ...         "age": {
    ...             "__doc__": "age of the patient",
    ...             "func": int,
    ...             "columns": ["age"],
    ...         },
    ...     },
    ... }
    >>> generate_markdown_docs(nested_column_map)
    '1. **`patient:`** some patient info\n    1. **`age:`** age of the patient\n'
    """
    padding = " " * indent_len * depth
    parts = []
    num_documented = 0

    for key, value in nested_column_map.items():
        if not isinstance(value, dict):
            continue

        if "__doc__" in value:
            num_documented += 1
            parts.append(f"{padding}{num_documented}. **`{key}:`** {value['__doc__']}\n")

        # Children are rendered one level deeper, right after their parent.
        parts.append(generate_markdown_docs(value, depth + 1, indent_len))

    return "".join(parts)

211

212

def transform_to_lyprox(
    raw: pd.DataFrame,
    column_map: dict[tuple, dict[str, Any]],
) -> pd.DataFrame:
    """Transform ``raw`` data into table that can be uploaded directly to `LyProX`_.

    The ``column_map`` dictionary holds one instruction per column of the resulting
    'lyproxified' `pd.DataFrame`. It may be nested; private keys are removed and the
    map is flattened before it is processed. E.g., for the column corresponding to a
    patient's age, the dictionary should contain a key-value pair of this shape:

    .. code-block:: python

        column_map = {
            ("patient", "core", "age"): {
                "func": compute_age_from_raw,
                "kwargs": {"randomize": False},
                "columns": ["birthday", "date of diagnosis"]
            },
        }

    In this example, the function ``compute_age_from_raw`` is called once per row
    with the values of the columns ``"birthday"`` and ``"date of diagnosis"`` as
    positional arguments, and the keyword argument ``"randomize"`` set to ``False``.
    Its return value is stored in the column ``("patient", "core", "age")``.

    Each instruction must have either a ``"default"`` key (whose value fills the
    whole column) or a ``"func"`` key, optionally along with ``"columns"`` and
    ``"kwargs"``. If the function does not take any arguments, ``"columns"`` can be
    omitted. If it also does not take any keyword arguments, ``"kwargs"`` can be
    omitted, too. An instruction that is the empty string is skipped.

    A :py:class:`ParsingError` is raised when an instruction has neither
    ``"default"`` nor ``"func"``, or when computing a column raises any exception.

    .. _LyProX: https://lyprox.org
    """
    instructions = delete_private_keys(column_map)

    instruction_depth = get_instruction_depth(instructions)
    if instruction_depth > 1:
        instructions = flatten(instructions, max_depth=instruction_depth)

    processed = pd.DataFrame(columns=pd.MultiIndex.from_tuples(instructions.keys()))

    for target_col, instruction in instructions.items():
        if instruction == "":
            continue

        if "default" in instruction:
            processed[target_col] = [instruction["default"]] * len(raw)
            continue

        if "func" not in instruction:
            raise ParsingError(
                f"Column {target_col} has neither a `default` value nor `func` "
                "describing how to fill this column.",
            )

        cols = instruction.get("columns", [])
        kwargs = instruction.get("kwargs", {})
        func = instruction["func"]

        try:
            # One call per row, with the row's values of `cols` as positional args.
            processed[target_col] = [
                func(*vals, **kwargs) for vals in raw[cols].values
            ]
        except Exception as exc:
            raise ParsingError(
                f"Exception encountered while parsing column {target_col}",
            ) from exc

    logger.info("Transformed raw data to LyProX format.")
    return processed

283

284

def leftright_to_ipsicontra(data: pd.DataFrame):
    """Convert absolute (left/right) side reporting to tumor-relative reporting.

    Rows whose ``side`` is ``"right"`` get their level-1 column labels renamed
    ``right -> ipsi`` and ``left -> contra``; all other rows get ``left -> ipsi`` and
    ``right -> contra``. Both parts are then stacked again (non-right rows first)
    with a fresh index. The table ``data`` should already be in the format LyProX
    requires, except for the side-reporting of LNL involvement.

    A ``RuntimeError`` is raised if the number of rows changed in the process.
    """
    num_patients = len(data)

    tumor_not_right = data.ly.query(C("side") != "right")
    tumor_right = data.ly.query(C("side") == "right")

    tumor_not_right = tumor_not_right.rename(
        columns={"left": "ipsi", "right": "contra"},
        level=1,
    )
    tumor_right = tumor_right.rename(
        columns={"left": "contra", "right": "ipsi"},
        level=1,
    )

    data = pd.concat([tumor_not_right, tumor_right], ignore_index=True)
    if len(data) != num_patients:
        raise RuntimeError("Number of patients changed")

    logger.info("Transformed side reporting to ipsi- and contralateral.")
    return data

308

309

def exclude_patients(raw: pd.DataFrame, exclude: list[tuple[str, Any]]):
    """Return a filtered copy of ``raw`` without the patients matched by ``exclude``.

    The ``exclude`` list contains tuples ``(column, check)``. The checks are applied
    one after another: each ``check`` receives the given ``column`` of the rows that
    are still left, and every row for which it evaluates to ``True`` is removed.
    The original index labels of the remaining rows are kept.

    >>> exclude = [("age", lambda s: s > 50)]
    >>> table = pd.DataFrame({
    ...     "age":        [43, 82, 18, 67],
    ...     "T-category": [ 3,  4,  2,  1],
    ... })
    >>> exclude_patients(table, exclude)
       age  T-category
    0   43           3
    2   18           2
    """
    remaining = raw.copy()

    for column, check in exclude:
        remaining = remaining.loc[~check(remaining[column])]

    logger.info(f"Excluded {len(raw) - len(remaining)} patients.")
    return remaining

337

338

if __name__ == "__main__":
    # Allow running this module directly as a standalone ``lyproxify`` command.
    main = assemble_main(prog_name="lyproxify", settings_cls=LyproxifyCLI)
    main()

+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a_split_py.html b/htmlcov/z_9b7bcb970ba14d6a_split_py.html new file mode 100644 index 0000000..583f3b8 --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a_split_py.html @@ -0,0 +1,170 @@ + + + + + Coverage for src/lyscripts/data/split.py: 52% + + + + + +

+ Coverage for src / lyscripts / data / split.py: + 52% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 29 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Split a dataset into cross-validation folds based on params.yaml file."""

3import warnings

4from pathlib import Path

6import numpy as np

7import pandas as pd

8from loguru import logger

9from pydantic import Field

11from lyscripts.cli import assemble_main

12from lyscripts.configs import BaseCLI, CrossValidationConfig, DataConfig

13from lyscripts.data.utils import save_table_to_csv

# Silence every ``FutureWarning`` for the whole process once this module is
# imported (this is not limited to warnings raised from this module).
warnings.simplefilter(action="ignore", category=FutureWarning)

class SplitCLI(BaseCLI):
    """Split a dataset into cross-validation folds."""

    input: DataConfig
    cross_validation: CrossValidationConfig = CrossValidationConfig()
    output_dir: Path = Field(description="The folder to store the split CSV files in.")

    def cli_cmd(self) -> None:
        """Run the ``split`` subcommand.

        This will load the dataset specified in the ``input`` argument, shuffle it
        using the seed from the ``cross_validation`` argument, and split it into the
        number of folds specified there. For every fold ``k``, the rows of that fold
        are written to ``{k}_eval.csv`` and all remaining rows to ``{k}_train.csv``
        in the folder specified in the ``output_dir`` argument.
        """
        logger.debug(self.model_dump_json(indent=2))

        self.output_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Ensure output directory {self.output_dir} exists")

        data = self.input.load()

        shuffled_data = data.sample(
            frac=1.0,
            replace=False,
            random_state=self.cross_validation.seed,
        ).reset_index(drop=True)

        num_folds = self.cross_validation.folds

        # Split the row *positions* rather than the DataFrame itself: calling
        # `np.array_split()` on a DataFrame relies on the deprecated
        # `DataFrame.swapaxes`. The resulting folds contain the same rows.
        fold_positions = np.array_split(
            ary=np.arange(len(shuffled_data)),
            indices_or_sections=num_folds,
        )
        split_datas = [shuffled_data.iloc[positions] for positions in fold_positions]

        for fold, eval_data in enumerate(split_datas):
            # Training data of a fold is everything that is not its evaluation data.
            _train_datas = [part for i, part in enumerate(split_datas) if i != fold]
            train_data = pd.concat(
                objs=_train_datas,
                axis="index",
                ignore_index=True,
            )

            save_table_to_csv(
                file_path=self.output_dir / f"{fold}_train.csv",
                table=train_data,
            )
            save_table_to_csv(
                file_path=self.output_dir / f"{fold}_eval.csv",
                table=eval_data,
            )

if __name__ == "__main__":
    # Allow running this module directly as a standalone ``split`` command.
    main = assemble_main(prog_name="split", settings_cls=SplitCLI)
    main()

+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a_utils_py.html b/htmlcov/z_9b7bcb970ba14d6a_utils_py.html new file mode 100644 index 0000000..4a73044 --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a_utils_py.html @@ -0,0 +1,113 @@ + + + + + Coverage for src/lyscripts/data/utils.py: 100% + + + + + +

+ Coverage for src / lyscripts / data / utils.py: + 100% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 9 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Utilities related to the commands for data cleaning and processing."""

3from pathlib import Path

5import pandas as pd

6from loguru import logger

8from lyscripts.decorators import check_output_dir_exists

@check_output_dir_exists
def save_table_to_csv(file_path: Path, table: pd.DataFrame):
    """Save a ``table`` as CSV to ``file_path``.

    The table's shape and the resolved target path are logged before writing. The
    index of the ``table`` is not written to the file.
    """
    shape = table.shape
    logger.info(f"Saving table with {shape=} to {file_path.resolve()}")
    # `to_csv()` documents `index` as a bool; `False` states the intent explicitly.
    table.to_csv(file_path, index=False)

+ + + diff --git a/htmlcov/z_f60392fe1c3f3e73___init___py.html b/htmlcov/z_f60392fe1c3f3e73___init___py.html new file mode 100644 index 0000000..e18c170 --- /dev/null +++ b/htmlcov/z_f60392fe1c3f3e73___init___py.html @@ -0,0 +1,247 @@ + + + + + Coverage for src/lyscripts/data/collect/__init__.py: 53% + + + + + +

+ Coverage for src / lyscripts / data / collect / __init__.py: + 53% +

+ + + Show/hide keyboard shortcuts

+ +

Shortcuts on this page

+ r + m + x + toggle line displays +

+ j + k + next/prev highlighted chunk +

+ 0 (zero) top of page +

+ 1 (one) first highlighted chunk +

+ [ + ] + prev/next file +

+ u up to the index +

+ ? show/hide this help +

+ 58 statements + + + +

+ « prev + ^ index + » next + + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +

1"""Submodule to collect data interactively using a simple web interface.

3With the simple command

5.. code-block:: bash

7 lyscripts data collect

9One can start a very basic web server that serves an interactive UI at

10``http://localhost:8000/``. There, one can enter patient, tumor, and lymphatic

11involvement data one by one. When completed, the "submit" button will parse, validate,

12and convert the data to serve a downloadable CSV file.

14The resulting CSV file is in the correct format to be used in `LyProX`_ and for

15inference using our `lymph-model`_ library.

17.. _LyProX: https://lyprox.org

18.. _lymph-model: https://lymph-model.readthedocs.io

19"""

21import io

22import logging

23from pathlib import Path

24from typing import Any

26import lydata

27import lydata.validator

28import pandas as pd

29from fastapi import FastAPI, HTTPException

30from fastapi.responses import StreamingResponse

31from loguru import logger

32from pydantic import Field, RootModel

33from starlette.responses import FileResponse, HTMLResponse

35from lyscripts.cli import InterceptHandler, _current_log_level

36from lyscripts.configs import BaseCLI

# The FastAPI application object; the route decorators in this module register
# their handlers on it.
app = FastAPI(
    title="lyDATA Collector",
    description=(
        "A simple web interface to collect data for the lyDATA datasets. "
        "This is a prototype and not intended for production use."
    ),
    version=lydata.__version__,
)

# Directory containing this module; ``index.html`` and ``collector.js`` are
# served from here.
BASE_DIR = Path(__file__).parent
# The record model is built once, at import time, from lydata's default modalities.
modalities = lydata.schema.get_default_modalities()
RecordModel = lydata.schema.create_full_record_model(modalities, model_name="Record")
# Root model for a list of records; its JSON schema is what ``/schema`` serves.
ROOT_MODEL = RootModel[list[RecordModel]]

@app.get("/")
def serve_index_html() -> HTMLResponse:
    """Serve the ``index.html`` file at the URL's root.

    The file is read from the directory of this module on every request and is
    decoded as UTF-8, independent of the platform's default encoding.
    """
    content = (BASE_DIR / "index.html").read_text(encoding="utf-8")
    return HTMLResponse(content=content)

@app.get("/schema")
def serve_schema() -> dict[str, Any]:
    """Return the JSON schema of ``ROOT_MODEL``, i.e. of a list of records."""
    schema = ROOT_MODEL.model_json_schema()
    return schema

@app.get("/collector.js")
def serve_collector_js() -> FileResponse:
    """Serve the ``collector.js`` file under ``"http://{host}:{port}/collector.js"``.

    The file is taken from the directory of this module. According to the original
    documentation, this frontend script loads the `JSON-Editor`_ library and
    initializes it with the schema returned by :py:func:`serve_schema`.

    .. _JSON-Editor: https://github.com/json-editor/json-editor/
    """
    script_path = BASE_DIR / "collector.js"
    return FileResponse(script_path)

@app.post("/submit")
async def process(data: RootModel) -> StreamingResponse:
    """Process the submitted data to a DataFrame and return it as a CSV download.

    `FastAPI`_ parses the received JSON body into ``data``. Every record in
    ``data.root`` is flattened using :py:func:`lydata.validator.flatten`, the
    flattened records are collected in a pandas DataFrame with multi-level columns,
    and that table is streamed back as the attachment ``lydata_records.csv``.

    An ``HTTPException`` with status code 400 is raised when no records were sent.

    .. _FastAPI: https://fastapi.tiangolo.com/
    """
    # NOTE(review): the parameter is annotated with the generic ``RootModel``, not
    # with ``ROOT_MODEL`` (whose schema is served under ``/schema``) - confirm that
    # skipping the per-record validation here is intended.
    logger.info(f"Received data: {data.root}")

    if len(data.root) == 0:
        logger.warning("No records provided in the data.")
        raise HTTPException(
            status_code=400,
            detail="No records provided in the data.",
        )

    flattened_records = []

    for record in data.root:
        flattened_record = lydata.validator.flatten(record)
        logger.debug(f"Flattened record: {flattened_record}")
        flattened_records.append(flattened_record)

    df = pd.DataFrame(flattened_records)
    # Build the column MultiIndex from the frame's own column labels instead of the
    # keys of whichever record happened to be last in the loop. That stays correct
    # when records differ in their keys or in the order of their keys.
    df.columns = pd.MultiIndex.from_tuples(df.columns)
    logger.info(df.patient.core.head())

    buffer = io.StringIO()
    df.to_csv(buffer, index=False)
    buffer.seek(0)  # rewind, so that the response streams from the beginning
    logger.success("Data prepared for download")
    return StreamingResponse(
        buffer,
        media_type="text/csv",
        headers={"Content-Disposition": "attachment; filename=lydata_records.csv"},
    )

121

122

class CollectorCLI(BaseCLI):
    """Serve a FastAPI web app for collecting involvement patterns as CSV files."""

    hostname: str = Field(
        default="localhost",
        description="Hostname to run the FastAPI app on.",
    )
    port: int = Field(
        default=8000,
        description="Port to run the FastAPI app on.",
    )

    def cli_cmd(self) -> None:
        """Start a uvicorn server that serves the app on ``hostname:port``.

        Messages from the standard :py:mod:`logging` module are redirected to
        loguru, and logging of the ``lydata`` package is enabled, before the server
        is started.
        """
        logger.debug(self.model_dump_json(indent=2))
        # Imported here, so that uvicorn is only loaded when the server is started.
        import uvicorn

        # Route everything from the standard logging module through loguru.
        intercept_handlers = [InterceptHandler()]
        logging.basicConfig(handlers=intercept_handlers, level=0, force=True)
        logger.enable("lydata")

        # `log_config=None` keeps uvicorn from installing its own logging config.
        uvicorn_log_level = _current_log_level.lower()
        uvicorn.run(
            app,
            host=self.hostname,
            port=self.port,
            log_level=uvicorn_log_level,
            log_config=None,
        )

+ + + diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 75401c0..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,210 +0,0 @@ -[build-system] -requires = [ - "setuptools >= 61", - "setuptools_scm", - "wheel" -] -build-backend = "setuptools.build_meta" - -[project] -name = "lyscripts" -description = "Package to interact with lymphatic progression data and models." -authors = [ - {name = "Roman Ludwig", email = "gygqdstu3@mozmail.com"}, - {name = "Yoel Pérez Haas", email = "yoel.perezhaas@usz.ch"}, - {name = "Noemi Bührer", email = "noemi.buehrer@usz.ch"}, -] -readme = "README.md" -requires-python = ">=3.10" -keywords = ["scripts", "lymph", "inference"] -license = {text = "MIT"} -classifiers = [ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT License", - "Natural Language :: English", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Topic :: Scientific/Engineering", - "Topic :: Software Development :: Libraries", -] -dependencies = [ - "numpy", - "pandas", - "emcee", - "h5py", - "tables", - "matplotlib", - "corner", - "scipy", - "xlrd", - "rich", - "rich-argparse", - "pyyaml", - "lymph-model >= 1.3.3", - "deprecated", - "joblib", - "pydantic", - "pydantic-settings >= 2.7.0, != 2.9.1, != 2.9.0", - "numpydantic", - "loguru", - "fastapi", - "uvicorn", - "lydata >= 0.4.0", -] -dynamic = ["version"] - -[project.urls] -source = "https://github.com/lycosystem/lyscripts" -documentation = "https://lyscripts.readthedocs.io" - -[project.optional-dependencies] -docs = [ - "sphinx", - "sphinx-book-theme", - "sphinx-autodoc-typehints", - "sphinxcontrib-programoutput", - "myst_parser", - "autodoc_pydantic", -] -tests = [ - "pytest", - "pytest-cov", - 
"pytest-mpl", -] -dev = [ - "pre-commit", - "git-cliff", -] - -[project.scripts] -lyscripts = "lyscripts:main" - -[tool.setuptools] -include-package-data = true - -[tool.setuptools.packages.find] -where = ["src"] - -[tool.setuptools.package-data] -"lyscripts" = [ - "src/lyscripts/data/collect/collector.js", - "src/lyscripts/data/collect/index.html", -] - -[tool.setuptools_scm] -write_to = "src/lyscripts/_version.py" -local_scheme = "no-local-version" - -[tool.pytest.ini_options] -testpaths = "." - -[tool.ruff.lint] -select = ["E", "F", "W", "B", "C", "R", "U", "D", "I", "S", "T", "A", "N", "COM", "FURB", "NPY", "UP"] -ignore = ["D409"] - -[tool.ruff.lint.per-file-ignores] -"__init__.py" = ["E402"] -"{tests,docs}/*" = [ - "D103", - "E402", - "S101", - "S603", - "S607", -] - -[tool.coverage.paths] -source = [ - "src/", - "**/site-packages/", -] - -[tool.coverage.run] -relative_files = true - -# git-cliff ~ default configuration file -# https://git-cliff.org/docs/configuration -# -# Lines starting with "#" are comments. -# Configuration options are organized into tables and keys. -# See documentation for more information on available options. - -[tool.git-cliff.changelog] -# changelog header -header = """ -# Changelog\n -All notable changes to this project will be documented in this file.\n -""" -# template for the changelog body -# https://tera.netlify.app/docs -body = """ -{% if version %}\ - ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }} -{% else %}\ - ## [unreleased] -{% endif %}\ -{% for group, commits in commits | group_by(attribute="group") %} - ### {{ group | upper_first }} - {% for c in commits %} - - {% if c.breaking %}⚠ **BREAKING** {% endif -%} - {% if c.scope %}(**{{ c.scope }}**) {% endif -%} - {{ c.message | upper_first }}. 
- {%- if c.footers %}{% for f in c.footers %}{% if not f.breaking %} {{ f.token }} [{{ f.value }}].{% endif %}{% endfor %}{% endif %} - {%- if c.body %}\\ - {{ c.body | indent(prefix=" ", first=True) }} - {% endif -%} - {% endfor %} -{% endfor %}\n -""" -# remove the leading and trailing whitespace from the template -trim = true -# changelog footer -footer = """ - -""" - -[tool.git-cliff.git] -# parse the commits based on https://www.conventionalcommits.org -conventional_commits = true -# filter out the commits that are not conventional -filter_unconventional = true -# process each line of a commit as an individual commit -split_commits = false -# regex for preprocessing the commit messages -commit_preprocessors = [ - # { pattern = '$(\w+\s)?#([0-9]+)$', replace = "([#${2}](https://github.com/orhun/git-cliff/issues/${2}))"}, # replace issue numbers -] -# regex for parsing and grouping commits -commit_parsers = [ - { message = "^feat", group = "Features" }, - { message = "^fix", group = "Bug Fixes" }, - { message = "^docs", group = "Documentation" }, - { message = "^perf", group = "Performance" }, - { message = "^refactor", group = "Refactor" }, - { message = "^style", group = "Styling" }, - { message = "^test", group = "Testing" }, - { message = "^chore\$release\$: prepare for", skip = true }, - { message = "^chore", group = "Miscellaneous Tasks" }, - { body = ".*security", group = "Security" }, -] -# protect breaking changes from being skipped due to matching a skipping commit_parser -protect_breaking_commits = false -# filter out the commits that are not matched by commit parsers -filter_commits = false -# glob pattern for matching git tags -tag_pattern = "[0-9]*" -# regex for skipping tags -skip_tags = "v0.1.0-beta.1" -# regex for ignoring tags -ignore_tags = "" -# sort the tags topologically -topo_order = false -# limit the number of commits included in the changelog. 
-# limit_commits = 42 -# sort the commits inside sections by oldest/newest order -sort_commits = "oldest" diff --git a/schemas/ly.json b/schemas/ly.json deleted file mode 100644 index 561127c..0000000 --- a/schemas/ly.json +++ /dev/null @@ -1,786 +0,0 @@ -{ - "$defs": { - "CrossValidationConfig": { - "description": "Configs for splitting a dataset into cross-validation folds.", - "properties": { - "seed": { - "default": 42, - "description": "Seed for the random number generator.", - "title": "Seed", - "type": "integer" - }, - "folds": { - "default": 5, - "description": "Number of folds to split the dataset into.", - "title": "Folds", - "type": "integer" - } - }, - "title": "CrossValidationConfig", - "type": "object" - }, - "DataConfig": { - "description": "Where to load lymphatic progression data from and how to feed it into a model.", - "properties": { - "source": { - "anyOf": [ - { - "format": "file-path", - "type": "string" - }, - { - "$ref": "#/$defs/LyDataset" - } - ], - "description": "Either a path to a CSV file or a config that specifies how and where to fetch the data from.", - "title": "Source" - }, - "side": { - "anyOf": [ - { - "enum": [ - "ipsi", - "contra" - ], - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Side of the neck to load data for. 
Only for Unilateral models.", - "title": "Side" - }, - "mapping": { - "additionalProperties": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "string" - } - ] - }, - "description": "Optional mapping of numeric T-stages to model T-stages.", - "title": "Mapping", - "type": "object" - } - }, - "required": [ - "source" - ], - "title": "DataConfig", - "type": "object" - }, - "DiagnosisConfig": { - "description": "Defines an ipsi- and contralateral diagnosis pattern.", - "properties": { - "ipsi": { - "additionalProperties": { - "additionalProperties": { - "anyOf": [ - { - "enum": [ - false, - 0, - "healthy", - true, - 1, - "involved", - "micro", - "macro", - "notmacro" - ] - }, - { - "type": "null" - } - ] - }, - "type": "object" - }, - "default": {}, - "description": "Observed diagnoses by different modalities on the ipsi neck.", - "examples": [ - { - "CT": { - "II": true, - "III": false - } - } - ], - "title": "Ipsi", - "type": "object" - }, - "contra": { - "additionalProperties": { - "additionalProperties": { - "anyOf": [ - { - "enum": [ - false, - 0, - "healthy", - true, - 1, - "involved", - "micro", - "macro", - "notmacro" - ] - }, - { - "type": "null" - } - ] - }, - "type": "object" - }, - "default": {}, - "description": "Observed diagnoses by different modalities on the contra neck.", - "title": "Contra", - "type": "object" - } - }, - "title": "DiagnosisConfig", - "type": "object" - }, - "DistributionConfig": { - "description": "Configuration defining a distribution over diagnose times.", - "properties": { - "kind": { - "default": "frozen", - "description": "Parametric distributions may be updated.", - "enum": [ - "frozen", - "parametric" - ], - "title": "Kind", - "type": "string" - }, - "func": { - "const": "binomial", - "default": "binomial", - "description": "Name of predefined function to use as distribution.", - "title": "Func", - "type": "string" - }, - "params": { - "additionalProperties": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": 
"number" - } - ] - }, - "default": {}, - "description": "Parameters to pass to the predefined function.", - "title": "Params", - "type": "object" - } - }, - "title": "DistributionConfig", - "type": "object" - }, - "GraphConfig": { - "description": "Specifies how the tumor(s) and LNLs are connected in a DAG.", - "properties": { - "tumor": { - "additionalProperties": { - "items": { - "type": "string" - }, - "type": "array" - }, - "description": "Define the name of the tumor(s) and which LNLs it/they drain to.", - "title": "Tumor", - "type": "object" - }, - "lnl": { - "additionalProperties": { - "items": { - "type": "string" - }, - "type": "array" - }, - "description": "Define the name of the LNL(s) and which LNLs it/they drain to.", - "title": "Lnl", - "type": "object" - } - }, - "required": [ - "tumor", - "lnl" - ], - "title": "GraphConfig", - "type": "object" - }, - "InvolvementConfig": { - "description": "Config that defines an ipsi- and contralateral involvement pattern.", - "properties": { - "ipsi": { - "additionalProperties": { - "anyOf": [ - { - "enum": [ - false, - 0, - "healthy", - true, - 1, - "involved", - "micro", - "macro", - "notmacro" - ] - }, - { - "type": "null" - } - ] - }, - "default": {}, - "description": "Involvement pattern for the ipsilateral side of the neck.", - "examples": [ - { - "II": true, - "III": false - } - ], - "title": "Ipsi", - "type": "object" - }, - "contra": { - "additionalProperties": { - "anyOf": [ - { - "enum": [ - false, - 0, - "healthy", - true, - 1, - "involved", - "micro", - "macro", - "notmacro" - ] - }, - { - "type": "null" - } - ] - }, - "default": {}, - "description": "Involvement pattern for the contralateral side of the neck.", - "title": "Contra", - "type": "object" - } - }, - "title": "InvolvementConfig", - "type": "object" - }, - "LyDataset": { - "description": "Specification of a dataset.", - "properties": { - "year": { - "description": "Release year of dataset.", - "exclusiveMinimum": 0, - "maximum": 2025, - 
"title": "Year", - "type": "integer" - }, - "institution": { - "description": "Institution's short code. E.g., University Hospital Zurich: `usz`.", - "minLength": 1, - "title": "Institution", - "type": "string" - }, - "subsite": { - "description": "Tumor subsite(s) patients in this dataset were diagnosed with.", - "minLength": 1, - "title": "Subsite", - "type": "string" - }, - "repo_name": { - "anyOf": [ - { - "minLength": 1, - "type": "string" - }, - { - "type": "null" - } - ], - "default": "lycosystem/lydata", - "description": "GitHub `repository/owner`.", - "title": "Repo Name" - }, - "ref": { - "anyOf": [ - { - "minLength": 1, - "type": "string" - }, - { - "type": "null" - } - ], - "default": "main", - "description": "Branch/tag/commit of the repo.", - "title": "Ref" - }, - "local_dataset_dir": { - "anyOf": [ - { - "format": "directory-path", - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Path to directory containing all the dataset subdirectories. So, e.g. 
if `path_on_disk` is `~/datasets` and the dataset is `2023-clb-multisite`, then the CSV file is expected to be at `~/datasets/2023-clb-multisite/data.csv`.", - "title": "Local Dataset Dir" - } - }, - "required": [ - "year", - "institution", - "subsite" - ], - "title": "LyDataset", - "type": "object" - }, - "ModalityConfig": { - "description": "Define a diagnostic or pathological modality.", - "properties": { - "spec": { - "description": "Specificity of the modality.", - "maximum": 1.0, - "minimum": 0.5, - "title": "Spec", - "type": "number" - }, - "sens": { - "description": "Sensitivity of the modality.", - "maximum": 1.0, - "minimum": 0.5, - "title": "Sens", - "type": "number" - }, - "kind": { - "default": "clinical", - "description": "Clinical modalities cannot detect microscopic disease.", - "enum": [ - "clinical", - "pathological" - ], - "title": "Kind", - "type": "string" - } - }, - "required": [ - "spec", - "sens" - ], - "title": "ModalityConfig", - "type": "object" - }, - "ModelConfig": { - "description": "Define which of the ``lymph`` models to use and how to set them up.", - "properties": { - "external_file": { - "anyOf": [ - { - "format": "file-path", - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Path to a Python file that defines a model.", - "title": "External File" - }, - "class_name": { - "default": "Unilateral", - "description": "Name of the model class to use.", - "enum": [ - "Unilateral", - "Bilateral", - "Midline" - ], - "title": "Class Name", - "type": "string" - }, - "constructor": { - "default": "binary", - "description": "Trinary models differentiate btw. micro- and macroscopic disease.", - "enum": [ - "binary", - "trinary" - ], - "title": "Constructor", - "type": "string" - }, - "max_time": { - "default": 10, - "description": "Max. 
number of time-steps to evolve the model over.", - "title": "Max Time", - "type": "integer" - }, - "named_params": { - "default": null, - "description": "Subset of valid model parameters a sampler may provide in the form of a dictionary to the model instead of as an array. Or, after sampling, with this list, one may safely recover which parameter corresponds to which index in the sample.", - "items": { - "type": "string" - }, - "title": "Named Params", - "type": "array" - }, - "kwargs": { - "default": {}, - "description": "Additional keyword arguments to pass to the model constructor.", - "title": "Kwargs", - "type": "object" - } - }, - "title": "ModelConfig", - "type": "object" - }, - "SamplingConfig": { - "description": "Settings to configure the MCMC sampling.", - "properties": { - "storage_file": { - "description": "Path to HDF5 file store results or load last state.", - "format": "path", - "title": "Storage File", - "type": "string" - }, - "history_file": { - "anyOf": [ - { - "format": "path", - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Path to store the burn-in metrics (as CSV file).", - "title": "History File" - }, - "dataset": { - "default": "mcmc", - "description": "Name of the dataset in the HDF5 file.", - "title": "Dataset", - "type": "string" - }, - "cores": { - "anyOf": [ - { - "exclusiveMinimum": 0, - "type": "integer" - }, - { - "type": "null" - } - ], - "default": 16, - "description": "Number of cores to use for parallel sampling. 
If `None`, no parallel processing is used.", - "title": "Cores" - }, - "seed": { - "default": 42, - "description": "Seed for the random number generator.", - "title": "Seed", - "type": "integer" - }, - "walkers_per_dim": { - "default": 20, - "description": "Number of walkers per parameter space dimension.", - "title": "Walkers Per Dim", - "type": "integer" - }, - "check_interval": { - "default": 50, - "description": "Check for convergence each time after this many steps.", - "title": "Check Interval", - "type": "integer" - }, - "trust_factor": { - "default": 50.0, - "description": "Trust the autocorrelation time only when it's smaller than this factor times the length of the chain.", - "title": "Trust Factor", - "type": "number" - }, - "relative_thresh": { - "default": 0.05, - "description": "Relative threshold for convergence.", - "title": "Relative Thresh", - "type": "number" - }, - "burnin_steps": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Number of burn-in steps to take. If None, burn-in runs until convergence.", - "title": "Burnin Steps" - }, - "num_steps": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": 100, - "description": "Number of steps to take in the MCMC sampling.", - "title": "Num Steps" - }, - "thin_by": { - "default": 10, - "description": "How many samples to draw before for saving one.", - "title": "Thin By", - "type": "integer" - }, - "inverse_temp": { - "default": 1.0, - "description": "Inverse temperature for thermodynamic integration. Note that this is not yet fully implemented.", - "title": "Inverse Temp", - "type": "number" - } - }, - "required": [ - "storage_file" - ], - "title": "SamplingConfig", - "type": "object" - }, - "ScenarioConfig": { - "description": "Define a scenario for which e.g. 
prevalences and risks may be computed.", - "properties": { - "t_stages": { - "description": "List of T-stages to marginalize over in the scenario.", - "examples": [ - [ - "early" - ], - [ - 3, - 4 - ] - ], - "items": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "string" - } - ] - }, - "title": "T Stages", - "type": "array" - }, - "t_stages_dist": { - "default": [ - 1.0 - ], - "description": "Distribution over T-stages to use for marginalization.", - "examples": [ - [ - 1.0 - ], - [ - 0.6, - 0.4 - ] - ], - "items": { - "type": "number" - }, - "title": "T Stages Dist", - "type": "array" - }, - "midext": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Whether the patient's tumor extends over the midline.", - "title": "Midext" - }, - "mode": { - "default": "HMM", - "description": "Which underlying model architecture to use.", - "enum": [ - "HMM", - "BN" - ], - "title": "Mode", - "type": "string" - }, - "involvement": { - "$ref": "#/$defs/InvolvementConfig", - "default": { - "ipsi": {}, - "contra": {} - } - }, - "diagnosis": { - "$ref": "#/$defs/DiagnosisConfig", - "default": { - "ipsi": {}, - "contra": {} - } - } - }, - "required": [ - "t_stages" - ], - "title": "ScenarioConfig", - "type": "object" - }, - "ScheduleConfig": { - "description": "Configuration for generating a schedule of inverse temperatures.", - "properties": { - "method": { - "default": "power", - "description": "Method to generate the inverse temperature schedule.", - "enum": [ - "geometric", - "linear", - "power" - ], - "title": "Method", - "type": "string" - }, - "num": { - "default": 32, - "description": "Number of inverse temperatures in the schedule.", - "title": "Num", - "type": "integer" - }, - "power": { - "default": 4.0, - "description": "If a power schedule is chosen, use this as power.", - "title": "Power", - "type": "number" - }, - "values": { - "anyOf": [ - { - "items": { - "type": "number" - }, - "type": "array" - 
}, - { - "type": "null" - } - ], - "default": null, - "description": "List of inverse temperatures to use instead of generating a schedule. If a list is provided, the other parameters are ignored.", - "title": "Values" - } - }, - "title": "ScheduleConfig", - "type": "object" - } - }, - "description": "Settings for generating a JSON schema for lyscripts configuration files.", - "properties": { - "version": { - "description": "For future compatibility reasons, every config file must have a `version: 1` field at the top level.", - "maximum": 1, - "minimum": 1, - "title": "Version", - "type": "integer" - }, - "cross_validation": { - "$ref": "#/$defs/CrossValidationConfig", - "default": null - }, - "data": { - "$ref": "#/$defs/DataConfig", - "default": null - }, - "diagnosis": { - "$ref": "#/$defs/DiagnosisConfig", - "default": null - }, - "distributions": { - "additionalProperties": { - "$ref": "#/$defs/DistributionConfig" - }, - "default": {}, - "title": "Distributions", - "type": "object" - }, - "graph": { - "$ref": "#/$defs/GraphConfig", - "default": null - }, - "involvement": { - "$ref": "#/$defs/InvolvementConfig", - "default": null - }, - "modalities": { - "additionalProperties": { - "$ref": "#/$defs/ModalityConfig" - }, - "default": {}, - "title": "Modalities", - "type": "object" - }, - "model": { - "$ref": "#/$defs/ModelConfig", - "default": null - }, - "sampling": { - "$ref": "#/$defs/SamplingConfig", - "default": null - }, - "scenarios": { - "default": [], - "items": { - "$ref": "#/$defs/ScenarioConfig" - }, - "title": "Scenarios", - "type": "array" - }, - "schedule": { - "$ref": "#/$defs/ScheduleConfig", - "default": null - } - }, - "required": [ - "version" - ], - "title": "SchemaSettings", - "type": "object" -} diff --git a/src/lyscripts/__init__.py b/src/lyscripts/__init__.py deleted file mode 100644 index 32808f2..0000000 --- a/src/lyscripts/__init__.py +++ /dev/null @@ -1,76 +0,0 @@ -"""Initial entry point for the lyscripts package and CLIs. 
- -This top-level module configures and provides the top-level CLI through which all -subcommands can be accessed. -""" - -import sys -from typing import Literal - -import pandas as pd -from loguru import logger -from pydantic import Field -from pydantic_settings import ( - BaseSettings, - CliApp, - CliImplicitFlag, - CliSubCommand, -) - -from lyscripts import compute, data, integrate, sample, schedule # noqa: F401 -from lyscripts._version import version -from lyscripts.cli import assemble_main, configure_logging -from lyscripts.utils import console - -__version__ = version -__description__ = "Package to interact with lymphatic progression data and models." -__author__ = "Roman Ludwig" -__email__ = "gygqdstu3@mozmail.com" -__uri__ = "https://github.com/lycosystem/lyscripts" - -# activate copy on write in pandas. -# See https://pandas.pydata.org/docs/user_guide/copy_on_write.html -pd.options.mode.copy_on_write = True - -logger.disable("lyscripts") - - -class LyscriptsCLI(BaseSettings): - """A CLI to interact with lymphatic progression data and models.""" - - version: CliImplicitFlag[bool] = Field( - default=False, - description="Display the version of lyscripts and exit.", - ) - log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field( - default="INFO", - description="Set the log level of the lyscripts CLI.", - ) - - data: CliSubCommand[data.DataCLI] - sample: CliSubCommand[sample.SampleCLI] - compute: CliSubCommand[compute.ComputeCLI] - schedule: CliSubCommand[schedule.ScheduleCLI] - integrate: CliSubCommand[integrate.IntegrateCLI] - - def __init__(self, **kwargs): - """Add logging configuration to the lyscripts CLI.""" - configure_logging(argv=sys.argv, console=console) - super().__init__(**kwargs) - - def cli_cmd(self) -> None: - """Start the main lyscripts CLI. - - If the ``version`` flag is set, the version of lyscripts is displayed and the - program exits. Otherwise, the lyscripts CLI runs one of the subcommands. 
- """ - logger.debug("Starting lyscripts CLI.") - - if self.version: - logger.info(f"lyscripts {__version__}") - return - - CliApp.run_subcommand(self) - - -main = assemble_main(settings_cls=LyscriptsCLI, prog_name="lyscripts") diff --git a/src/lyscripts/__main__.py b/src/lyscripts/__main__.py deleted file mode 100644 index 8d176b1..0000000 --- a/src/lyscripts/__main__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""Utility for common tasks w.r.t. inference & prediction using `lymph` package.""" - -from lyscripts import main - -if __name__ == "__main__": - main() diff --git a/src/lyscripts/cli.py b/src/lyscripts/cli.py deleted file mode 100644 index b6448d0..0000000 --- a/src/lyscripts/cli.py +++ /dev/null @@ -1,123 +0,0 @@ -"""Utilities for configuring and running CLIs app. - -In this module, we define and configure a :py:class:`RichDefaultHelpFormatter` that -nicely displays the CLI's ``--help`` text. We also provide a function to -:py:func:`assemble a main function ` for the different CLI apps to save -some boilerplate code. Lastly, we have two functions related to the `loguru`_ setup. - -.. _loguru: https://loguru.readthedocs.io/en/stable -""" - -import inspect -import logging -from collections.abc import Callable -from typing import Literal - -from loguru import logger -from pydantic_settings import BaseSettings, CliApp, CliSettingsSource -from rich.console import Console -from rich.logging import RichHandler -from rich_argparse import ArgumentDefaultsRichHelpFormatter - -_current_log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO" - - -def assemble_main( - settings_cls: type[BaseSettings], - prog_name: str, -) -> Callable[[], None]: - """Assemble a ``main()`` function for a CLI app. - - It creates a :py:class:`~pydantic_settings.CliSettingsSource` object with the - provided ``settings_cls`` and ``prog_name``. Then, it fills in some default - settings for the CLI configuration and runs the CLI app. 
- - Assembling a ``main()`` function for all subcommands like this saves some - boilerplate code. - """ - - def main() -> None: - """Start the main CLI app.""" - cli_settings_source = CliSettingsSource( - settings_cls=settings_cls, - cli_prog_name=prog_name, - cli_kebab_case=True, - cli_use_class_docs_for_groups=True, - formatter_class=ArgumentDefaultsRichHelpFormatter, - ) - CliApp.run(settings_cls, cli_settings_source=cli_settings_source) - - return main - - -def somewhat_safely_get_loglevel( - argv: list[str], -) -> Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]: - """Set the log level of the lyscripts CLI. - - This is a bit of a hack, since the :py:class:`~lyscripts.LyscriptsCLI` class is not - yet initialized when we need to set the log level. In case the provided log-level is - not valid, :py:class:`~lyscripts.LyscriptsCLI` will raise an exception at a later - point. - - Return ``"INFO"`` by default. - """ - args_str = " ".join(argv) - if "--log-level" in args_str: - for log_level in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]: - if log_level in args_str: - return log_level - - return "INFO" - - -def configure_logging( - argv: list[str], - console: Console, -) -> None: - """Configure the `loguru`_ logging system of the lyscripts CLI. - - This function sets the log level and format of the lyscripts CLI. Notably, for - a log-level of `DEBUG` the output will contain more information. - - .. 
_loguru: https://loguru.readthedocs.io/en/stable - """ - logger.enable("lyscripts") - logger.enable("lydata") - global _current_log_level - _current_log_level = somewhat_safely_get_loglevel(argv=argv) - logger.remove() - handler = RichHandler(console=console) - logger.add( - sink=handler, - level=_current_log_level, - format="{message}", - ) - - -class InterceptHandler(logging.Handler): - """Intercept logging messages and redirect them to Loguru.""" - - def emit(self, record: logging.LogRecord) -> None: - """Intercept the log record and redirect it to Loguru.""" - # Get corresponding Loguru level if it exists. - try: - level: str | int = logger.level(record.levelname).name - except ValueError: - level = record.levelno - - # Find caller from where originated the logged message. - frame, depth = inspect.currentframe(), 0 - while frame: - filename = frame.f_code.co_filename - is_logging = filename == logging.__file__ - is_frozen = "importlib" in filename and "_bootstrap" in filename - if depth > 0 and not (is_logging or is_frozen): - break - frame = frame.f_back - depth += 1 - - logger.opt(depth=depth, exception=record.exc_info).log( - level, - record.getMessage(), - ) diff --git a/src/lyscripts/compute/__init__.py b/src/lyscripts/compute/__init__.py deleted file mode 100644 index c2bbdff..0000000 --- a/src/lyscripts/compute/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -"""Commands to compute prior and posterior state distributions from model samples. - -This can in turn speed up the computation of risks and prevalences. 
-""" - -from pydantic_settings import BaseSettings, CliApp, CliSubCommand - -from lyscripts.compute import posteriors, prevalences, priors, risks, evidence - - -class ComputeCLI(BaseSettings): - """Compute priors, posteriors, risks, prevalences and model evidence from model samples.""" - - priors: CliSubCommand[priors.PriorsCLI] - posteriors: CliSubCommand[posteriors.PosteriorsCLI] - risks: CliSubCommand[risks.RisksCLI] - prevalences: CliSubCommand[prevalences.PrevalencesCLI] - evidence: CliSubCommand[evidence.EvidenceCLI] - - def cli_cmd(self) -> None: - """Start the ``compute`` subcommand.""" - CliApp.run_subcommand(self) diff --git a/src/lyscripts/compute/__main__.py b/src/lyscripts/compute/__main__.py deleted file mode 100644 index f6df909..0000000 --- a/src/lyscripts/compute/__main__.py +++ /dev/null @@ -1,8 +0,0 @@ -"""Run the compute module as a script.""" - -from lyscripts.cli import assemble_main -from lyscripts.compute import ComputeCLI - -if __name__ == "__main__": - main = assemble_main(settings_cls=ComputeCLI, prog_name="compute") - main() diff --git a/src/lyscripts/compute/evidence.py b/src/lyscripts/compute/evidence.py deleted file mode 100644 index ffce481..0000000 --- a/src/lyscripts/compute/evidence.py +++ /dev/null @@ -1,197 +0,0 @@ -"""Compute the model evidence from MCMC samples. - -Given the samples drawn during thermodynamic integration and their respective log -likelihoods, compute the model log evidence and the Bayesian Information Criterion. 
-""" - -from __future__ import annotations - -import json -from pathlib import Path - -import emcee -import h5py -import numpy as np -import pandas as pd -from loguru import logger -from pydantic import Field -from scipy.integrate import trapezoid - -from lyscripts.cli import assemble_main -from lyscripts.configs import ( - BaseCLI, - DataConfig, - SamplingConfig, - ScheduleConfig, -) - -RNG = np.random.default_rng() - - -def comp_bic(log_probs: np.ndarray, num_params: int, num_data: int) -> float: - r"""Compute the negative one half of the Bayesian Information Criterion (BIC). - - The BIC is defined as [^1] - $$ BIC = k \\ln{n} - 2 \\ln{\\hat{L}} $$ - where $k$ is the number of parameters ``num_params``, $n$ the number of datapoints - ``num_data`` and $\\hat{L}$ the maximum likelihood estimate of the ``log_prob``. - It is constructed such that the following is an - approximation of the model evidence: - $$ p(D \\mid m) \\approx \\exp{\\left( - BIC / 2 \\right)} $$ - which is why this function returns the negative one half of it. - - [^1]: https://en.wikipedia.org/wiki/Bayesian_information_criterion - """ - return np.max(log_probs) - num_params * np.log(num_data) / 2.0 - - -def compute_evidence( - temp_schedule: np.ndarray, - log_probs: np.ndarray, -) -> float: - """Compute the evidence. - - Given a ``temp_schedule`` of inverse temperatures and corresponding sets of - ``log_probs``, we calculate the mean ``log_prob`` over all samples to approximate - the expectation value under the corresponding power posterior for each step in the - ``temp_schedule``. The evidence is evaluated using trapezoidal integration of the - expectation values over the ``temp_schedule``. 
- """ - a_mc = np.mean(log_probs, axis=1) - return trapezoid(y=a_mc, x=temp_schedule) - - -def compute_ti_results( - settings: EvidenceCLI, - temp_schedule: np.ndarray, - metrics: dict, - ndim: int, - h5_file: Path, -) -> tuple[np.ndarray, np.ndarray]: - """Compute the results in case of a thermodynamic integration run.""" - num_temps = len(temp_schedule) - - if num_temps != len(h5_file["ti"]): - raise RuntimeError( - f"Parameters suggest temp schedule of length {num_temps}, " - f"but stored are {len(h5_file['ti'])}", - ) - - nwalker = ndim * settings.sampling.walkers_per_dim - nsteps = settings.sampling.num_steps - ti_log_probs = np.zeros(shape=(num_temps, nsteps * nwalker)) - - for i, run in enumerate(h5_file["ti"]): - reader = emcee.backends.HDFBackend( - settings.sampling.storage_file, - name=f"ti/{run}", - read_only=True, - ) - ti_log_probs[i] = reader.get_blobs(flat=True)["log_prob"] - - evidence = compute_evidence(temp_schedule, ti_log_probs) - metrics["evidence"] = evidence - - return temp_schedule, ti_log_probs - - -class EvidenceCLI(BaseCLI): - """Compute model evidence from thermodynamic integration samples.""" - - data: DataConfig - sampling: SamplingConfig - schedule: ScheduleConfig = Field( - description="Configuration for generating inverse temperature schedule.", - ) - plots: Path = Field( - default="./plots", - description="Directory for storing plots.", - ) - metrics: Path = Field( - default="./metrics.json", - description="Path to metrics file.", - ) - - def cli_cmd(self) -> None: - """Start the ``evidence`` subcommand. - - Given the MCMC samples from thermodynamic integration provided by the - ``sampling`` argument and the corresponding inverse temperature schedule, - specified in the ``schedule`` argument, the model evidence is computed using - the functions :py:func:`compute_ti_results` and :py:func`compute_evidence`. - Further the BIC is evaluated. 
- """ - data = self.data.load() - - metrics = {} - - temp_schedule = self.schedule.get_schedule() - - with h5py.File(self.sampling.storage_file, mode="r") as h5_file: - # Get ndim from the HDF5 backend - backend = emcee.backends.HDFBackend( - self.sampling.storage_file, - read_only=True, - name=self.sampling.dataset, - ) - ndim = backend.shape[1] - logger.info(f"Inferred {ndim} parameters from stored samples") - - # if TI has been performed, compute the evidence - if "ti" in h5_file: - temp_schedule, ti_log_probs = compute_ti_results( - settings=self, - temp_schedule=temp_schedule, - metrics=metrics, - ndim=ndim, - h5_file=h5_file, - ) - - logger.info( - "Computed results of thermodynamic integration with " - f"{len(temp_schedule)} steps", - ) - - # store inverse temperatures and log-probs in CSV file - self.plots.parent.mkdir(parents=True, exist_ok=True) - - beta_vs_accuracy = pd.DataFrame( - np.array( - [ - temp_schedule, - np.mean(ti_log_probs, axis=1), - np.std(ti_log_probs, axis=1), - ], - ).T, - columns=["β", "accuracy", "std"], - ) - beta_vs_accuracy.to_csv(self.plots, index=False) - logger.info(f"Plotted β vs accuracy at {self.plots}") - - # use blobs, because also for TI, this is the unscaled log-prob - final_log_probs = backend.get_blobs()["log_prob"] - logger.info( - f"Opened samples from emcee backend from {self.sampling.storage_file}", - ) - - # store metrics in JSON file - self.metrics.parent.mkdir(parents=True, exist_ok=True) - self.metrics.touch(exist_ok=True) - - metrics["BIC"] = comp_bic( - log_probs=final_log_probs, - num_params=ndim, - num_data=len(data), - ) - metrics["max_llh"] = np.max(final_log_probs) - metrics["mean_llh"] = np.mean(final_log_probs) - - with open(self.metrics, mode="w", encoding="utf-8") as metrics_file: - json.dump(metrics, metrics_file) - - logger.info(f"Wrote out metrics to {self.metrics}") - - -if __name__ == "__main__": - main = assemble_main(settings_cls=EvidenceCLI, prog_name="compute evidence") - main() diff --git 
a/src/lyscripts/compute/posteriors.py b/src/lyscripts/compute/posteriors.py deleted file mode 100644 index c017ccd..0000000 --- a/src/lyscripts/compute/posteriors.py +++ /dev/null @@ -1,142 +0,0 @@ -"""Compute posterior state distributions. - -The posteriors are computed from drawn samples for a list of defined scenarios. If -priors have already been computed from the samples and the ``--cache_dir`` argument -is the same as during that computation, the priors will automatically be loaded from -the cache. -""" - -from typing import Literal - -import numpy as np -from loguru import logger -from lymph import models -from pydantic import Field -from rich import progress - -from lyscripts.cli import assemble_main -from lyscripts.compute.priors import compute_priors -from lyscripts.compute.utils import BaseComputeCLI, HDF5FileStorage, get_cached -from lyscripts.configs import ( - DistributionConfig, - GraphConfig, - ModalityConfig, - ModelConfig, - add_distributions, - add_modalities, - construct_model, -) -from lyscripts.utils import console - - -def compute_posteriors( - model_config: ModelConfig, - graph_config: GraphConfig, - dist_configs: dict[str, DistributionConfig], - modality_configs: dict[str, ModalityConfig], - priors: np.ndarray, - diagnosis: dict[Literal["ipsi", "contra"], dict], - midext: bool | None = None, - mode: Literal["HMM", "BN"] = "HMM", - progress_desc: str = "Computing posteriors from priors", -) -> np.ndarray: - """Compute posterior state distributions from ``priors``. - - This calls the ``model`` method :py:meth:`~lymph.types.Model.posterior_state_dist` - for each of the pre-computed ``priors``, given the specified ``diagnosis`` pattern. - - For the :py:class:`~lymph.models.Midline` model, the ``midext`` argument can be - used to specify whether the midline extension is present or not. 
- """ - model = construct_model(model_config, graph_config) - model = add_distributions(model, dist_configs) - model = add_modalities(model, modality_configs) - posteriors = [] - kwargs = {"midext": midext} if isinstance(model, models.Midline) else {} - - if isinstance(model, models.Unilateral | models.HPVUnilateral): - diagnosis = diagnosis.get("ipsi") - - for prior in progress.track( - sequence=priors, - description=progress_desc, - total=len(priors), - console=console, - ): - posteriors.append( - model.posterior_state_dist( - given_state_dist=prior, - given_diagnosis=diagnosis, - mode=mode, - **kwargs, - ), - ) - - return np.stack(posteriors) - - -class PosteriorsCLI(BaseComputeCLI): - """Compute posterior state distributions for different diagnosis scenarios.""" - - modalities: dict[str, ModalityConfig] = Field( - default={}, - description=( - "Maps names of diagnostic modalities to their specificity/sensitivity." - ), - ) - posteriors: HDF5FileStorage = Field( - description="Storage for the computed posteriors.", - ) - - def cli_cmd(self) -> None: - """Start the ``posteriors`` subcommand. - - This will compute the posterior state distributions, given a personalized - diagnosis pattern, for each of the scenarios provided to the command. 
- """ - logger.debug(self.model_dump_json(indent=2)) - - global_attrs = self.model_dump( - include={"model", "graph", "distributions", "modalities"}, - ) - self.posteriors.set_attrs(attrs=global_attrs, dataset="/") - - samples = self.sampling.load() - cached_compute_priors = get_cached(compute_priors, self.cache_dir) - cached_compute_posteriors = get_cached(compute_posteriors, self.cache_dir) - num_scens = len(self.scenarios) - - for i, scenario in enumerate(self.scenarios): - _fields = {"t_stages", "t_stages_dist", "mode"} - prior_kwargs = scenario.model_dump(include=_fields) - - _priors = cached_compute_priors( - model_config=self.model, - graph_config=self.graph, - dist_configs=self.distributions, - samples=samples, - progress_desc=f"Computing priors for scenario {i + 1}/{num_scens}", - **prior_kwargs, - ) - - _fields = {"diagnosis", "midext", "mode"} - posterior_kwargs = scenario.model_dump(include=_fields) - - posteriors = cached_compute_posteriors( - model_config=self.model, - graph_config=self.graph, - dist_configs=self.distributions, - modality_configs=self.modalities, - priors=_priors, - progress_desc=f"Computing posteriors for scenario {i + 1}/{num_scens}", - **posterior_kwargs, - ) - - self.posteriors.save(values=posteriors, dataset=f"{i:03d}") - self.posteriors.set_attrs(attrs=prior_kwargs, dataset=f"{i:03d}") - self.posteriors.set_attrs(attrs=posterior_kwargs, dataset=f"{i:03d}") - - -if __name__ == "__main__": - main = assemble_main(settings_cls=PosteriorsCLI, prog_name="compute posteriors") - main() diff --git a/src/lyscripts/compute/prevalences.py b/src/lyscripts/compute/prevalences.py deleted file mode 100644 index 89b84c9..0000000 --- a/src/lyscripts/compute/prevalences.py +++ /dev/null @@ -1,236 +0,0 @@ -"""Prevalence prediction module. - -This computes the prevalence of an observed involvement pattern, given a trained model. -It can also compare this prediction to the observed prevalence in the data. 
As for the -risk prediction, this uses caching and computes the priors first. -""" - -from collections.abc import Callable -from typing import Literal - -import lydata # noqa: F401 -import numpy as np -import pandas as pd -from loguru import logger -from lydata import C, Q -from lydata.accessor import QueryPortion -from lydata.querier import NoneQ -from lydata.utils import is_old -from lymph import models -from pydantic import Field -from rich import progress - -from lyscripts.cli import assemble_main -from lyscripts.compute.priors import compute_priors -from lyscripts.compute.utils import ( - BaseComputeCLI, - HDF5FileStorage, - get_cached, -) -from lyscripts.configs import ( - DataConfig, - DiagnosisConfig, - DistributionConfig, - GraphConfig, - ModalityConfig, - ModelConfig, - ScenarioConfig, - add_distributions, - add_modalities, - construct_model, -) -from lyscripts.utils import console - - -def compute_prevalences( - model_config: ModelConfig, - graph_config: GraphConfig, - dist_configs: dict[str, DistributionConfig], - modality_configs: dict[str, ModalityConfig], - priors: np.ndarray, - diagnosis: dict[Literal["ipsi", "contra"], dict], - midext: bool | None = None, - progress_desc: str = "Computing prevalences from priors", -) -> np.ndarray: - """Compute the prevalence of a diagnosis given the priors and the model.""" - model = construct_model(model_config, graph_config) - model = add_distributions(model, dist_configs) - - if len(modality_configs) != 1: - msg = "Only one modality is supported for prevalence prediction." 
- logger.error(msg) - raise ValueError(msg) - - model = add_modalities(model, modality_configs) - prevalences = [] - kwargs = {"midext": midext} if isinstance(model, models.Midline) else {} - - for prior in progress.track( - sequence=priors, - description=progress_desc, - total=len(priors), - console=console, - ): - obs_dist = model.obs_dist(given_state_dist=prior) - involvement = { - side: diagnosis.get(side).get(next(iter(modality_configs))) - for side in ["ipsi", "contra"] - } - - if isinstance(model, models.Unilateral | models.HPVUnilateral): - involvement = involvement.get("ipsi") - - prevalence = model.marginalize( - given_state_dist=obs_dist, - involvement=involvement, - **kwargs, - ) - - if isinstance(model, models.Midline): - # In this case, we need to renormalize the prevalence by the marginalized - # probability of all states with midline extension. We must do this, because - # we compute the analogous quantity for the data. In principle, we could - # also compute the prevalence of the diagnosis *and* midline extension, but - # we have decided to compute the diagnosis *given* midline extension. 
- # https://github.com/lycosystem/lyscripts/blob/ea49ec/lyscripts/compute/prevalences.py#L217-L225 - midext_prob = model.marginalize( - involvement=None, - given_state_dist=obs_dist, - **kwargs, - ) - prevalence /= midext_prob - - prevalences.append(prevalence) - - return np.stack(prevalences) - - -def generate_query_from_diagnosis(diagnosis: DiagnosisConfig) -> Q: - """Transform a diagnosis into a query for the data.""" - result = NoneQ() - for side in ["ipsi", "contra"]: - for modality, pattern in getattr(diagnosis, side, {}).items(): - for lnl, value in pattern.items(): - column = (modality, side, lnl) - result &= C(column) == value - return result - - -def observe_prevalence( - data: pd.DataFrame, - scenario_config: ScenarioConfig, - mapping: dict[int, str] | Callable[[int], str] | None = None, -) -> QueryPortion: - """Extract prevalence defined in a ``scenario`` from the ``data``. - - ``mapping`` defines how the T-stages in the data are supposed to be mapped to the - T-stages defined in the ``scenario``. - - It returns the number of patients that match the given scenario and the total - number of patients that are considered. E.g., in the example below we 79 patients - are of late T-stage and have a tumor extending over the midline. Of those, 30 were - diagnosed with contralateral involvement in LNL II based on a CT scan. - - >>> data = next(lydata.load_datasets(year=2021, institution="usz")) - >>> scenario_config = ScenarioConfig( - ... t_stages=["late"], - ... midext=True, - ... diagnosis=DiagnosisConfig(contra={"CT": {"II": True}}), - ... 
) - >>> observe_prevalence(data, scenario_config) - QueryPortion(match=np.int64(7), total=np.int64(79)) - """ - mapping = mapping or DataConfig.model_fields["mapping"].default_factory() - key = ("tumor", "1", "t_stage") if is_old(data) else ("tumor", "core", "t_stage") - data[key] = data.ly.t_stage.map(mapping) - - has_t_stage = C("t_stage").isin(scenario_config.t_stages) - if scenario_config.midext is None: - has_midext = NoneQ() - else: - has_midext = C("midext") == scenario_config.midext - - # Note that below we compute the prevalence of the diagnosis *given* midline - # extension. This means, that when computing the prevalence of the diagnosis in - # the model, we need to renormalize by diving by the probability of midline - # extension. For an older - but pretty surely correct - implementation see - # https://github.com/lycosystem/lyscripts/blob/ea49ec/lyscripts/compute/prevalences.py#L217-L225 - return data.ly.portion( - query=generate_query_from_diagnosis(scenario_config.diagnosis), - given=has_t_stage & has_midext, - ) - - -class PrevalencesCLI(BaseComputeCLI): - """Predict the prevalence of an involvement pattern from model samples.""" - - modalities: dict[str, ModalityConfig] = Field( - default={}, - description=( - "Maps names of diagnostic modalities to their specificity/sensitivity." 
- ), - ) - prevalences: HDF5FileStorage = Field( - description="Storage for the computed prevalences.", - ) - data: DataConfig - - def cli_cmd(self) -> None: - """Start the ``prevalences`` subcommand.""" - logger.debug(self.model_dump_json(indent=2)) - global_attrs = self.model_dump( - include={"model", "graph", "distributions", "modalities"}, - ) - self.prevalences.set_attrs(attrs=global_attrs, dataset="/") - - samples = self.sampling.load() - cached_compute_priors = get_cached(compute_priors, self.cache_dir) - cached_compute_prevalences = get_cached(compute_prevalences, self.cache_dir) - num_scens = len(self.scenarios) - - for i, scenario in enumerate(self.scenarios): - _fields = {"t_stages", "t_stages_dist", "mode"} - prior_kwargs = scenario.model_dump(include=_fields) - - _priors = cached_compute_priors( - model_config=self.model, - graph_config=self.graph, - dist_configs=self.distributions, - samples=samples, - progress_desc=f"Computing priors for scenario {i + 1}/{num_scens}", - **prior_kwargs, - ) - - _fields = {"diagnosis", "midext"} - prevalence_kwargs = scenario.model_dump(include=_fields) - - prevalences = cached_compute_prevalences( - model_config=self.model, - graph_config=self.graph, - dist_configs=self.distributions, - modality_configs=self.modalities, - priors=_priors, - progress_desc=f"Computing prevalences for scenario {i + 1}/{num_scens}", - **prevalence_kwargs, - ) - - portion = observe_prevalence( - data=self.data.load(), - scenario_config=scenario, - mapping=self.data.mapping, - ) - self.prevalences.save(values=prevalences, dataset=f"{i:03d}") - self.prevalences.set_attrs(attrs=prior_kwargs, dataset=f"{i:03d}") - self.prevalences.set_attrs(attrs=prevalence_kwargs, dataset=f"{i:03d}") - self.prevalences.set_attrs( - attrs={ - "num_match": portion.match, - "num_total": portion.total, - }, - dataset=f"{i:03d}", - ) - - -if __name__ == "__main__": - main = assemble_main(settings_cls=PrevalencesCLI, prog_name="compute prevalences") - main() diff 
--git a/src/lyscripts/compute/priors.py b/src/lyscripts/compute/priors.py deleted file mode 100644 index ae7307e..0000000 --- a/src/lyscripts/compute/priors.py +++ /dev/null @@ -1,111 +0,0 @@ -"""Given samples drawn during an MCMC round, compute the (prior) state distributions. - -This is done for each sample and for a list of specified scenarios. The computation is -cached at a location specified by the ``--cache_dir`` argument using ``joblib``. -""" - -from typing import Literal - -import numpy as np -from loguru import logger -from pydantic import Field -from rich import progress - -from lyscripts.cli import assemble_main -from lyscripts.compute.utils import BaseComputeCLI, HDF5FileStorage, get_cached -from lyscripts.configs import ( - DistributionConfig, - GraphConfig, - ModelConfig, - add_distributions, - construct_model, -) -from lyscripts.utils import console - - -def compute_priors( - model_config: ModelConfig, - graph_config: GraphConfig, - dist_configs: dict[str, DistributionConfig], - samples: np.ndarray, - t_stages: list[int | str], - t_stages_dist: list[float], - mode: Literal["HMM", "BN"] = "HMM", - progress_desc: str = "Computing priors from samples", -) -> np.ndarray: - """Compute prior state distributions from the ``samples`` for the ``model``. - - This will call the ``model`` method :py:meth:`~lymph.types.Model.state_dist` - for each of the ``samples``. The prior state distributions are computed for - each of the ``t_stages`` and marginalized over using the ``t_stages_dist``. 
- """ - model = construct_model(model_config, graph_config) - model = add_distributions(model, dist_configs) - priors = [] - - for sample in progress.track( - sequence=samples, - description=progress_desc, - total=len(samples), - console=console, - ): - model.set_params(*sample) - priors.append( - sum( - model.state_dist(t_stage=t, mode=mode) * p - for t, p in zip(t_stages, t_stages_dist, strict=False) - ), - ) - - return np.stack(priors) - - -class PriorsCLI(BaseComputeCLI): - """Compute the prior state distributions from MCMC samples.""" - - priors: HDF5FileStorage = Field(description="Storage for the computed priors.") - - def cli_cmd(self) -> None: - """Start the ``priors`` subcommand. - - Given a ``graph``, ``model``, ``distributions`` over diagnosis times, and - MCMC samples loaded from the ``sampling`` argument, this command computes the - prior state distributions for each of the specified ``scenarios``. - - Precomputing these state distributions is useful, because they largely only - depend on T-stage and not on the diagnosis or involvement of interest. Hence, - computing the :py:mod:`~lyscripts.compute.posteriors` and - :py:mod:`~lyscripts.compute.risks` can be sped up. - - Note that this command will use `joblib`_ to cache its computations. - - .. 
_joblib: https://joblib.readthedocs.io/ - """ - logger.debug(self.model_dump_json(indent=2)) - global_attrs = self.model_dump(include={"model", "graph", "distributions"}) - self.priors.set_attrs(attrs=global_attrs, dataset="/") - - samples = self.sampling.load() - cached_compute_priors = get_cached(compute_priors, self.cache_dir) - num_scenarios = len(self.scenarios) - - for i, scenario in enumerate(self.scenarios): - _fields = {"t_stages", "t_stages_dist", "mode"} - prior_kwargs = scenario.model_dump(include=_fields) - - priors = cached_compute_priors( - model_config=self.model, - graph_config=self.graph, - dist_configs=self.distributions, - samples=samples, - progress_desc=f"Computing priors for scenario {i + 1}/{num_scenarios}", - **prior_kwargs, - ) - - self.priors.save(values=priors, dataset=f"{i:03d}") - self.priors.set_attrs(attrs=prior_kwargs, dataset=f"{i:03d}") - - -if __name__ == "__main__": - main = assemble_main(settings_cls=PriorsCLI, prog_name="compute priors") - main() diff --git a/src/lyscripts/compute/risks.py b/src/lyscripts/compute/risks.py deleted file mode 100644 index 4b3e224..0000000 --- a/src/lyscripts/compute/risks.py +++ /dev/null @@ -1,140 +0,0 @@ -"""Predict risks of involvements for scenarios using drawn MCMC samples. - -As the priors and posteriors, this computation, too, uses caching and may skip the -computation of these two initial steps if the cache directory is the same as during -their computation. 
-""" - -from typing import Literal - -import numpy as np -from loguru import logger -from lymph import models -from pydantic import Field -from rich import progress - -from lyscripts.cli import assemble_main -from lyscripts.compute.posteriors import compute_posteriors -from lyscripts.compute.priors import compute_priors -from lyscripts.compute.utils import BaseComputeCLI, HDF5FileStorage, get_cached -from lyscripts.configs import ( - DistributionConfig, - GraphConfig, - ModalityConfig, - ModelConfig, - add_distributions, - add_modalities, - construct_model, -) -from lyscripts.utils import console - - -def compute_risks( - model_config: ModelConfig, - graph_config: GraphConfig, - dist_configs: dict[str, DistributionConfig], - modality_configs: dict[str, ModalityConfig], - posteriors: np.ndarray, - involvement: dict[Literal["ipsi", "contra"], dict], - progress_desc: str = "Computing risks from posteriors", -) -> np.ndarray: - """Compute the risk of ``involvement`` from each of the ``posteriors``. - - Essentially, this only calls the model's :py:meth:`lymph.models.Model.marginalize` - method, as nothing more is necessary than to marginalize the full posterior state - distribution over the states that correspond to the involvement of interest. 
- """ - model = construct_model(model_config, graph_config) - model = add_distributions(model, dist_configs) - model = add_modalities(model, modality_configs) - risks = [] - - if isinstance(model, models.Unilateral | models.HPVUnilateral): - involvement = involvement.get("ipsi") - - for posterior in progress.track( - sequence=posteriors, - description=progress_desc, - total=len(posteriors), - console=console, - ): - risks.append( - model.marginalize(involvement=involvement, given_state_dist=posterior), - ) - - return np.stack(risks) - - -class RisksCLI(BaseComputeCLI): - """Predict the risk of involvement scenarios from model samples given diagnoses.""" - - modalities: dict[str, ModalityConfig] = Field( - default={}, - description=( - "Maps names of diagnostic modalities to their specificity/sensitivity." - ), - ) - risks: HDF5FileStorage = Field(description="Storage for the computed risks.") - - def cli_cmd(self) -> None: - """Start the ``risks`` subcommand.""" - logger.debug(self.model_dump_json(indent=2)) - global_attrs = self.model_dump( - include={"model", "graph", "distributions", "modalities"}, - ) - self.risks.set_attrs(attrs=global_attrs, dataset="/") - - samples = self.sampling.load() - cached_compute_priors = get_cached(compute_priors, self.cache_dir) - cached_compute_posteriors = get_cached(compute_posteriors, self.cache_dir) - cached_compute_risks = get_cached(compute_risks, self.cache_dir) - num_scens = len(self.scenarios) - - for i, scenario in enumerate(self.scenarios): - _fields = {"t_stages", "t_stages_dist", "mode"} - prior_kwargs = scenario.model_dump(include=_fields) - - _priors = cached_compute_priors( - model_config=self.model, - graph_config=self.graph, - dist_configs=self.distributions, - samples=samples, - progress_desc=f"Computing priors for scenario {i + 1}/{num_scens}", - **prior_kwargs, - ) - - _fields = {"diagnosis", "midext", "mode"} - posterior_kwargs = scenario.model_dump(include=_fields) - - _posteriors = 
cached_compute_posteriors( - model_config=self.model, - graph_config=self.graph, - dist_configs=self.distributions, - modality_configs=self.modalities, - priors=_priors, - progress_desc=f"Computing posteriors for scenario {i + 1}/{num_scens}", - **posterior_kwargs, - ) - - _fields = {"involvement"} - risk_kwargs = scenario.model_dump(include=_fields) - - risks = cached_compute_risks( - model_config=self.model, - graph_config=self.graph, - dist_configs=self.distributions, - modality_configs=self.modalities, - posteriors=_posteriors, - progress_desc=f"Computing risks for scenario {i + 1}/{num_scens}", - **risk_kwargs, - ) - - self.risks.save(values=risks, dataset=f"{i:03d}") - self.risks.set_attrs(attrs=prior_kwargs, dataset=f"{i:03d}") - self.risks.set_attrs(attrs=posterior_kwargs, dataset=f"{i:03d}") - self.risks.set_attrs(attrs=risk_kwargs, dataset=f"{i:03d}") - - -if __name__ == "__main__": - main = assemble_main(settings_cls=RisksCLI, prog_name="compute risks") - main() diff --git a/src/lyscripts/compute/utils.py b/src/lyscripts/compute/utils.py deleted file mode 100644 index b3bed8a..0000000 --- a/src/lyscripts/compute/utils.py +++ /dev/null @@ -1,276 +0,0 @@ -"""Utilities for precomputing the priors and posteriors.""" - -import ast -import functools -from pathlib import Path -from typing import Annotated, Any - -import h5py -import numpy as np -from joblib import Memory -from loguru import logger -from pydantic import AfterValidator, BaseModel, Field - -from lyscripts.configs import ( - BaseCLI, - DistributionConfig, - GraphConfig, - ModelConfig, - SamplingConfig, - ScenarioConfig, -) - - -class BaseComputeCLI(BaseCLI): - """Common command line settings for the submodule ``compute``.""" - - graph: GraphConfig - model: ModelConfig = ModelConfig() - distributions: dict[str, DistributionConfig] = Field( - default={}, - description=( - "Mapping of model T-categories to predefined distributions over " - "diagnose times." 
- ), - ) - cache_dir: Path = Field( - default=Path.cwd() / ".cache", - description="Cache directory for storing function calls.", - ) - scenarios: list[ScenarioConfig] = Field( - default=[], - description="List of scenarios to compute risks for.", - ) - sampling: SamplingConfig - - -def is_hdf5_compatible(value: Any) -> bool: - """Check if the given ``value`` can be stored in an HDF5 file.""" - return isinstance( - value, - bool | str | bytes | int | float | np.ndarray | list | tuple, - ) - - -def to_hdf5_attrs(mapping: dict[str, Any]) -> dict[str, str]: - """Convert ``attrs`` to a dictionary of HDF5 compatible attributes or strings.""" - res = {} - for key, val in mapping.items(): - if is_hdf5_compatible(val): - res[key] = val - else: - res[key] = str(val) - return res - - -def from_hdf5_attrs(mapping: h5py.AttributeManager) -> dict[str, Any]: - """Convert the HDF5 attributes to a dictionary of Python objects.""" - attrs = {} - for key, value in mapping.items(): - try: - attrs[key] = ast.literal_eval(value) - except ValueError: - attrs[key] = value - return attrs - - -def extract_modalities(diagnosis: dict[str, Any]) -> set[str]: - """Get the set of modalities used in the ``diagnosis``. - - This is not used in the main apps anymore, but since it may be useful, I keep it. - - >>> diagnosis = { - ... "ipsi": { - ... "MRI": {"II": True, "III": False}, - ... "PET": {"II": False, "III": True}, - ... }, - ... "contra": {"MRI": {"II": False, "III": None}}, - ... 
} - >>> sorted(extract_modalities(diagnosis)) - ['MRI', 'PET'] - """ - modality_set = set() - - if "ipsi" not in diagnosis and "contra" not in diagnosis: - return modality_set | set(diagnosis.keys()) - - for side in ["ipsi", "contra"]: - if side in diagnosis: - modality_set |= set(diagnosis[side].keys()) - - return modality_set - - -def ensure_parent_dir(path: Path) -> Path: - """Create the parent directory of the given ``path``.""" - path.parent.mkdir(parents=True, exist_ok=True) - logger.debug(f"Ensured parent directory of {path}") - return path - - -HasParentPath = Annotated[Path, AfterValidator(ensure_parent_dir)] -"""Type hint for path whose parent dir is created if it doesn't exist.""" - - -class HDF5FileStorage(BaseModel): - """HDF5 file storage for in- and outputs of computations.""" - - file: HasParentPath = Field( - description="Path to the HDF5 file. Parent directories are created if needed.", - ) - dataset: str | None = Field( - default=None, - description=( - "Name of the dataset in the HDF5 file. Save/load methods can override this." - ), - ) - - def _get_dataset(self) -> str: - """Get attribute ``dataset`` or the first dataset in the file. 
- - >>> from tempfile import TemporaryDirectory - >>> tmp_path = Path(TemporaryDirectory().name) / "test.hdf5" - >>> storage = HDF5FileStorage(file=tmp_path) - >>> rand_data = np.random.rand(100, 100) - >>> storage.save(values=rand_data, dataset="test") - >>> np.all(storage.load(dataset="test") == rand_data) - np.True_ - >>> np.all(storage.load() == rand_data) # loads first dataset - np.True_ - >>> some_attrs = {"key": "value"} - >>> storage.set_attrs(attrs=some_attrs, dataset="test") - >>> storage.get_attrs(dataset="test") - {'key': 'value'} - """ - if self.dataset is not None: - return self.dataset - - with h5py.File(self.file, "r") as file: - return next(iter(file.keys())) - - def load(self, dataset: str | None = None) -> np.ndarray: - """Load the dataset with the name ``dataset``.""" - dataset = dataset or self._get_dataset() - - with h5py.File(self.file, "r") as file: - array = file[dataset][()] - - logger.debug(f"Loaded dataset {dataset} from {self.file}") - return array - - def get_attrs(self, dataset: str | None = None) -> dict[str, Any]: - """Get the attributes of the dataset ``dataset``.""" - dataset = dataset or self._get_dataset() - - with h5py.File(self.file, "r") as file: - attrs = from_hdf5_attrs(file[dataset].attrs) - - logger.debug(f"Loaded attrs for dataset '{dataset}' from {self.file}") - return attrs - - def save(self, values: np.ndarray, dataset: str | None = None) -> None: - """Set the ``values`` for the ``dataset`` dataset.""" - dataset = dataset or self._get_dataset() - - with h5py.File(self.file, "a") as file: - if dataset in file: - del file[dataset] - file[dataset] = values - - logger.debug(f"Stored dataset {dataset} in {self.file}") - - def set_attrs(self, attrs: dict[str, Any], dataset: str | None = None) -> None: - """Update the ``attrs`` for the ``dataset`` dataset.""" - dataset = dataset or self._get_dataset() - - with h5py.File(self.file, "a") as file: - if dataset not in file: - raise ValueError(f"Dataset '{dataset}' not found in 
{self.file}") - file[dataset].attrs.update(to_hdf5_attrs(attrs)) - - logger.debug(f"Stored attrs {attrs} for dataset '{dataset}' in {self.file}") - - -def reduce_pattern(pattern: dict[str, dict[str, bool]]) -> dict[str, dict[str, bool]]: - """Reduce a ``pattern`` by removing all entries that are ``None``. - - This way, it should be completely recoverable by the ``complete_pattern`` function - but be shorter to store. - - Unused but maybe useful for some cases. Keeping it in here for now. - - >>> full = { - ... "ipsi": {"I": None, "II": True, "III": None}, - ... "contra": {"I": None, "II": None, "III": None}, - ... } - >>> reduce_pattern(full) - {'ipsi': {'II': True}} - - """ - tmp_pattern = pattern.copy() - reduced_pattern = {} - for side in ["ipsi", "contra"]: - if not all(v is None for v in tmp_pattern[side].values()): - reduced_pattern[side] = {} - for lnl, val in tmp_pattern[side].items(): - if val is not None: - reduced_pattern[side][lnl] = val - - return reduced_pattern - - -def complete_pattern( - pattern: dict[str, dict[str, bool]] | None, - lnls: list[str], -) -> dict[str, dict[str, bool]]: - """Make sure the provided involvement ``pattern`` is correct. - - For each side of the neck, and for each of the ``lnls`` this should in the end - contain ``True``, ``False`` or ``None``. - - Unused but maybe useful for some cases. Keeping it in here for now. 
- - >>> pattern = {"ipsi": {"II": True}} - >>> lnls = ["II", "III"] - >>> complete_pattern(pattern, lnls) - {'ipsi': {'II': True, 'III': None}, 'contra': {'II': None, 'III': None}} - - """ - if pattern is None: - pattern = {} - - for side in ["ipsi", "contra"]: - if side not in pattern: - pattern[side] = {} - - for lnl in lnls: - if lnl not in pattern[side]: - pattern[side][lnl] = None - elif pattern[side][lnl] is None: - continue - else: - pattern[side][lnl] = bool(pattern[side][lnl]) - - return pattern - - -def get_cached(func: callable, cache_dir: Path) -> callable: - """Return cached ``func`` with a cache at ``cache_dir``.""" - memory = Memory(location=cache_dir, verbose=0) - cached_func = memory.cache(func, ignore=["progress_desc"]) - logger.info(f"Initialized cache for {func.__name__} at {cache_dir}") - - @functools.wraps(func) - def log_cache_info_wrapper(*args, **kwargs): - logger.debug(f"Calling {func.__name__}({args}, {kwargs})") - if cached_func.check_call_in_cache(*args, **kwargs): - logger.info(f"Cache hit for {func.__name__}, returning stored result") - else: - logger.info(f"Cache miss for {func.__name__}, computing result") - - result = cached_func(*args, **kwargs) - logger.debug(f"Computed {result = }") - return result - - log_cache_info_wrapper._cached_func = cached_func - return log_cache_info_wrapper diff --git a/src/lyscripts/configs.py b/src/lyscripts/configs.py deleted file mode 100644 index ebae236..0000000 --- a/src/lyscripts/configs.py +++ /dev/null @@ -1,825 +0,0 @@ -"""Using `pydantic`_, we define configurations for the package. - -Most importantly, these configurations are part of the CLIs that the package provides. -but they also help with programmatically validating and constructing various objects. -Maybe most importantly, the :py:class:`GraphConfig` and :py:class:`ModelConfig` may be -used to precisely and reproducibly define how the function :py:func:`construct_model` -should create lymphatic progression :py:mod:`~lymph.models`. 
- -.. _pydantic: https://docs.pydantic.dev/latest/ -""" - -from __future__ import annotations - -import importlib -import importlib.util -import os -import warnings -from collections.abc import Callable, Sequence -from copy import deepcopy -from pathlib import Path -from typing import Annotated, Any, Literal - -import numpy as np -import pandas as pd -import yaml -from loguru import logger -from lydata.loader import LyDataset -from lydata.utils import ModalityConfig -from lymph import graph, models -from lymph.modalities import Pathological -from lymph.types import Model, PatternType -from pydantic import ( - AfterValidator, - BaseModel, - ConfigDict, - Field, - FilePath, -) -from pydantic_settings import ( - BaseSettings, - PydanticBaseSettingsSource, - YamlConfigSettingsSource, -) -from pydantic_settings.sources import DEFAULT_PATH - -from lyscripts.utils import binom_pmf, flatten, load_model_samples, load_patient_data - -FuncNameType = Literal["binomial"] - - -DIST_MAP: dict[FuncNameType, Callable] = { - "binomial": binom_pmf, -} - - -class CrossValidationConfig(BaseModel): - """Configs for splitting a dataset into cross-validation folds.""" - - seed: int = Field( - default=42, - description="Seed for the random number generator.", - ) - folds: int = Field( - default=5, - description="Number of folds to split the dataset into.", - ) - - -class DataConfig(BaseModel): - """Where to load lymphatic progression data from and how to feed it into a model.""" - - source: FilePath | LyDataset = Field( - description=( - "Either a path to a CSV file or a config that specifies how and where " - "to fetch the data from." - ), - ) - side: Literal["ipsi", "contra"] | None = Field( - default=None, - description="Side of the neck to load data for. 
Only for Unilateral models.", - ) - mapping: dict[Literal[0, 1, 2, 3, 4] | str, int | str] = Field( - default_factory=lambda: {i: "early" if i <= 2 else "late" for i in range(5)}, - description="Optional mapping of numeric T-stages to model T-stages.", - ) - - def load(self, **get_dataframe_kwargs) -> pd.DataFrame: - """Load data from path or the :py:class:`~lydata.loader.LyDataset`.""" - if isinstance(self.source, LyDataset): - return self.source.get_dataframe(**get_dataframe_kwargs) - - return load_patient_data(self.source, **get_dataframe_kwargs) - - def get_load_kwargs(self, **read_csv_kwargs: dict[str, Any]) -> dict[str, Any]: - """Get kwargs for :py:meth:`~lymph.types.Model.load_patient_data`.""" - return { - "patient_data": self.load(**(read_csv_kwargs or {})), - **self.model_dump(exclude={"source"}, exclude_none=True), - } - - -def check_pattern(value: PatternType) -> Any: - """Check if the value can be converted to a boolean value.""" - return {lnl: map_to_optional_bool(v) for lnl, v in value.items()} - - -class DiagnosisConfig(BaseModel): - """Defines an ipsi- and contralateral diagnosis pattern.""" - - ipsi: dict[str, Annotated[PatternType, AfterValidator(check_pattern)]] = Field( - default={}, - description="Observed diagnoses by different modalities on the ipsi neck.", - examples=[{"CT": {"II": True, "III": False}}], - ) - contra: dict[str, Annotated[PatternType, AfterValidator(check_pattern)]] = Field( - default={}, - description="Observed diagnoses by different modalities on the contra neck.", - ) - - def to_involvement(self, modality: str) -> InvolvementConfig: - """Convert the diagnosis pattern to an involvement pattern for ``modality``.""" - return InvolvementConfig( - ipsi=self.ipsi.get(modality, {}), - contra=self.contra.get(modality, {}), - ) - - -class DistributionConfig(BaseModel): - """Configuration defining a distribution over diagnose times.""" - - kind: Literal["frozen", "parametric"] = Field( - default="frozen", - description="Parametric 
distributions may be updated.", - ) - func: FuncNameType = Field( - default="binomial", - description="Name of predefined function to use as distribution.", - ) - params: dict[str, int | float] = Field( - default={}, - description="Parameters to pass to the predefined function.", - ) - - -class InvolvementConfig(BaseModel): - """Config that defines an ipsi- and contralateral involvement pattern.""" - - ipsi: Annotated[PatternType, AfterValidator(check_pattern)] = Field( - default={}, - description="Involvement pattern for the ipsilateral side of the neck.", - examples=[{"II": True, "III": False}], - ) - contra: Annotated[PatternType, AfterValidator(check_pattern)] = Field( - default={}, - description="Involvement pattern for the contralateral side of the neck.", - ) - - -def retrieve_graph_representation(model: Model) -> graph.Representation: - """Retrieve the graph representation from a model.""" - if hasattr(model, "graph"): - return model.graph - - if hasattr(model, "hpv"): - return retrieve_graph_representation(model.hpv) - - if hasattr(model, "ipsi"): - return retrieve_graph_representation(model.ipsi) - - if hasattr(model, "ext"): - return retrieve_graph_representation(model.ext) - - raise ValueError("Model does not have a graph representation.") - - -class GraphConfig(BaseModel): - """Specifies how the tumor(s) and LNLs are connected in a DAG.""" - - tumor: dict[str, list[str]] = Field( - description="Define the name of the tumor(s) and which LNLs it/they drain to.", - ) - lnl: dict[str, list[str]] = Field( - description="Define the name of the LNL(s) and which LNLs it/they drain to.", - ) - - @classmethod - def from_model(cls: type, model: Model) -> GraphConfig: - """Create a ``GraphConfig`` from a ``Model``.""" - graph = retrieve_graph_representation(model) - return cls( - tumor={ - name: [edge.child.name for edge in tumor.out] - for name, tumor in graph.tumors.items() - }, - lnl={ - name: [edge.child.name for edge in lnl.out] # noqa - for name, lnl in 
graph.lnls.items() - }, - ) - - -def has_model_symbol(path: Path) -> Path: - """Check if the Python file at ``path`` defines a symbol named ``model``.""" - spec = importlib.util.spec_from_file_location(path.stem, path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - - if not hasattr(module, "model"): - raise ValueError(f"Python file at {path} does not define a symbol 'model'.") - - return path - - -def get_symmetry_kwargs(model: Model) -> dict[str, Any]: - """Get the symmetry kwargs from a model.""" - if isinstance(model, models.Unilateral | models.HPVUnilateral): - raise TypeError("Unilateral models do not have symmetry kwargs.") - - if hasattr(model, "ext"): - return get_symmetry_kwargs(model.ext) - - return getattr(model, "is_symmetric", {}) - - -class ModelConfig(BaseModel): - """Define which of the ``lymph`` models to use and how to set them up.""" - - external_file: Annotated[FilePath, AfterValidator(has_model_symbol)] | None = Field( - default=None, - description="Path to a Python file that defines a model.", - ) - class_name: Literal["Unilateral", "Bilateral", "Midline"] = Field( - default="Unilateral", - description="Name of the model class to use.", - ) - constructor: Literal["binary", "trinary"] = Field( - default="binary", - description="Trinary models differentiate btw. micro- and macroscopic disease.", - ) - max_time: int = Field( - default=10, - description="Max. number of time-steps to evolve the model over.", - ) - named_params: Sequence[str] = Field( - default=None, - description=( - "Subset of valid model parameters a sampler may provide in the form of a " - "dictionary to the model instead of as an array. Or, after sampling, with " - "this list, one may safely recover which parameter corresponds to which " - "index in the sample." 
- ), - ) - kwargs: dict[str, Any] = Field( - default={}, - description="Additional keyword arguments to pass to the model constructor.", - ) - - @classmethod - def from_model(cls: type, model: Model) -> ModelConfig: - """Create a ``ModelConfig`` from a ``Model``.""" - warnings.warn( - message=( - "Not all kwargs passed at initialization can be recovered into a " - "config. Make sure to manually double-check the config." - ), - category=UserWarning, - stacklevel=2, - ) - - if getattr(model, "_named_params", None): - additional_kwargs = {"named_params": list(model.named_params)} - else: - additional_kwargs = {} - - try: - additional_kwargs["is_symmetric"] = get_symmetry_kwargs(model) - except TypeError: - pass - - if isinstance(model, models.Midline): - additional_kwargs["use_midext_evo"] = model.use_midext_evo - additional_kwargs["use_central"] = hasattr(model, "_central") - additional_kwargs["use_mixing"] = hasattr(model, "mixing_param") - - if not hasattr(model, "_unknown"): - additional_kwargs["marginalize_unknown"] = False - - return cls( - class_name=model.__class__.__name__, - constructor="trinary" if model.is_trinary else "binary", - max_time=model.max_time, - kwargs=additional_kwargs, - ) - - -def modalityconfig_from_model(model: Model, modality_name: str) -> ModalityConfig: - """Create a ``ModalityConfig`` from a ``Model``.""" - modality = model.get_modality(modality_name) - return ModalityConfig( - spec=modality.spec, - sens=modality.sens, - kind="pathological" if isinstance(modality, Pathological) else "clinical", - ) - - -class DeprecatedModelConfig(BaseModel): - """Model configuration prior to ``lyscripts`` major version 1. - - This is implemented for backwards compatibility. Its sole job is to translate - the outdated settings format into the new one. Note that the only stuff that needs - to be translated is the model configuration itself and the distributions for - marginalization over diagnosis times. 
The :py:class:`~GraphConfig` is still - compatible. - """ - - first_binom_prob: float = Field( - description="Fixed parameter for first binomial dist over diagnosis times.", - ge=0.0, - le=1.0, - ) - max_t: int = Field( - description="Max. number of time-steps to evolve the model over.", - gt=0, - ) - t_stages: list[int | str] = Field( - description=( - "List of T-stages to marginalize over in the scenario. The old format " - "assumed all T-stages except the first one to be parametric. Only binomial " - "distributions are supported." - ), - ) - class_: Literal["Unilateral", "Bilateral", "Midline", "MidlineBilateral"] = Field( - description="Name of the model class. Only binary models are supported.", - alias="class", - ) - kwargs: dict[str, Any] = Field( - default={}, - description="Additional keyword arguments to pass to the model constructor.", - ) - - def model_post_init(self, __context): - """Issue a deprecation warning.""" - warnings.warn( - message="The 'DeprecatedModelConfig' is deprecated.", - category=DeprecationWarning, - stacklevel=2, - ) - if "Midline" in self.class_: - self.class_ = "Midline" - warnings.warn( - "Model may not be recreated as expected due to extra parameter " - "`midext_prob`. 
Make sure to manually handle edge cases.", - stacklevel=2, - ) - return super().model_post_init(__context) - - def translate(self) -> tuple[ModelConfig, dict[int | str, DistributionConfig]]: - """Translate the deprecated model config to the new format.""" - old_kwargs = self.kwargs.copy() - new_kwargs = {"use_midext_evo": False} if "Midline" in self.class_ else {} - - if (tumor_spread := old_kwargs.pop("base_symmetric")) is not None: - new_kwargs["is_symmetric"] = new_kwargs.get("is_symmetric", {}) - new_kwargs["is_symmetric"]["tumor_spread"] = tumor_spread - - if (lnl_spread := old_kwargs.pop("trans_symmetric")) is not None: - new_kwargs["is_symmetric"] = new_kwargs.get("is_symmetric", {}) - new_kwargs["is_symmetric"]["lnl_spread"] = lnl_spread - - new_kwargs.update(old_kwargs) - - model_config = ModelConfig( - class_name=self.class_, - constructor="binary", - max_time=self.max_t, - kwargs=new_kwargs, - ) - - distribution_configs = {} - for i, t_stage in enumerate(self.t_stages): - distribution_configs[t_stage] = DistributionConfig( - kind="frozen" if i == 0 else "parametric", - func="binomial", - params={"p": self.first_binom_prob}, - ) - - return model_config, distribution_configs - - -class SamplingConfig(BaseModel): - """Settings to configure the MCMC sampling.""" - - storage_file: Path = Field( - description="Path to HDF5 file store results or load last state.", - ) - history_file: Path | None = Field( - default=None, - description="Path to store the burn-in metrics (as CSV file).", - ) - dataset: str = Field( - default="mcmc", - description="Name of the dataset in the HDF5 file.", - ) - cores: int | None = Field( - gt=0, - default=os.cpu_count(), - description=( - "Number of cores to use for parallel sampling. If `None`, no parallel " - "processing is used." 
- ), - ) - seed: int = Field( - default=42, - description="Seed for the random number generator.", - ) - walkers_per_dim: int = Field( - default=20, - description="Number of walkers per parameter space dimension.", - ) - check_interval: int = Field( - default=50, - description="Check for convergence each time after this many steps.", - ) - trust_factor: float = Field( - default=50.0, - description=( - "Trust the autocorrelation time only when it's smaller than this factor " - "times the length of the chain." - ), - ) - relative_thresh: float = Field( - default=0.05, - description="Relative threshold for convergence.", - ) - burnin_steps: int | None = Field( - default=None, - description=( - "Number of burn-in steps to take. If None, burn-in runs until convergence." - ), - ) - num_steps: int | None = Field( - default=100, - description=("Number of steps to take in the MCMC sampling."), - ) - thin_by: int = Field( - default=10, - description="How many samples to draw before for saving one.", - ) - inverse_temp: float = Field( - default=1.0, - description=( - "Inverse temperature for thermodynamic integration. Note that this is not " - "yet fully implemented." - ), - ) - - def load(self, thin: int = 1) -> np.ndarray: - """Load the samples from the HDF5 file. - - Note that the ``thin`` represents another round of thinning and is usually - not necessary if the samples were already thinned during the sampling process. - """ - return load_model_samples( - file_path=self.storage_file, - name=self.dataset, - thin=thin, - ) - - -def geometric_schedule(num: int, *_a) -> np.ndarray: - """Create a geometric sequence of ``num`` numbers from 0 to 1.""" - log_seq = np.logspace(0.0, 1.0, num) - shifted_seq = log_seq - 1.0 - return shifted_seq / 9.0 - - -def linear_schedule(num: int, *_a) -> np.ndarray: - """Create a linear sequence of ``num`` numbers from 0 to 1. - - Equivalent to the :py:func:`power_schedule` with ``power=1``. 
- """ - return np.linspace(0.0, 1.0, num) - - -def power_schedule(num: int, power: float, *_a) -> np.ndarray: - """Create a power sequence of ``num`` numbers from 0 to 1. - - This is essentially a :py:func:`linear_schedule` of ``num`` numbers from 0 to 1, - but each number is raised to the power of ``power``. - """ - lin_seq = np.linspace(0.0, 1.0, num) - return lin_seq**power - - -SCHEDULES = { - "geometric": geometric_schedule, - "linear": linear_schedule, - "power": power_schedule, -} - - -class ScheduleConfig(BaseModel): - """Configuration for generating a schedule of inverse temperatures.""" - - method: Literal["geometric", "linear", "power"] = Field( - default="power", - description="Method to generate the inverse temperature schedule.", - ) - num: int = Field( - default=32, - description="Number of inverse temperatures in the schedule.", - ) - power: float = Field( - default=4.0, - description="If a power schedule is chosen, use this as power.", - ) - values: list[float] | None = Field( - default=None, - description=( - "List of inverse temperatures to use instead of generating a schedule. " - "If a list is provided, the other parameters are ignored." - ), - ) - - def get_schedule(self) -> np.ndarray: - """Get the inverse temperature schedule as a numpy array.""" - if self.values is not None: - logger.debug("Using provided inverse temperature values.") - schedule = np.array(self.values) - else: - logger.debug(f"Generating inverse temperature schedule with {self.method}.") - func = SCHEDULES[self.method] - schedule = func(self.num, self.power) - - logger.info(f"Generated inverse temperature schedule: {schedule}") - return schedule - - -def map_to_optional_bool(value: Any) -> Any: - """Try to convert the options in the `PatternType` to a boolean value.""" - if value in [True, "involved", 1]: - return True - - if value in [False, "healthy", 0]: - return False - - return value - - -class ScenarioConfig(BaseModel): - """Define a scenario for which e.g. 
prevalences and risks may be computed.""" - - t_stages: list[int | str] = Field( - description="List of T-stages to marginalize over in the scenario.", - examples=[["early"], [3, 4]], - ) - t_stages_dist: list[float] = Field( - default=[1.0], - description="Distribution over T-stages to use for marginalization.", - examples=[[1.0], [0.6, 0.4]], - ) - midext: bool | None = Field( - default=None, - description="Whether the patient's tumor extends over the midline.", - ) - mode: Literal["HMM", "BN"] = Field( - default="HMM", - description="Which underlying model architecture to use.", - ) - involvement: InvolvementConfig = InvolvementConfig() - diagnosis: DiagnosisConfig = DiagnosisConfig() - - def model_post_init(self, __context: Any) -> None: - """Interpolate and normalize the distribution.""" - self.interpolate() - self.normalize() - - def interpolate(self): - """Interpolate the distribution to the number of ``t_stages``.""" - if len(self.t_stages) != len(self.t_stages_dist): - new_x = np.linspace(0.0, 1.0, len(self.t_stages)) - old_x = np.linspace(0.0, 1.0, len(self.t_stages_dist)) - # cast to list to make ``__eq__`` work - self.t_stages_dist = np.interp(new_x, old_x, self.t_stages_dist).tolist() - - def normalize(self): - """Normalize the distribution to sum to 1.""" - if not np.isclose(np.sum(self.t_stages_dist), 1.0): - self.t_stages_dist = ( - np.array(self.t_stages_dist) / np.sum(self.t_stages_dist) - ).tolist() # cast to list to make ``__eq__`` work - - -def _construct_model_from_external(path: Path) -> Model: - """Construct a model from a Python file.""" - module_name = path.stem - spec = importlib.util.spec_from_file_location(module_name, path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - logger.info(f"Loaded model from {path}. 
This ignores model and graph configs.") - return module.model - - -def construct_model( - model_config: ModelConfig, - graph_config: GraphConfig, -) -> Model: - """Construct a model from a ``model_config``. - - The default/expected use of this is to specify a model class from the - `lymph`_ package and pass the necessary arguments to its constructor. - However, it is also possible to load a model from an external Python file via the - ``external`` attribute of the ``model_config`` argument. In this case, a symbol - with name ``model`` must be defined in the file that is to be loaded. - - .. note:: - - No check is performed on the model's compatibility with the command/pipeline - it is used in. It is assumed the model complies with the - :py:class:`model type ` specifications of the `lymph`_ - package. - - .. _lymph: https://lymph-model.readthedocs.io/stable/ - """ - if model_config.external_file is not None: - return _construct_model_from_external(model_config.external_file) - - cls = getattr(models, model_config.class_name) - constructor = getattr(cls, model_config.constructor) - model = constructor( - graph_dict=flatten(graph_config.model_dump()), - max_time=model_config.max_time, - named_params=model_config.named_params, - **model_config.kwargs, - ) - logger.info(f"Constructed model: {model}") - return model - - -def add_distributions( - model: Model, - configs: dict[str | int, DistributionConfig], - mapping: dict[FuncNameType, Callable] | None = None, - inplace: bool = False, -) -> Model: - """Construct and add distributions over diagnose times to a ``model``.""" - if not inplace: - model = deepcopy(model) - logger.debug("Created deepcopy of model.") - - mapping = mapping or DIST_MAP - - for t_stage, dist_config in configs.items(): - if dist_config.kind == "frozen": - support = np.arange(model.max_time + 1) - dist = mapping[dist_config.func](support, **dist_config.params) - elif dist_config.kind == "parametric": - dist = mapping[dist_config.func] - else: - 
raise ValueError(f"Unknown distribution kind: {dist_config.kind}") - - model.set_distribution(t_stage, dist) - if dist_config.kind == "parametric" and dist_config.params: - params = {f"{t_stage}_{k}": v for k, v in dist_config.params.items()} - model.set_params(**params) - - logger.debug(f"Set {dist_config.kind} distribution for '{t_stage}': {dist}") - - logger.info(f"Added {len(configs)} distributions to model: {model}") - return model - - -def add_modalities( - model: Model, - modalities: dict[str, ModalityConfig], - inplace: bool = False, -) -> Model: - """Add ``modalities`` to a ``model``.""" - if not inplace: - model = deepcopy(model) - logger.debug("Created deepcopy of model.") - - for modality, modality_config in modalities.items(): - model.set_modality(modality, **modality_config.model_dump()) - logger.debug(f"Added modality {modality} to model: {modality_config}") - - logger.info(f"Added {len(modalities)} modalities to model: {model}") - return model - - -def add_data( - model: Model, - path: Path, - side: Literal["ipsi", "contra"], - mapping: dict[Literal[0, 1, 2, 3, 4], int | str] | None = None, - inplace: bool = False, -) -> Model: - """Add data to a ``model``.""" - data = pd.read_csv(path, header=[0, 1, 2]) - logger.debug(f"Loaded data from {path}: Shape: {data.shape}") - - kwargs = {"patient_data": data, "mapping": mapping} - if isinstance(model, models.Unilateral): - kwargs["side"] = side - - if not inplace: - model = deepcopy(model) - logger.debug("Created deepcopy of model.") - - model.load_patient_data(**kwargs) - logger.info(f"Added data to model: {model}") - return model - - -PathType = Path | str | Sequence[Path | str] - - -class DynamicYamlConfigSettingsSource(YamlConfigSettingsSource): - """YAML config source that allows dynamic file path specification. - - This is heavily inspired by `this comment`_ in the discussion on a related issue - of the `pydantic-settings`_ GitHub repository. 
- - Essentially, this little hack allows a user to specify a one or multiple YAML files - from which the CLI should read configurations. Normally, `pydantic-settings` only - allows hard-coding the location of these config files. - - .. _this comment: https://github.com/pydantic/pydantic-settings/issues/259#issuecomment-2549444286 - .. _pydantic-settings: https://github.com/pydantic/pydantic-settings - """ - - def __init__( - self, - settings_cls, - yaml_file: PathType | None = DEFAULT_PATH, - yaml_file_encoding: str | None = None, - yaml_file_path_field: str = "configs", - ) -> None: - """Allow getting the YAML file path from any key in the current state. - - The argument ``yaml_file_path_field`` should be the :py:class:`BaseSettings` - field that contains the path(s) to the YAML file(s). - - Note that all config files must have a ``version: 1`` key in them to be - recognized as valid config files. - """ - self.yaml_file_path_field = yaml_file_path_field - super().__init__(settings_cls, yaml_file, yaml_file_encoding) - - def _read_file(self, file_path: Path) -> dict[str, Any]: - """Read the YAML and raise exception when ``version: 1`` not found.""" - with open(file_path, encoding=self.yaml_file_encoding) as yaml_file: - data = yaml.safe_load(yaml_file) or {} - if data.get("version") != 1: - raise ValueError( - f"Config file {file_path} does not have a 'version: 1' key. 
" - "For compatibility reasons, all config files must have this key.", - ) - return data - - def __call__(self) -> dict[str, Any]: - """Reload the config files from the paths in the current state.""" - yaml_file_to_reload = self.current_state.get( - self.yaml_file_path_field, - self.yaml_file_path, - ) - logger.debug(f"Reloading YAML files from {yaml_file_to_reload} (if it exists).") - self.__init__( - settings_cls=self.settings_cls, - yaml_file=yaml_file_to_reload, - yaml_file_encoding=self.yaml_file_encoding, - yaml_file_path_field=self.yaml_file_path_field, - ) - return super().__call__() - - def __repr__(self) -> str: - """Return a string representation of the source.""" - return ( - self.__class__.__name__ - + "(" - + f"yaml_file={self.yaml_file_path!r}, " - + f"yaml_file_encoding={self.yaml_file_encoding!r}, " - + f"yaml_file_path_field={self.yaml_file_path_field!r}" - + ")" - ) - - -class BaseCLI(BaseSettings): - """Base settings class for all CLI scripts to inherit from.""" - - model_config = ConfigDict(yaml_file="config.yaml", extra="ignore") - - configs: list[Path] = Field( - default=["config.yaml"], - description=( - "Path to the YAML file(s) that contain the configuration(s). Configs from " - "YAML files may be overwritten by command line arguments. When multiple " - "files are specified, the configs are merged in the order they are given. " - "Note that every config file must have a `version: 1` key in it." 
- ), - ) - - @classmethod - def settings_customise_sources( - cls, - settings_cls: type[BaseSettings], - init_settings: PydanticBaseSettingsSource, - env_settings: PydanticBaseSettingsSource, - dotenv_settings: PydanticBaseSettingsSource, - file_secret_settings: PydanticBaseSettingsSource, - ) -> tuple[PydanticBaseSettingsSource, ...]: - """Add the dynamic YAML config source to the CLI settings.""" - dynamic_yaml_config_source = DynamicYamlConfigSettingsSource( - settings_cls=settings_cls, - yaml_file_path_field="configs", - yaml_file_encoding="utf-8", - ) - logger.debug(f"Created {dynamic_yaml_config_source = }") - return ( - init_settings, - env_settings, - dotenv_settings, - file_secret_settings, - dynamic_yaml_config_source, - ) diff --git a/src/lyscripts/data/__init__.py b/src/lyscripts/data/__init__.py deleted file mode 100644 index 29ffe69..0000000 --- a/src/lyscripts/data/__init__.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Commands and functions for managing CSV data on patterns of lymphatic progression. - -This contains helpful CLI commands that allow building quick and reproducible workflows -even when using language-agnostic tools like `Make`_ or `DVC`_. - -Most of these commands can load `LyProX`_ style data from CSV files, but also from -the installed datasets provided by the `lydata`_ package and directly from the -associated `GitHub repository`_. - -Another cool feature is the built-in mini web application that allows collecting nodal -involvement data interactively and in the same standardized format as we have published -in the past, both on `LyProX`_ and in our `GitHub repository`_. It can be launched by -running `lyscripts data collect` in the terminal. See the docs for the -:py:mod:`lyscripts.data.collect` submodule on more information. - -.. _Make: https://www.gnu.org/software/make/ -.. _DVC: https://dvc.org -.. _LyProX: https://lyprox.org -.. _lydata: https://lydata.readthedocs.io -.. 
_GitHub repository: https://github.com/lycosystem/lydata -""" - -from pydantic_settings import BaseSettings, CliApp, CliSubCommand - -from lyscripts.data import ( # noqa: F401 - collect, - enhance, - fetch, - generate, - join, - lyproxify, - split, -) - -# Avoid conflict with built-in `filter` function -from lyscripts.data import filter as filter_ - - -class DataCLI(BaseSettings): - """Work with lymphatic progression data through this CLI.""" - - collect: CliSubCommand[collect.CollectorCLI] - lyproxify: CliSubCommand[lyproxify.LyproxifyCLI] - join: CliSubCommand[join.JoinCLI] - split: CliSubCommand[split.SplitCLI] - fetch: CliSubCommand[fetch.FetchCLI] - filter: CliSubCommand[filter_.FilterCLI] - enhance: CliSubCommand[enhance.EnhanceCLI] - generate: CliSubCommand[generate.GenerateCLI] - - def cli_cmd(self) -> None: - """Run one of the ``data`` subcommands.""" - CliApp.run_subcommand(self) diff --git a/src/lyscripts/data/__main__.py b/src/lyscripts/data/__main__.py deleted file mode 100644 index a78ddd1..0000000 --- a/src/lyscripts/data/__main__.py +++ /dev/null @@ -1,36 +0,0 @@ -"""Run the data module as a script.""" - -import argparse - -from lyscripts import exit_cli -from lyscripts.cli import RichDefaultHelpFormatter -from lyscripts.data import enhance, generate, join, split - -# Avoid conflict with built-in `filter` function -from lyscripts.data import filter as filter_ - - -def main(args: argparse.Namespace): - """Run the main script.""" - parser = argparse.ArgumentParser( - prog="lyscripts data", - description=__doc__, - formatter_class=RichDefaultHelpFormatter, - ) - parser.set_defaults(run_main=exit_cli) - subparsers = parser.add_subparsers() - - # the individual scripts add `ArgumentParser` instances and their arguments to - # this `subparsers` object - enhance._add_parser(subparsers, help_formatter=parser.formatter_class) - generate._add_parser(subparsers, help_formatter=parser.formatter_class) - join._add_parser(subparsers, 
help_formatter=parser.formatter_class) - split._add_parser(subparsers, help_formatter=parser.formatter_class) - filter_._add_parser(subparsers, help_formatter=parser.formatter_class) - - args = parser.parse_args() - args.run_main(args, parser) - - -if __name__ == "__main__": - main() diff --git a/src/lyscripts/data/collect/__init__.py b/src/lyscripts/data/collect/__init__.py deleted file mode 100644 index 425c728..0000000 --- a/src/lyscripts/data/collect/__init__.py +++ /dev/null @@ -1,150 +0,0 @@ -"""Submodule to collect data interactively using a simple web interface. - -With the simple command - -.. code-block:: bash - - lyscripts data collect - -One can start a very basic web server that serves an interactive UI at -``http://localhost:8000/``. There, one can enter patient, tumor, and lymphatic -involvement data one by one. When completed, the "submit" button will parse, validate, -and convert the data to serve a downloadable CSV file. - -The resulting CSV file is in the correct format to be used in `LyProX`_ and for -inference using our `lymph-model`_ library. - -.. _LyProX: https://lyprox.org -.. _lymph-model: https://lymph-model.readthedocs.io -""" - -import io -import logging -from pathlib import Path -from typing import Any - -import lydata -import lydata.validator -import pandas as pd -from fastapi import FastAPI, HTTPException -from fastapi.responses import StreamingResponse -from loguru import logger -from pydantic import Field, RootModel -from starlette.responses import FileResponse, HTMLResponse - -from lyscripts.cli import InterceptHandler, _current_log_level -from lyscripts.configs import BaseCLI - -app = FastAPI( - title="lyDATA Collector", - description=( - "A simple web interface to collect data for the lyDATA datasets. " - "This is a prototype and not intended for production use." 
- ), - version=lydata.__version__, -) - -BASE_DIR = Path(__file__).parent -modalities = lydata.schema.get_default_modalities() -RecordModel = lydata.schema.create_full_record_model(modalities, model_name="Record") -ROOT_MODEL = RootModel[list[RecordModel]] - - -@app.get("/") -def serve_index_html() -> HTMLResponse: - """Serve the ``index.html`` file at the URL's root.""" - with open(BASE_DIR / "index.html") as file: - content = file.read() - return HTMLResponse(content=content) - - -@app.get("/schema") -def serve_schema() -> dict[str, Any]: - """Serve the JSON schema for the patient and tumor records.""" - return ROOT_MODEL.model_json_schema() - - -@app.get("/collector.js") -def serve_collector_js() -> FileResponse: - """Serve the ``collector.js`` file under ``"http://{host}:{port}/collector.js"``. - - This frontend JavaScript file loads the `JSON-Editor`_ library and initializes it - using the schema returned by the :py:func:`serve_schema` function. - - .. _JSON-Editor: https://github.com/json-editor/json-editor/ - """ - return FileResponse(BASE_DIR / "collector.js") - - -@app.post("/submit") -async def process(data: RootModel) -> StreamingResponse: - """Process the submitted data to a DataFrame. - - `FastAPI`_ will automatically parse the received JSON data into the list of - instances of he pydantic type defined by the - :py:func:`lydata.schema.create_full_record_model` function. - - From this list, we create a pandas DataFrame and return it as a downloadable CSV - file. - - .. 
_FastAPI: https://fastapi.tiangolo.com/ - """ - logger.info(f"Received data: {data.root}") - - if len(data.root) == 0: - logger.warning("No records provided in the data.") - raise HTTPException( - status_code=400, - detail="No records provided in the data.", - ) - - flattened_records = [] - - for record in data.root: - flattened_record = lydata.validator.flatten(record) - logger.debug(f"Flattened record: {flattened_record}") - flattened_records.append(flattened_record) - - df = pd.DataFrame(flattened_records) - df.columns = pd.MultiIndex.from_tuples(flattened_record.keys()) - logger.info(df.patient.core.head()) - - buffer = io.StringIO() - df.to_csv(buffer, index=False) - buffer.seek(0) - logger.success("Data prepared for download") - return StreamingResponse( - buffer, - media_type="text/csv", - headers={"Content-Disposition": "attachment; filename=lydata_records.csv"}, - ) - - -class CollectorCLI(BaseCLI): - """Serve a FastAPI web app for collecting involvement patterns as CSV files.""" - - hostname: str = Field( - default="localhost", - description="Hostname to run the FastAPI app on.", - ) - port: int = Field( - default=8000, - description="Port to run the FastAPI app on.", - ) - - def cli_cmd(self) -> None: - """Run the FastAPI app.""" - logger.debug(self.model_dump_json(indent=2)) - import uvicorn - - # Intercept standard logging and redirect it to Loguru - logging.basicConfig(handlers=[InterceptHandler()], level=0, force=True) - logger.enable("lydata") - - uvicorn.run( - app, - host=self.hostname, - port=self.port, - log_level=_current_log_level.lower(), - log_config=None, - ) diff --git a/src/lyscripts/data/collect/collector.js b/src/lyscripts/data/collect/collector.js deleted file mode 100644 index 3d8d5d9..0000000 --- a/src/lyscripts/data/collect/collector.js +++ /dev/null @@ -1,151 +0,0 @@ -/** - * Client-side helper functions for collecting user input through JSONEditor, - * validating it against a fetched JSON Schema, submitting the validated data - * 
to the backend, and presenting a downloadable CSV returned by the server. - * - * NOTE: Functionality is intentionally unchanged; only readability and - * documentation have been improved. - */ - -/** - * Ensure an alert element (used to display validation errors) exists. - * Creates and appends it if missing. - * - * @returns {HTMLDivElement} The existing or newly created alert element. - */ -function ensureAlertExists() { - let alertElement = document.querySelector('.alert'); - if (!alertElement) { - alertElement = document.createElement('div'); - } - alertElement.className = 'alert alert-danger'; - const editorHolder = document.getElementById('editor_holder'); - editorHolder.appendChild(alertElement); - return alertElement; -} - -/** - * Remove an existing validation alert if present. - */ -function ensureAlertRemoved() { - const existingAlert = document.querySelector('.alert'); - if (existingAlert) { - console.log('Clearing existing alert'); - existingAlert.remove(); - } -} - -/** - * Remove an existing download button (if it exists) to avoid duplicates. - */ -function ensureDownloadButtonRemoved() { - const existingButton = document.getElementById('download_link'); - if (existingButton) { - console.log('Clearing existing download button'); - existingButton.remove(); - } -} - -/** - * Create (or replace) a download button for a CSV blob returned by the server. - * - * @param {Blob} blob - The CSV data blob to make downloadable. 
- */ -function createDownloadButton(blob) { - ensureDownloadButtonRemoved(); - - const url = window.URL.createObjectURL(blob); - const downloadLink = document.createElement('a'); - downloadLink.id = 'download_link'; - downloadLink.href = url; - downloadLink.textContent = 'Download CSV'; - downloadLink.className = 'btn btn-success'; - downloadLink.download = 'lydata_records.csv'; - - document.getElementById('editor_holder').appendChild(downloadLink); - console.log('Download button created:', downloadLink); -} - -/** - * Send validated editor data to the backend for processing. Expects a CSV blob - * in response which is then exposed via a generated download button. - * - * @param {JSONEditor} editor - The JSONEditor instance from which to read data. - */ -async function sendEditorData(editor) { - const data = editor.getValue(); - console.log('Sending data:', data); - - try { - const response = await fetch('/submit', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(data) - }); - - if (!response.ok) { - // Try to parse error details from JSON, fallback to text - let errorMsg = 'Unknown error'; - try { - const err = await response.json(); - errorMsg = err.detail || err.message || errorMsg; - } catch { - errorMsg = await response.text(); - } - throw new Error(errorMsg); - } - - const blob = await response.blob(); - console.log('Received processed data as blob:', blob); - createDownloadButton(blob); - } catch (error) { - ensureDownloadButtonRemoved(); - console.error('Error submitting data:', error); - const alert = ensureAlertExists(); - alert.textContent = 'Error submitting data: ' + error.message; - alert.classList.add('alert-danger'); - } -} - -/** - * Validate the editor content. If there are validation errors they are - * displayed in an alert; otherwise the data is submitted to the backend. - * - * @param {JSONEditor} editor - The JSONEditor instance to validate & submit. 
- */ -function processEditor(editor) { - const errors = editor.validate(); - - if (errors.length) { - console.error('Validation errors:', errors); - const alert = ensureAlertExists(); - alert.textContent = 'Validation errors: ' + errors.map(e => e.message).join(', '); - } else { - console.log('Data successfully validated'); - ensureAlertRemoved(); - sendEditorData(editor); - } -} - -// Fetch the JSON Schema to initialize the editor -fetch('/schema') - .then(response => response.json()) - .then(schema => { - const element = document.getElementById('editor_holder'); - const options = { - disable_edit_json: true, - theme: 'bootstrap5', - iconlib: 'bootstrap', - object_layout: 'grid', - disable_properties: true, - schema: schema - }; - const editor = new JSONEditor(element, options); - - // Bind the submit button to validation + submission flow - document.getElementById('submit').addEventListener('click', () => { - console.log('Submit button clicked'); - processEditor(editor); - }); - }) - .catch(error => console.error('Error loading schema:', error)); diff --git a/src/lyscripts/data/collect/index.html b/src/lyscripts/data/collect/index.html deleted file mode 100644 index 931dacf..0000000 --- a/src/lyscripts/data/collect/index.html +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - Basic JSON Editor Example - - - - - - - - -

LyDATA Collector

- -

- - - -

- - - diff --git a/src/lyscripts/data/enhance.py b/src/lyscripts/data/enhance.py deleted file mode 100644 index 87b1e23..0000000 --- a/src/lyscripts/data/enhance.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Enhance the dataset by inferring additional columns from the data. - -This is a command-line interface to the methods -:py:meth:`~lydata.accessor.LyDataAccessor.combine` and -:py:meth:`~lydata.accessor.LyDataAccessor.augment` of the -:py:class:`~lydata.accessor.LyDataAccessor` class. -""" - -from typing import Literal - -from loguru import logger -from lydata.accessor import LyDataFrame -from lydata.utils import ModalityConfig - -from lyscripts.cli import assemble_main -from lyscripts.configs import BaseCLI, DataConfig -from lyscripts.data.utils import save_table_to_csv - - -class EnhanceCLI(BaseCLI): - """Enhance the dataset by inferring additional columns from the data.""" - - input: DataConfig - modalities: dict[str, ModalityConfig] | None = None - method: Literal["max_llh", "rank"] = "max_llh" - lnl_subdivisions: dict[str, list[str]] = { - "I": ["a", "b"], - "II": ["a", "b"], - "V": ["a", "b"], - } - output_file: str - - def cli_cmd(self) -> None: - """Infer additional columns from the data and save the enhanced dataset. - - This basically provides a CLI to the - :py:func:`~lydata.accessor.LyDataAccessor.augment` function. See its docs for - more details on what exactly is happening here. 
- """ - logger.debug(self.model_dump_json(indent=2)) - - data: LyDataFrame = self.input.load() - data = data.ly.enhance( - modalities=self.modalities, - method=self.method, - subdivisions=self.lnl_subdivisions, - ) - save_table_to_csv(file_path=self.output_file, table=data) - - -if __name__ == "__main__": - main = assemble_main(settings_cls=EnhanceCLI, prog_name="enhance") - main() diff --git a/src/lyscripts/data/fetch.py b/src/lyscripts/data/fetch.py deleted file mode 100644 index dccbf87..0000000 --- a/src/lyscripts/data/fetch.py +++ /dev/null @@ -1,57 +0,0 @@ -"""Small command to fetch the data from a remote using the lydata package.""" - -from pathlib import Path - -import lydata # noqa: F401 -from loguru import logger -from lydata.loader import LyDataset -from pydantic import Field - -from lyscripts.cli import assemble_main -from lyscripts.configs import BaseCLI - - -class FetchCLI(LyDataset, BaseCLI): - """Fetch a specific dataset from the lyDATA repository.""" - - github_token: str | None = Field( - default=None, - description=( - "GitHub token to access private datasets. Can also be provided as " - "`GITHUB_TOKEN` environment variable." - ), - ) - github_user: str | None = Field( - default=None, - description=( - "GitHub user for non-token login. Can also be provided as " - "`GITHUB_USER` environment variable." - ), - ) - github_password: str | None = Field( - default=None, - description=( - "GitHub password for non-token login. Can also be provided as " - "`GITHUB_PASSWORD` environment variable." 
- ), - ) - output_file: Path = Field(description="The path to save the dataset to.") - - def cli_cmd(self): - """Execute the ``fetch`` command.""" - logger.enable("lydata") - logger.debug(self.model_dump_json(indent=2)) - - dataset = self.get_dataframe( - use_github=True, - token=self.github_token, - user=self.github_user, - password=self.github_password, - ) - dataset.to_csv(self.output_file, index=False) - logger.success(f"Fetched dataset and saved to {self.output_file}") - - -if __name__ == "__main__": - main = assemble_main(settings_cls=FetchCLI, prog_name="fetch") - main() diff --git a/src/lyscripts/data/filter.py b/src/lyscripts/data/filter.py deleted file mode 100644 index 3e09fe8..0000000 --- a/src/lyscripts/data/filter.py +++ /dev/null @@ -1,99 +0,0 @@ -"""Filter a dataset according to some common criteria. - -This is essentially a command line interface to building a -:py:class:`query object ` and applying it to the dataset. -""" - -from pathlib import Path -from typing import Literal - -from loguru import logger -from lydata import Q -from pydantic import Field -from pydantic_settings import CliImplicitFlag - -from lyscripts.cli import assemble_main -from lyscripts.configs import BaseCLI, DataConfig -from lyscripts.data.utils import save_table_to_csv - - -class FilterCLI(BaseCLI): - """In- or exclude patients where a certain column fulfills a certain condition.""" - - input: DataConfig - include: CliImplicitFlag[bool] = Field( - False, - description="Include patients where the condition is met (default: exclude).", - ) - column: list[str] | str = Field( - description=( - "The column to filter by. May be a tuple of three strings, since data " - "has a three-level header. If it is only one string, the lydata package " - "tries to map that to a three-level header." 
- ), - ) - operator: Literal["==", "!=", ">", "<", ">=", "<=", "in", "contains"] = Field( - description="The operator to use for comparison.", - ) - value: float | int | str = Field(description="The value to compare against.") - output_file: Path = Field(description="The path to save the filtered dataset to.") - - def model_post_init(self, __context): - """Cast to ``float``, if not possible ``int``, if not possible ``str``.""" - if isinstance(self.column, list): - if len(self.column) == 1: - self.column = self.column[0] - elif len(self.column) == 3: - self.column = tuple(self.column) - else: - raise ValueError( - "The column attribute must be an iterable of three strings or a " - f"single string, but it is {self.column}.", - ) - - try: - self.value = float(self.value) - return super().model_post_init(__context) - except ValueError: - pass - - try: - self.value = int(self.value) - return super().model_post_init(__context) - except ValueError: - pass - - return super().model_post_init(__context) - - def cli_cmd(self): - """Execute the ``filter`` command. - - This command uses the :py:class:`~lydata.querier.Q` objects of the `lydata`_ - library to filter the dataset according to the given criteria. - - .. 
_lydata: https://lydata.readthedocs.io - """ - logger.debug(self.model_dump_json(indent=2)) - - data = self.input.load() - query = Q( - column=self.column, - operator=self.operator, - value=self.value, - ) - logger.debug(f"Created query object: {query}") - mask = query.execute(data) - - if self.include: - filtered = data[mask] - logger.info(f"Keeping {sum(mask)} of {len(data)} patients.") - else: - filtered = data[~mask] - logger.info(f"Excluding {sum(mask)} of {len(data)} patients.") - - save_table_to_csv(file_path=self.output_file, table=filtered) - - -if __name__ == "__main__": - main = assemble_main(settings_cls=FilterCLI, prog_name="filter") - main() diff --git a/src/lyscripts/data/generate.py b/src/lyscripts/data/generate.py deleted file mode 100644 index 8dadf03..0000000 --- a/src/lyscripts/data/generate.py +++ /dev/null @@ -1,96 +0,0 @@ -"""Script to generate a synthetic dataset. - -The generation is done by the :py:meth:`~lymph.models.Unilateral.draw_patients` method -of -the `lymph`_ package, which is why this requires the specification of a model -via the :py:class:`~lyscripts.configs.ModelConfig` class. - -.. _lymph: https://lymph-model.readthedocs.io/ -""" - -import numpy as np -from loguru import logger -from lydata.utils import ModalityConfig -from pydantic import Field - -from lyscripts.cli import assemble_main -from lyscripts.configs import ( - BaseCLI, - DistributionConfig, - GraphConfig, - ModelConfig, - add_distributions, - add_modalities, - construct_model, -) -from lyscripts.data.utils import save_table_to_csv - - -class GenerateCLI(BaseCLI): - """Settings for the command-line interface.""" - - graph: GraphConfig - model: ModelConfig = ModelConfig() - distributions: dict[str, DistributionConfig] = Field( - default={}, - description=( - "Mapping of model T-categories to predefined distributions over " - "diagnose times." 
- ), - ) - t_stages_dist: dict[str, float] = Field( - description=( - "Specify what fraction of generated patients should come from the " - "respective T-Stage." - ), - ) - modalities: dict[str, ModalityConfig] - params: dict[str, float] - num_patients: int = 200 - output_file: str - seed: int = 42 - - def model_post_init(self, __context) -> None: - """Make sure distribution over T-stages is normalized.""" - total = 0.0 - for t_stage in self.distributions: - if t_stage not in self.t_stages_dist: - raise ValueError(f"Missing distribution for T-stage {t_stage}.") - - total += self.t_stages_dist[t_stage] - - if not np.isclose(total, 1.0): - raise ValueError("Sum of T-stage distributions must be 1.") - - return super().model_post_init(__context) - - def cli_cmd(self) -> None: - """Run the ``generate`` command. - - Here, the command constructs a model from the settings provided via the - arguments. It then generates a synthetic dataset using the - :py:meth:`~lymph.models.Unilateral.draw_patients` from the `lymph`_ package. - - .. 
_lymph: https://lymph-model.readthedocs.io/ - """ - logger.debug(self.model_dump_json(indent=2)) - - model = construct_model(self.model, self.graph) - model = add_distributions(model, self.distributions) - model = add_modalities(model, self.modalities) - model.set_params(**self.params) - logger.info(f"Set parameters: {model.get_params(as_dict=True)}") - - synth_data = model.draw_patients( - num=self.num_patients, - stage_dist=list(self.t_stages_dist.values()), - seed=self.seed, - ) - logger.info(f"Generated synthetic data with shape {synth_data.shape}") - - save_table_to_csv(file_path=self.output_file, table=synth_data) - - -if __name__ == "__main__": - main = assemble_main(settings_cls=GenerateCLI, prog_name="data generate") - main() diff --git a/src/lyscripts/data/join.py b/src/lyscripts/data/join.py deleted file mode 100644 index 8b0547e..0000000 --- a/src/lyscripts/data/join.py +++ /dev/null @@ -1,82 +0,0 @@ -"""Join multiple lymphatic progression datasets into a single dataset.""" - -from pathlib import Path - -import pandas as pd -from lydata.validator import cast_dtypes -from pydantic import Field - -from lyscripts.cli import assemble_main -from lyscripts.configs import BaseCLI, DataConfig -from lyscripts.data.utils import save_table_to_csv - - -class JoinCLI(BaseCLI): - """Join multiple lymphatic progression datasets into a single dataset.""" - - inputs: list[DataConfig] = Field(description="The datasets to join.") - output_file: Path = Field(description="The path to the output dataset.") - - def cli_cmd(self) -> None: - r"""Start the ``join`` subcommand. - - This will load all datasets specified in the ``inputs`` attribute and - concatenate them into a single dataset. - - Unfortunately, the use of `pydantic`_ does make this particular command a - little bit more complicated (but also more powerful): If one simply wants to - concatenate multiple datasets on disk, the ``inputs`` should be provided like - this: - - .. 
code-block:: bash - - lyscripts data join \ - --inputs '{"source": "file1.csv"}' \ - --inputs '{"source": "file2.csv"}' \ - --output-file "joined.csv" - - But it also allows for concatenating datasets fetched directly from the - `lydata Github repo`_. Due to the rather complex command signature, we - recommend defining what to concatenate using a YAML file: - - .. code-block:: yaml - - inputs: - - data.year: 2021 - data.institution: "usz" - data.subsite: "oropharynx" - - data.year: 2021 - data.institution: "clb" - data.subsite: "oropharynx" - - Then, the command will look like this: - - .. code-block:: bash - - lyscripts data join --configs datasets.ly.yaml --output-file joined.csv - - .. _pydantic: https://docs.pydantic.dev/latest/ - .. _lydata Github repo: https://github.com/lycosystem/lydata - """ - joined = None - - for data_config in self.inputs: - data = data_config.load() - # `cast_dtypes()` ensures that e.g. boolean values are not suddenly - # converted to strings when a dataset with missing values is concatenated. - data = cast_dtypes(data) - if joined is None: - joined = data - else: - joined = pd.concat( - [joined, data], - axis="index", - ignore_index=True, - ) - - save_table_to_csv(file_path=self.output_file, table=joined) - - -if __name__ == "__main__": - main = assemble_main(settings_cls=JoinCLI, prog_name="join") - main() diff --git a/src/lyscripts/data/lyproxify.py b/src/lyscripts/data/lyproxify.py deleted file mode 100644 index d810824..0000000 --- a/src/lyscripts/data/lyproxify.py +++ /dev/null @@ -1,341 +0,0 @@ -"""Consumes raw data and transforms it into a CSV that `LyProX`_ understands. - -To do so, it needs a dictionary that defines a mapping from raw columns to the LyProX -style data format. See the documentation of the :py:func:`.transform_to_lyprox` function -for more information. - -.. 
_LyProX: https://lyprox.org -""" - -import importlib.util -import warnings -from pathlib import Path -from typing import Annotated, Any - -import lydata # noqa: F401 -import pandas as pd -from loguru import logger -from lydata import C -from pydantic import AfterValidator, Field, FilePath - -from lyscripts.cli import assemble_main -from lyscripts.configs import BaseCLI -from lyscripts.data.utils import save_table_to_csv -from lyscripts.utils import delete_private_keys, flatten, load_patient_data - -warnings.simplefilter(action="ignore", category=FutureWarning) - - -def ensure_python_file(file: Path) -> Path: - """Check if the file is a Python file.""" - if file.suffix != ".py": - raise ValueError("Mapping file must be a Python file.") - - return file - - -def ensure_column_map(file: Path) -> Path: - """Ensure the Python file contains a ``COLUMN_MAP`` dictionary.""" - spec = importlib.util.spec_from_file_location("map_module", file) - mapping = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mapping) - - if not hasattr(mapping, "COLUMN_MAP"): - raise ValueError("Mapping file must contain a `COLUMN_MAP` dictionary.") - - return file - - -class LyproxifyCLI(BaseCLI): - """Map any CSV file to the LyProX format with the help of a Python mapping dict.""" - - input_file: FilePath = Field(description="Location of raw CSV data.") - num_header_rows: int = Field( - default=1, - description="Number of rows comprising the header of the raw CSV file.", - ) - mapping_file: Annotated[ - FilePath, - AfterValidator(ensure_python_file), - AfterValidator(ensure_column_map), - ] = Field( - description=( - "Location of Python file containing a `COLUMN_MAP` dictionary. It may also " - "contain an `EXCLUDE` list of tuples `(column, check)` to exclude patients." - ), - ) - drop_rows: list[int] = Field( - default=[], - description=( - "Delete rows of specified indices. Counting of rows start at 0 _after_ " - "the `header-rows`." 
- ), - ) - drop_cols: list[int] = Field( - default=[], - description="Delete columns of specified indices.", - ) - output_file: Path = Field(description="Location to store the lyproxified CSV file.") - - def cli_cmd(self) -> None: - """Start the ``lyproxify`` subcommand. - - After reading in the specified file, it will first ``drop_rows`` and - ``drop_cols``, as specified in the command line arguments. Then, it will - call :py:func:`.exclude_patients` which will further remove patients based - on the ``EXCLUDE`` object in the ``mapping_file``. Finally, it will call - :py:func:`.transform_to_lyprox` to transform the data into the LyProX format - given the ``COLUMN_MAP`` object in the ``mapping_file``. - """ - logger.debug(self.model_dump_json(indent=2)) - - raw = load_patient_data( - file_path=self.input_file, - header=list(range(self.num_header_rows)), - ) - raw = clean_header( - table=raw, - num_cols=raw.shape[1], - num_header_rows=self.num_header_rows, - ) - - cols_to_drop = raw.columns[self.drop_cols] - trimmed = raw.drop(cols_to_drop, axis="columns") - trimmed = trimmed.drop(index=self.drop_rows) - trimmed = trimmed.dropna(axis="index", how="all") - logger.info(f"Dropped rows {self.drop_rows} and columns {cols_to_drop}.") - - spec = importlib.util.spec_from_file_location("map_module", self.mapping_file) - mapping = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mapping) - logger.info(f"Imported mapping instructions from {self.mapping_file}") - - reduced = exclude_patients(trimmed, mapping.EXCLUDE) - processed = transform_to_lyprox(reduced, mapping.COLUMN_MAP) - - if "side" in processed.ly: - processed = leftright_to_ipsicontra(processed) - - save_table_to_csv(file_path=self.output_file, table=processed) - - -class ParsingError(Exception): - """Error while parsing the CSV file.""" - - -def clean_header( - table: pd.DataFrame, - num_cols: int, - num_header_rows: int, -) -> pd.DataFrame: - """Rename the header cells in the ``table``.""" - table = 
table.copy() - - for col in range(num_cols): - for row in range(num_header_rows): - table.rename( - columns={f"Unnamed: {col}_level_{row}": f"{col}_lvl_{row}"}, - inplace=True, - ) - - logger.debug("Cleaned headers of the raw data.") - return table - - -def get_instruction_depth(nested_column_map: dict[tuple, dict[str, Any]]) -> int: - """Get the depth at which the column mapping instructions are nested. - - Instructions are a dictionary that contains either a 'func' or 'default' key. - - >>> nested_column_map = {"patient": {"age": {"func": int}}} - >>> get_instruction_depth(nested_column_map) - 2 - >>> flat_column_map = flatten(nested_column_map, max_depth=2) - >>> get_instruction_depth(flat_column_map) - 1 - >>> nested_column_map = {"patient": {"__doc__": "some patient info", "age": 61}} - >>> get_instruction_depth(nested_column_map) - Traceback (most recent call last): - ... - ValueError: Leaf of column map must be a dictionary with 'func' or 'default' key. - """ - for _, value in nested_column_map.items(): - if isinstance(value, dict): - if "func" in value or "default" in value: - return 1 - - return 1 + get_instruction_depth(value) - - raise ValueError( - "Leaf of column map must be a dictionary with 'func' or 'default' key.", - ) - - raise ValueError("Empty column map.") - - -def generate_markdown_docs( - nested_column_map: dict[tuple, dict[str, Any]], - depth: int = 0, - indent_len: int = 4, -) -> str: - r"""Generate a markdown nested, ordered list as documentation for the column map. - - A key in the doctionary is supposed to be documented, when its value is a dictionary - containing a ``"__doc__"`` key. - - >>> nested_column_map = { - ... "patient": { - ... "__doc__": "some patient info", - ... "age": { - ... "__doc__": "age of the patient", - ... "func": int, - ... "columns": ["age"], - ... }, - ... }, - ... } - >>> generate_markdown_docs(nested_column_map) - '1. **`patient:`** some patient info\n 1. 
**`age:`** age of the patient\n' - """ - md_docs = "" - indent = " " * indent_len * depth - i = 1 - for key, value in nested_column_map.items(): - if isinstance(value, dict): - if "__doc__" in value: - md_docs += f"{indent}{i}. **`{key}:`** {value['__doc__']}\n" - i += 1 - - md_docs += generate_markdown_docs(value, depth + 1, indent_len) - - return md_docs - - -def transform_to_lyprox( - raw: pd.DataFrame, - column_map: dict[tuple, dict[str, Any]], -) -> pd.DataFrame: - """Transform ``raw`` data into table that can be uploaded directly to `LyProX`_. - - To do so, it uses instructions in the `colum_map` dictionary, that needs to have - a particular structure: - - For each column in the final 'lyproxified' `pd.DataFrame`, one entry must exist in - the `column_map` dictionary. E.g., for the column corresponding to a patient's age, - the dictionary should contain a key-value pair of this shape: - - .. code-block:: python - - column_map = { - ("patient", "core", "age"): { - "func": compute_age_from_raw, - "kwargs": {"randomize": False}, - "columns": ["birthday", "date of diagnosis"] - }, - } - - In this example, the function ``compute_age_from_raw`` is called with the - values of the columns ``"birthday"`` and ``"date of diagnosis"`` as positional - arguments, and the keyword argument ``"randomize"`` is set to ``False``. The - function then returns the patient's age, which is subsequently stored in the column - ``("patient", "core", "age")``. - - Note that the ``column_map`` dictionary must have either a ``"default"`` key or - ``"func"`` along with ``"columns"`` and ``"kwargs"``, depending on the function - definition. If the function does not take any arguments, ``"columns"`` can be - omitted. If it also does not take any keyword arguments, ``"kwargs"`` can be - omitted, too. - - .. 
_LyProX: https://lyprox.org - """ - column_map = delete_private_keys(column_map) - - if (instruction_depth := get_instruction_depth(column_map)) > 1: - column_map = flatten(column_map, max_depth=instruction_depth) - - multi_idx = pd.MultiIndex.from_tuples(column_map.keys()) - processed = pd.DataFrame(columns=multi_idx) - - for multi_idx_col, instruction in column_map.items(): - if instruction != "": - if "default" in instruction: - processed[multi_idx_col] = [instruction["default"]] * len(raw) - elif "func" in instruction: - cols = instruction.get("columns", []) - kwargs = instruction.get("kwargs", {}) - func = instruction["func"] - - try: - processed[multi_idx_col] = [ - func(*vals, **kwargs) for vals in raw[cols].values - ] - except Exception as exc: - raise ParsingError( - f"Exception encountered while parsing column {multi_idx_col}", - ) from exc - else: - raise ParsingError( - f"Column {multi_idx_col} has neither a `default` value nor `func` " - "describing how to fill this column.", - ) - - logger.info("Transformed raw data to LyProX format.") - return processed - - -def leftright_to_ipsicontra(data: pd.DataFrame): - """Change absolute side reporting to tumor-relative. - - Transform reporting of LNL involvement by absolute side (right & left) to a - reporting relative to the tumor (ipsi- & contralateral). The table ``data`` should - already be in the format LyProX requires, except for the side-reporting of LNL - involvement. 
- """ - len_before = len(data) - left_data = data.ly.query(C("side") != "right") - right_data = data.ly.query(C("side") == "right") - - left_data = left_data.rename(columns={"left": "ipsi"}, level=1) - left_data = left_data.rename(columns={"right": "contra"}, level=1) - right_data = right_data.rename(columns={"left": "contra"}, level=1) - right_data = right_data.rename(columns={"right": "ipsi"}, level=1) - - data = pd.concat([left_data, right_data], ignore_index=True) - if len_before != len(data): - raise RuntimeError("Number of patients changed") - - logger.info("Transformed side reporting to ipsi- and contralateral.") - return data - - -def exclude_patients(raw: pd.DataFrame, exclude: list[tuple[str, Any]]): - """Exclude patients in the ``raw`` data based on a list of what to ``exclude``. - - The ``exclude`` list contains tuples ``(column, check)``. The ``check`` function - will then exclude any patients from the cohort where ``check(raw[column])`` - evaluates to ``True``. - - >>> exclude = [("age", lambda s: s > 50)] - >>> table = pd.DataFrame({ - ... "age": [43, 82, 18, 67], - ... "T-category": [ 3, 4, 2, 1], - ... 
}) - >>> exclude_patients(table, exclude) - age T-category - 0 43 3 - 2 18 2 - """ - num_before = len(raw) - filtered = raw.copy() - - for column, check in exclude: - is_excluded = check(filtered[column]) - filtered = filtered.loc[~is_excluded] - - num_after = len(filtered) - logger.info(f"Excluded {num_before - num_after} patients.") - return filtered - - -if __name__ == "__main__": - main = assemble_main(settings_cls=LyproxifyCLI, prog_name="lyproxify") - main() diff --git a/src/lyscripts/data/split.py b/src/lyscripts/data/split.py deleted file mode 100644 index 2c9789e..0000000 --- a/src/lyscripts/data/split.py +++ /dev/null @@ -1,73 +0,0 @@ -"""Split a dataset into cross-validation folds based on params.yaml file.""" - -import warnings -from pathlib import Path - -import numpy as np -import pandas as pd -from loguru import logger -from pydantic import Field - -from lyscripts.cli import assemble_main -from lyscripts.configs import BaseCLI, CrossValidationConfig, DataConfig -from lyscripts.data.utils import save_table_to_csv - -warnings.simplefilter(action="ignore", category=FutureWarning) - - -class SplitCLI(BaseCLI): - """Split a dataset into cross-validation folds.""" - - input: DataConfig - cross_validation: CrossValidationConfig = CrossValidationConfig() - output_dir: Path = Field(description="The folder to store the split CSV files in.") - - def cli_cmd(self) -> None: - """Run the ``split`` subcommand. - - This will load the dataset specified in the ``input`` argument and split it - into the number of folds specified in the ``cross_validation`` argument. The - resulting splits will be stored in the folder specified in the ``output_dir`` - argument. 
- """ - logger.debug(self.model_dump_json(indent=2)) - - self.output_dir.mkdir(parents=True, exist_ok=True) - logger.info(f"Ensure output directory {self.output_dir} exists") - - data = self.input.load() - - shuffled_data = data.sample( - frac=1.0, - replace=False, - random_state=self.cross_validation.seed, - ).reset_index(drop=True) - - split_datas = np.array_split( - ary=shuffled_data, - indices_or_sections=self.cross_validation.folds, - ) - for fold in range(self.cross_validation.folds): - _train_datas = [ - split_datas[i] for i in range(self.cross_validation.folds) if i != fold - ] - train_data = pd.concat( - objs=_train_datas, - axis="index", - ignore_index=True, - ) - eval_data = split_datas[fold] - - save_table_to_csv( - file_path=self.output_dir / f"{fold}_train.csv", - table=train_data, - ) - save_table_to_csv( - file_path=self.output_dir / f"{fold}_eval.csv", - table=eval_data, - ) - - -if __name__ == "__main__": - main = assemble_main(settings_cls=SplitCLI, prog_name="split") - main() diff --git a/src/lyscripts/data/utils.py b/src/lyscripts/data/utils.py deleted file mode 100644 index a5399a9..0000000 --- a/src/lyscripts/data/utils.py +++ /dev/null @@ -1,16 +0,0 @@ -"""Utilities related to the commands for data cleaning and processing.""" - -from pathlib import Path - -import pandas as pd -from loguru import logger - -from lyscripts.decorators import check_output_dir_exists - - -@check_output_dir_exists -def save_table_to_csv(file_path: Path, table: pd.DataFrame): - """Save a ``table`` to ``output_path``.""" - shape = table.shape - logger.info(f"Saving table with {shape=} to {file_path.resolve()}") - table.to_csv(file_path, index=None) diff --git a/src/lyscripts/decorators.py b/src/lyscripts/decorators.py deleted file mode 100644 index 366ccd9..0000000 --- a/src/lyscripts/decorators.py +++ /dev/null @@ -1,88 +0,0 @@ -"""Decorators to avoid repetitive snippets of code. - -E.g. safely opening files or logging the state of a function call. 
- -This is *not* a command line tool. -""" - -import functools -import logging -from collections.abc import Callable -from functools import wraps -from pathlib import Path -from typing import Any - - -def assemble_signature(*args, **kwargs) -> str: - """Assemble the signature of the function call.""" - args_str = ", ".join(str(arg) for arg in args) - kwargs_str = ", ".join(f"{key}={value}" for key, value in kwargs.items()) - return ", ".join([args_str, kwargs_str]) - - -def log_state(log_level: int = logging.INFO) -> Callable: - """Provide a decorator that logs the state of the function execution. - - The log message will simply be the function name where underscores are replaced - with spaces. The `log_level` can be set in the decorator call. - """ - - def log_decorator(func: Callable): - """Decorate function for which to add logs.""" - - @functools.wraps(func) - def wrapper(*args, **kwargs): - """Execute decorated function.""" - logger = logging.getLogger(func.__module__) - signature = assemble_signature(*args, **kwargs) - logger.debug(f"Executing {func.__name__}({signature}).") - log_msg_from_func = func.__name__.replace("_", " ").capitalize() + "." 
- - try: - logger.log( - log_level, - log_msg_from_func, - extra={ - "func_filepath": f"{func.__module__.replace('.', '/')}.py", - "func_name": func.__name__, - "module_name": func.__module__, - }, - ) - return func(*args, **kwargs) - - except Exception as exc: - logger.error(f"Error calling {func.__name__}().", exc_info=exc) - raise exc - - return wrapper - - return log_decorator - - -def check_input_file_exists(loading_func: Callable) -> Callable: - """Check if the file path provided to the `loading_func` exists.""" - - @wraps(loading_func) - def inner(file_path: str, *args, **kwargs) -> Any: - """Execute wrapped loading function.""" - file_path = Path(file_path) - if not file_path.is_file(): - raise FileNotFoundError(f"File {file_path} does not exist.") - - return loading_func(file_path, *args, **kwargs) - - return inner - - -def check_output_dir_exists(saving_func: Callable) -> Callable: - """Make sure the parent directory of the saved file exists.""" - - @wraps(saving_func) - def inner(file_path: str, *args, **kwargs) -> Any: - """Execute wrapped saving function.""" - file_path = Path(file_path) - file_path.parent.mkdir(parents=True, exist_ok=True) - - return saving_func(file_path, *args, **kwargs) - - return inner diff --git a/src/lyscripts/evaluate.py b/src/lyscripts/evaluate.py deleted file mode 100644 index 5f9dbd0..0000000 --- a/src/lyscripts/evaluate.py +++ /dev/null @@ -1,205 +0,0 @@ -"""Evaluate the performance of the trained model. - -This is done by computing quantities like the Bayesian information criterion (BIC) or -(if thermodynamic integration was performed) the actual evidence (with error) of the -model. 
-""" - -import argparse -import json -from pathlib import Path - -import emcee -import h5py -import numpy as np -import pandas as pd -from loguru import logger -from scipy.integrate import trapezoid - -from lyscripts.utils import load_patient_data, load_yaml_params - -RNG = np.random.default_rng() - - -def _add_parser( - subparsers: argparse._SubParsersAction, - help_formatter, -): - """Add an ``ArgumentParser`` to the subparsers action.""" - parser = subparsers.add_parser( - Path(__file__).name.replace(".py", ""), - description=__doc__, - help=__doc__, - formatter_class=help_formatter, - ) - _add_arguments(parser) - - -def _add_arguments(parser: argparse.ArgumentParser): - """Add arguments to a ``subparsers`` instance and run its main function when chosen. - - This is called by the parent module that is called via the command line. - """ - parser.add_argument( - "data", - type=Path, - help="Path to the tables of patient data (CSV).", - ) - parser.add_argument("model", type=Path, help="Path to model output files (HDF5).") - - parser.add_argument( - "-p", - "--params", - default="./params.yaml", - type=Path, - help="Path to parameter file", - ) - parser.add_argument( - "--plots", - default="./plots", - type=Path, - help="Directory for storing plots", - ) - parser.add_argument( - "--metrics", - default="./metrics.json", - type=Path, - help="Path to metrics file", - ) - - parser.set_defaults(run_main=main) - - -def comp_bic(log_probs: np.ndarray, num_params: int, num_data: int) -> float: - r"""Compute the negative one half of the Bayesian Information Criterion (BIC). - - The BIC is defined as [^1] - $$ BIC = k \\ln{n} - 2 \\ln{\\hat{L}} $$ - where $k$ is the number of parameters ``num_params``, $n$ the number of datapoints - ``num_data`` and $\\hat{L}$ the maximum likelihood estimate of the ``log_prob``. 
- It is constructed such that the following is an - approximation of the model evidence: - $$ p(D \\mid m) \\approx \\exp{\\left( - BIC / 2 \\right)} $$ - which is why this function returns the negative one half of it. - - [^1]: https://en.wikipedia.org/wiki/Bayesian_information_criterion - """ - return np.max(log_probs) - num_params * np.log(num_data) / 2.0 - - -def compute_evidence( - temp_schedule: np.ndarray, - log_probs: np.ndarray, -) -> float: - """Compute the evidence. - - Given a ``temp_schedule`` of inverse temperatures and corresponding sets of - ``log_probs``, we calculate the mean ``log_prob`` over all samples to approximate - the expectation value under the corresponding power posterior for each step in the - ``temp_schedule``. The evidence is evaluated using trapezoidal integration of the - expectation values over the ``temp_schedule``. - """ - a_mc = np.mean(log_probs, axis=1) - return trapezoid(y=a_mc, x=temp_schedule) - - -def compute_ti_results( - metrics: dict, - params: dict, - ndim: int, - h5_file: Path, - model: Path, -) -> tuple[np.ndarray, np.ndarray]: - """Compute the results in case of a thermodynamic integration run.""" - temp_schedule = params["sampling"]["temp_schedule"] - num_temps = len(temp_schedule) - - if num_temps != len(h5_file["ti"]): - raise RuntimeError( - f"Parameters suggest temp schedule of length {num_temps}, " - f"but stored are {len(h5_file['ti'])}", - ) - - nwalker = ndim * params["sampling"]["walkers_per_dim"] - nsteps = params["sampling"]["nsteps"] - ti_log_probs = np.zeros(shape=(num_temps, nsteps * nwalker)) - - for i, run in enumerate(h5_file["ti"]): - reader = emcee.backends.HDFBackend(model, name=f"ti/{run}", read_only=True) - ti_log_probs[i] = reader.get_blobs(flat=True) - - evidence = compute_evidence(temp_schedule, ti_log_probs) - metrics["evidence"] = evidence - - return temp_schedule, ti_log_probs - - -def main(args: argparse.Namespace): - """Run main script.""" - metrics = {} - - params = 
load_yaml_params(args.params) - model = None # create_model(params) - ndim = len(model.get_params()) - data = load_patient_data(args.data) - h5_file = h5py.File(args.model, mode="r") - - # if TI has been performed, compute the accuracy for every step - if "ti" in h5_file: - temp_schedule, ti_log_probs = compute_ti_results( - metrics=metrics, - params=params, - ndim=ndim, - h5_file=h5_file, - model=args.model, - ) - logger.info( - "Computed results of thermodynamic integration with " - f"{len(temp_schedule)} steps", - ) - - # store inverse temperatures and log-probs in CSV file - args.plots.parent.mkdir(exist_ok=True) - - beta_vs_accuracy = pd.DataFrame( - np.array( - [ - temp_schedule, - np.mean(ti_log_probs, axis=1), - np.std(ti_log_probs, axis=1), - ], - ).T, - columns=["β", "accuracy", "std"], - ) - beta_vs_accuracy.to_csv(args.plots, index=False) - logger.info(f"Plotted β vs accuracy at {args.plots}") - - # use blobs, because also for TI, this is the unscaled log-prob - backend = emcee.backends.HDFBackend(args.model, read_only=True, name="mcmc") - final_log_probs = backend.get_blobs() - logger.info(f"Opened samples from emcee backend from {args.model}") - - # store metrics in JSON file - args.metrics.parent.mkdir(parents=True, exist_ok=True) - args.metrics.touch(exist_ok=True) - - metrics["BIC"] = comp_bic( - final_log_probs, - ndim, - len(data), - ) - metrics["max_llh"] = np.max(final_log_probs) - metrics["mean_llh"] = np.mean(final_log_probs) - - with open(args.metrics, mode="w", encoding="utf-8") as metrics_file: - json.dump(metrics, metrics_file) - - logger.info(f"Wrote out metrics to {args.metrics}") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description=__doc__) - _add_arguments(parser) - - args = parser.parse_args() - args.run_main(args) diff --git a/src/lyscripts/integrate.py b/src/lyscripts/integrate.py deleted file mode 100644 index c213116..0000000 --- a/src/lyscripts/integrate.py +++ /dev/null @@ -1,163 +0,0 @@ -"""Perform 
thermodynamic integration to evaluate the model evidence. - -Using the functions provided by the `sample` module, this script implements -thermodynamic integration (TI) in order to compute the model evidence. -This is done by sampling the model parameters at different inverse temperatures -following a specified schedule. -""" - -from __future__ import annotations - -import os -from typing import Any - -import emcee -import h5py -import numpy as np -from loguru import logger -from lydata.utils import ModalityConfig -from pydantic import Field - -import lyscripts.sample as sample_module # Import the module to set its global MODEL -from lyscripts.cli import assemble_main -from lyscripts.configs import ( - BaseCLI, - DataConfig, - DistributionConfig, - GraphConfig, - ModelConfig, - SamplingConfig, - ScheduleConfig, - add_distributions, - add_modalities, - construct_model, -) -from lyscripts.utils import get_hdf5_backend - - -def init_ti_sampler( - settings: IntegrateCLI, - temp_idx: int, - ndim: int, - inv_temp: float, - pool: Any, -) -> emcee.EnsembleSampler: - """Initialize the ``emcee.EnsembleSampler`` for TI with the given ``settings''.""" - nwalkers = ndim * settings.sampling.walkers_per_dim - backend = get_hdf5_backend( - file_path=settings.sampling.storage_file, - dataset=f"ti/{temp_idx + 1:0>2d}", - nwalkers=nwalkers, - ndim=ndim, - ) - return emcee.EnsembleSampler( - nwalkers=nwalkers, - ndim=ndim, - log_prob_fn=sample_module.log_prob_fn, - kwargs={"inverse_temp": inv_temp}, - moves=[(emcee.moves.DEMove(), 0.8), (emcee.moves.DESnookerMove(), 0.2)], - backend=backend, - pool=pool, - blobs_dtype=[("log_prob", np.float64)], - parameter_names=list(MODEL.get_named_params().keys()), - ) - - -class IntegrateCLI(BaseCLI): - """Perform thermodynamic integration to compute the model evidence.""" - - graph: GraphConfig - model: ModelConfig = ModelConfig() - distributions: dict[str, DistributionConfig] = Field( - default={}, - description=( - "Mapping of model 
T-categories to predefined distributions over " - "diagnose times." - ), - ) - modalities: dict[str, ModalityConfig] = Field( - default={}, - description=( - "Maps names of diagnostic modalities to their specificity/sensitivity." - ), - ) - data: DataConfig - sampling: SamplingConfig - schedule: ScheduleConfig = Field( - description="Configuration for generating inverse temperature schedule.", - ) - - def cli_cmd(self) -> None: - """Start the ``integrate`` subcommand. - - The model construction and setup is done analogously to the - ``sample`` command. Afterwards, an :py:class:`emcee.EnsembleSampler` - is initialized (see :py:func:`init_sampler`) and :py:func:`run_sampling`, - implemented in the ``sample``module, is executed twice for each TI step: - once for the burn-in phase and once for the actual sampling phase. - Thereby, the log likelihood is scaled by the respective inverse - temperature of that step. All necessary settings for the sampling - are passed by the ``sampling``argument, except for the inverse - temperatures, which are provided by the ``schedule`` argument. - """ - # as recommended in https://emcee.readthedocs.io/en/stable/tutorials/parallel/# - os.environ["OMP_NUM_THREADS"] = "1" - - logger.debug(self.model_dump_json(indent=2)) - - # ugly, but necessary for pickling - global MODEL - MODEL = construct_model(self.model, self.graph) - MODEL = add_distributions(MODEL, self.distributions) - MODEL = add_modalities(MODEL, self.modalities) - MODEL.load_patient_data(**self.data.get_load_kwargs()) - ndim = MODEL.get_num_dims() - - # set MODEL in the sample module's namespace so log_prob_fn can access it - sample_module.MODEL = MODEL - - schedule = self.schedule.get_schedule() - - # emcee does not support numpy's new random number generator yet. 
- np.random.seed(self.sampling.seed) # noqa: NPY002 - - with sample_module.get_pool(self.sampling.cores) as pool: - for idx, inv_temp in enumerate(schedule): - sampler = init_ti_sampler( - settings=self, - temp_idx=idx, - ndim=ndim, - inv_temp=inv_temp, - pool=pool, - ) - - sample_module.run_sampling( - description=f"Burn-in phase: TI step {idx + 1}/{len(schedule)}", - sampler=sampler, - num_steps=self.sampling.burnin_steps, - check_interval=self.sampling.check_interval, - trust_factor=self.sampling.trust_factor, - relative_thresh=self.sampling.relative_thresh, - history_file=self.sampling.history_file, - ) - - sample_module.run_sampling( - description=f"Sampling phase: TI step {idx + 1}/{len(schedule)}", - sampler=sampler, - num_steps=self.sampling.num_steps, - reset_backend=True, - check_interval=self.sampling.num_steps, - thin_by=self.sampling.thin_by, - ) - # copy last sampling round over to a group in the HDF5 file called "mcmc" - with h5py.File(self.sampling.storage_file, mode="r+") as h5_file: - h5_file.copy( - f"ti/{len(schedule):0>2d}", - h5_file, - name=self.sampling.dataset, - ) - - -if __name__ == "__main__": - main = assemble_main(settings_cls=IntegrateCLI, prog_name="integrate") - main() diff --git a/src/lyscripts/plots.py b/src/lyscripts/plots.py deleted file mode 100644 index 07335b0..0000000 --- a/src/lyscripts/plots.py +++ /dev/null @@ -1,411 +0,0 @@ -"""Utility functions for the plotting commands.""" - -from __future__ import annotations - -from abc import abstractmethod -from collections.abc import Mapping -from dataclasses import field -from itertools import cycle -from pathlib import Path -from typing import TYPE_CHECKING, Any, TypeVar - -import h5py -import matplotlib.pyplot as plt -import numpy as np -import scipy as sp -from numpydantic import NDArray, Shape -from pydantic import BaseModel - -from lyscripts.decorators import ( - check_input_file_exists, - check_output_dir_exists, - log_state, -) - -if TYPE_CHECKING: - from 
matplotlib.axes._axes import Axes as MPLAxes - from matplotlib.figure import Figure - -# define USZ colors -COLORS = { - "blue": "#005ea8", - "orange": "#f17900", - "green": "#00afa5", - "red": "#ae0060", - "gray": "#c5d5db", -} -COLOR_CYCLE = cycle(COLORS.values()) -CM_PER_INCH = 2.54 - - -def floor_at_decimal(value: float, decimal: int) -> float: - """Compute the floor of ``value`` for the specified ``decimal``. - - Essentially the distance to the right of the decimal point. May be negative. - """ - power = 10**decimal - return np.floor(power * value) / power - - -def ceil_at_decimal(value: float, decimal: int) -> float: - """Compute the ceiling of ``value`` for the specified ``decimal``. - - Analog to :py:func:`.floor_at_decimal`, this is the distance to the right of the - decimal point. May be negative. - """ - return -floor_at_decimal(-value, decimal) - - -def floor_to_step(value: float, step: float) -> float: - """Compute next value on ladder of stepsize ``step`` still below ``value``.""" - return (value // step) * step - - -def ceil_to_step(value: float, step: float) -> float: - """Compute next value on ladder of stepsize ``step`` still above ``value``.""" - return floor_to_step(value, step) + step - - -def clean_and_check(filename: str | Path) -> Path: - """Check if file with ``filename`` exists. - - If not, raise error, otherwise return cleaned :py:class:`~pathlib.PosixPath`. 
- """ - filepath = Path(filename) - if not filepath.exists(): - msg = f"File with the name {filename} does not exist at {filepath.resolve()}" - raise FileNotFoundError(msg) - return filepath - - -AbstractDistributionT = TypeVar("AbstractDistributionT", bound="AbstractDistribution") - - -class AbstractDistribution(BaseModel): - """Abstract class for distributions that should be plotted.""" - - scale: float = 100.0 - offset: float = 0.0 - kwargs: dict[str, Any] = field(default_factory=lambda: {}) - - @abstractmethod - def draw(self, axes: MPLAxes) -> MPLAxes: - """Draw the distribution into the provided ``axes``.""" - ... - - @abstractmethod - def left_percentile(self, percent: float) -> float: - """Compute the point where ``percent`` of the values are to the left.""" - ... - - @abstractmethod - def right_percentile(self, percent: float) -> float: - """Compute the point where ``percent`` of the values are to the right.""" - ... - - def _get_label(self) -> str: - """Compute label for when ``kwargs`` does not contain one.""" - - @property - def label(self) -> str: - """Return the label of the histogram.""" - return self.kwargs.get("label", self._get_label()) - - -class Histogram(AbstractDistribution): - """Class containing data for plotting a histogram.""" - - raw_values: NDArray[Shape["*"], float] # noqa: F722 - - @property - def values(self) -> np.ndarray: - """Return the values of the histogram scaled and offset.""" - return self.raw_values * self.scale + self.offset - - @classmethod - def from_hdf5( - cls: type[Histogram], - filename: str | Path, - dataname: str, - scale: float = 100.0, - offset: float = 0.0, - **kwargs, - ) -> Histogram: - """Create a histogram from an HDF5 file.""" - filename = clean_and_check(filename) - with h5py.File(filename, mode="r") as h5file: - dataset = h5file[dataname] - if "label" not in kwargs: - kwargs["label"] = get_label(dataset.attrs) - return cls(raw_values=dataset[:], scale=scale, offset=offset, kwargs=kwargs) - - def 
left_percentile(self, percent: float) -> float: - """Compute the point where `percent` of the values are to the left.""" - return np.percentile(self.values, percent) - - def right_percentile(self, percent: float) -> float: - """Compute the point where `percent` of the values are to the right.""" - return np.percentile(self.values, 100.0 - percent) - - def draw(self, axes: MPLAxes, **defaults) -> Any: - """Draw the histogram into the provided ``axes``.""" - xlim = axes.get_xlim() - - hist_kwargs = defaults.get("hist", {}).copy() - hist_kwargs.update(self.kwargs) - - if self.label is not None: - hist_kwargs["label"] = self.label - - return axes.hist(self.values, range=xlim, **hist_kwargs) - - -class BetaPosterior(AbstractDistribution): - """Class for storing plot configs for a Beta posterior.""" - - num_success: int - num_total: int - - @classmethod - def from_hdf5( - cls: type[BetaPosterior], - filename: str | Path, - dataname: str, - scale: float = 100.0, - offset: float = 0.0, - **kwargs, - ) -> BetaPosterior: - """Initialize data container for Beta posteriors from HDF5 file.""" - filename = clean_and_check(filename) - with h5py.File(filename, mode="r") as h5file: - dataset = h5file[dataname] - try: - num_success = int(dataset.attrs["num_match"]) - num_total = int(dataset.attrs["num_total"]) - except KeyError as key_err: - raise KeyError( - "Dataset does not contain observed prevalence data", - ) from key_err - - return cls( - num_success=num_success, - num_total=num_total, - scale=scale, - offset=offset, - kwargs=kwargs, - ) - - def _get_label(self) -> str: - return f"data: {self.num_success} of {self.num_total}" - - @property - def num_fail(self): - """Return the number of failures, i.e. 
the totals minus the successes.""" - return self.num_total - self.num_success - - def pdf(self, x: np.ndarray) -> np.ndarray: - """Compute the probability density function.""" - return sp.stats.beta.pdf( - x, - a=self.num_success + 1, - b=self.num_fail + 1, - loc=self.offset, - scale=self.scale, - ) - - def left_percentile(self, percent: float) -> float: - """Return the point where the CDF reaches ``percent``.""" - return sp.stats.beta.ppf( - percent / 100.0, - a=self.num_success + 1, - b=self.num_fail + 1, - scale=self.scale, - ) - - def right_percentile(self, percent: float) -> float: - """Return the point where 100% minus the CDF equals ``percent``.""" - return sp.stats.beta.ppf( - 1.0 - (percent / 100.0), - a=self.num_success + 1, - b=self.num_fail + 1, - scale=self.scale, - ) - - def draw(self, axes: MPLAxes, resolution: int = 300, **defaults) -> Any: - """Draw the Beta posterior into the provided ``axes``. - - Returns a handle and a label for the legend. - """ - left, right = axes.get_xlim() - x = np.linspace(left, right, resolution) - y = self.pdf(x) - - plot_kwargs = defaults.get("plot", {}).copy() - plot_kwargs.update(self.kwargs) - - if self.label is not None: - plot_kwargs["label"] = self.label - - return axes.plot(x, y, **plot_kwargs) - - -def get_size(width="single", unit="cm", ratio="golden"): - """Return a tuple of figure sizes in inches. - - This is provided as the ``matplotlib`` keyword argument ``figsize`` expects it. - This figure size is computed from a ``width``, in the ``unit`` of centimeters by - default, and a ``ratio`` which is set to the golden ratio by default. - - >>> get_size(width="single", ratio="golden") - (3.937007874015748, 2.4332557935820445) - >>> get_size(width="full", ratio=2.) - (6.299212598425196, 3.149606299212598) - >>> get_size(width=10., ratio=1.) - (3.937007874015748, 3.937007874015748) - >>> get_size(width=5, unit="inches", ratio=2./3.) 
- (5, 7.5) - """ - if width == "single": - width = 10 - elif width == "full": - width = 16 - - ratio = 1.618 if ratio == "golden" else ratio - width = width / CM_PER_INCH if unit == "cm" else width - height = width / ratio - return (width, height) - - -def get_label(attrs: Mapping) -> str: - """Extract label of a histogram from the HDF5 ``attrs`` object of the dataset.""" - label = [] - transforms = { - "label": str, - "modality": str, - "t_stage": str, - "midline_ext": lambda x: "ext" if x else "noext", - } - for key, func in transforms.items(): - if key in attrs and attrs[key] is not None: - label.append(func(attrs[key])) - return " | ".join(label) - - -def get_xlims( - contents: AbstractDistributionT, - percent_lims: tuple[float] = (10.0, 10.0), -) -> tuple[float]: - """Get the x-axis limits for a plot containing multiple distribution. - - Compute the ``xlims`` of a plot containing histograms and probability density - functions by considering their smallest and largest percentiles. - """ - left_percentiles = np.array( - [c.left_percentile(percent_lims[0]) for c in contents], - ) - left_lim = np.min(left_percentiles) - right_percentiles = np.array( - [c.right_percentile(percent_lims[0]) for c in contents], - ) - right_lim = np.max(right_percentiles) - return left_lim, right_lim - - -def draw( - axes: MPLAxes, - contents: list[AbstractDistribution], - percent_lims: tuple[float, float] = (10.0, 10.0), - xlims: tuple[float] | None = None, - hist_kwargs: dict[str, Any] | None = None, - plot_kwargs: dict[str, Any] | None = None, -) -> MPLAxes: - """Draw histograms and Beta posterior from ``contents`` into ``axes``. - - The limits of the x-axis is computed to be the smallest and largest left and right - percentile of all provided ``contents`` respectively via the ``percent_lims`` tuple. - - The ``hist_kwargs`` define general settings that will be applied to all histograms. 
- One additional key ``'nbins'`` may be used to adjust only the numbers, not the - spacing of the histogram bins. - Similarly, ``plot_kwargs`` adjusts the default settings for the Beta posteriors. - - Both these keyword arguments can be overwritten by what the individual ``contents`` - have defined. - """ - if not all(isinstance(c, AbstractDistribution) for c in contents): - raise TypeError("Contents must be subclasses of `AbstractDistribution`") - - xlims = xlims or get_xlims(contents, percent_lims) - - if len(xlims) != 2 or xlims[0] > xlims[-1]: - raise ValueError("`xlims` must be tuple of two increasing values") - - axes.set_xlim(*xlims) - - default_kwargs = { - "hist": { - "density": True, - "histtype": "stepfilled", - "alpha": 0.7, - "bins": 50, - }, - "plot": {}, - } - default_kwargs["hist"].update(hist_kwargs or {}) - default_kwargs["plot"].update(plot_kwargs or {}) - - for content in contents: - content.draw(axes, **default_kwargs) - - return axes - - -def split_legends( - axes: MPLAxes, - titles: list[str], - locs: list[tuple[float, float]], - **kwargs, -) -> None: - """Separate labels in ``axes`` into separate legends with ``titles`` at ``locs``.""" - legend_kwargs = { - "title_fontsize": "small", - "labelspacing": 0.1, - "loc": "upper left", - } - legend_kwargs.update(kwargs) - - handles, labels = axes.get_legend_handles_labels() - labels_per_legend = len(labels) // len(titles) - - for i, (title, loc) in enumerate(zip(titles, locs, strict=True)): - start = i * labels_per_legend - stop = (i + 1) * labels_per_legend if i < len(titles) - 1 else None - idx = slice(start, stop) - - legend = axes.legend( - handles[idx], - labels[idx], - bbox_to_anchor=loc, - title=title, - **legend_kwargs, - ) - axes.add_artist(legend) - - -@log_state() -@check_input_file_exists -def use_mpl_stylesheet(file_path: str | Path): - """Load a ``.mplstyle`` stylesheet from ``file_path``.""" - plt.style.use(file_path) - - -@log_state() -@check_output_dir_exists -def save_figure( - 
output_path: str | Path, - figure: Figure, - formats: list[str] | None, -): - """Save a ``figure`` to ``output_path`` in every one of the provided ``formats``.""" - for frmt in formats: - figure.savefig(output_path.with_suffix(f".{frmt}")) diff --git a/src/lyscripts/sample.py b/src/lyscripts/sample.py deleted file mode 100644 index 1fe7096..0000000 --- a/src/lyscripts/sample.py +++ /dev/null @@ -1,426 +0,0 @@ -"""Implementation of flexible MCMC sampling for lymphatic progression models. - -This module provides both helpful functions for programmatically building and running -sampling pipelines, as well a CLI interface for th most common sampling use cases. - -The core is the :py:func:`run_sampling` function. It has a flexible interface and -built-in convergence detection, as well as bookkeeping for monitoring and resuming -interrupted sampling runs. It can be used both during the burn-in phase and the actual -sampling phase. - -.. warning:: - - We strongly recommend to set the CLI's ``--cores`` argument to ``None`` (or ``null`` - in the YAML config file) if you are on MacOS or Windows. This is because we haven't - yet figured out how we can safely and efficiently use the ``multiprocess(ing)`` - library on these two platforms. -""" - -from __future__ import annotations - -import os -import sys -from typing import Any - -from loguru import logger - -from lyscripts.cli import assemble_main - -try: - import multiprocess as mp -except ModuleNotFoundError: - import multiprocessing as mp - -if sys.platform == "darwin": - logger.warning("Detected MacOS. 
Setting multiprocess(ing) start method to 'fork'.") - mp.set_start_method("fork") - -from pathlib import Path - -import emcee -import numpy as np -import pandas as pd -from lydata.utils import ModalityConfig -from lymph.types import ParamsType -from pydantic import BaseModel, Field -from rich.progress import Progress, ProgressColumn, Task, TimeElapsedColumn -from rich.text import Text - -from lyscripts.configs import ( - BaseCLI, - DataConfig, - DistributionConfig, - GraphConfig, - ModelConfig, - SamplingConfig, - add_distributions, - add_modalities, - construct_model, -) -from lyscripts.utils import console, get_hdf5_backend - - -class CompletedItersColumn(ProgressColumn): - """A column that displays the completed number of iterations.""" - - def __init__(self, table_column=None, it: int = 0): - """Initialize the column with number of previous iterations.""" - super().__init__(table_column) - self.it = it - - def render(self, task: Task) -> Text: - """Render total iterations.""" - if task.completed is None: - return Text("? it", style="progress.data.steps") - return Text(f"{task.completed + self.it} it", style="progress.data.steps") - - -class ItersPerSecondColumn(ProgressColumn): - """A column that displays the number of iterations per second.""" - - def render(self, task: Task) -> Text: - """Render iterations per second.""" - speed = task.finished_speed or task.speed - if speed is None: - return Text("? 
it/s", style="progress.data.speed") - return Text(f"{speed:.2f} it/s", style="progress.data.speed") - - -class AcorTime(BaseModel, validate_assignment=True): - """Storage for old and new autocorrelation times.""" - - old: float - new: float - - def update(self, new: float) -> None: - """Update the autocorrelation time.""" - self.old = self.new - self.new = new - - @property - def relative_diff(self) -> float: - """Get the relative difference between new and old autocorrelation time.""" - return np.abs(self.new - self.old) / self.new - - -class NumAccepted(BaseModel, validate_assignment=True): - """Storage for old and new number of accepted proposals.""" - - old: int - new: int - - def update(self, new: int) -> None: - """Update the number of accepted proposals.""" - self.old = self.new - self.new = new - - @property - def newly_accepted(self) -> int: - """Get the number of newly accepted proposals.""" - return self.new - self.old - - -MODEL = None - - -def log_prob_fn(theta: ParamsType, inverse_temp: float = 1.0) -> tuple[float, float]: - """Compute log-prob using global variables because of pickling. - - An inverse temperature ``inverse_temp`` can be provided for thermodynamic - integration. - """ - llh = MODEL.likelihood(given_params=theta) - if np.isinf(llh): # to prevent the case of 0 * inf = NaN - return -np.inf, -np.inf - return inverse_temp * llh, llh - - -def ensure_initial_state(sampler: emcee.EnsembleSampler) -> np.ndarray: - """Try to extract a starting state from a ``sampler``. - - Create a random starting state if no one was found. - """ - try: - state = sampler.backend.get_last_sample() - logger.info( - f"Resuming from {sampler.backend.filename} with {sampler.iteration} " - "stored iterations.", - ) - except AttributeError: - state = np.random.uniform(size=(sampler.nwalkers, sampler.ndim)) # noqa: NPY002 - logger.debug(f"No stored samples found. 
Starting from random state {state}.") - - return state - - -def ensure_history_table(file: Path | None) -> pd.DataFrame: - """Return the history table from a file or an empty DataFrame. - - It will try to load a history at the given ``file`` location, but with a ``.tmp`` - extension. This is the expected name and location of a history file that was - stored during an interrupted sampling run. - - If no file is found, an empty DataFrame is returned. - """ - if file is None or not file.with_suffix(".tmp").exists(): - return pd.DataFrame( - columns=[ - "steps", - "acor_times", - "accept_fracs", - "max_log_probs", - ], - ).set_index("steps") - - return pd.read_csv(file.with_suffix(".tmp"), index_col="steps") - - -def update_history_table( - history: pd.DataFrame, - history_file: Path | None, - iteration: int, - acor_time: float, - accepted_frac: float, - max_log_prob: float, -) -> pd.DataFrame: - """Update the history table with the current iteration's information.""" - history.loc[iteration] = [acor_time, accepted_frac, max_log_prob] - logger.debug(history.iloc[-1].to_dict()) - - if history_file is not None: - history.to_csv(history_file.with_suffix(".tmp")) - - return history - - -def is_converged( - iteration: int, - acor_time: AcorTime, - trust_factor: float, - relative_thresh: float, -) -> bool: - """Check if the chain has converged based on the autocorrelation time. - - The criterion is based on the relative change of the autocorrelation time and - whether the autocorrelation extimate can be trusted. Essentially, we only trust - the estimate if it is smaller than ``trust_factor`` times the current ``iteration``. - - More details can be found in the `emcee documentation`_. - - .. 
_emcee documentation: https://emcee.readthedocs.io/en/stable/tutorials/autocorr/ - """ - return ( - acor_time.new * trust_factor < iteration - and acor_time.relative_diff < relative_thresh - ) - - -def _get_columns(it: int = 0) -> list[ProgressColumn]: - """Get the default progress columns for the MCMC sampling.""" - return [ - *Progress.get_default_columns(), - ItersPerSecondColumn(), - CompletedItersColumn(it=it), - TimeElapsedColumn(), - ] - - -def run_sampling( - sampler: emcee.EnsembleSampler, - initial_state: np.ndarray | None = None, - num_steps: int | None = None, - thin_by: int = 1, - check_interval: int = 100, - trust_factor: float = 50.0, - relative_thresh: float = 0.05, - history_file: Path | None = None, - reset_backend: bool = False, - description: str = "Burn-in phase", -) -> None: - """Run MCMC sampling. - - This will run the ``sampler`` either for ``num_steps`` steps or - if it set to - ``None`` - until convergence. Convergence is determined once within a - ``check_interval`` of steps by the :py:func:`is_converged` function. The - convergence criterion is based on a trustworthy estimate of the autocorrelation - time. This is elaborated in the `emcee documentation`_. - - Some bookkeeping parameters may be stored in a ``history_file``. During sampling, - the history is stored in a temporary file with the suffix ``.tmp``. If the sampling - is interrupted, the history and the last state of the ``sampler`` can be recovered - and the sampling can be continued. - - One may choose to ``reset_backend``, e.g. in case the previous sampling was run - until convergence and now one wants to store a length of the converged chain. This - may also be thinned by a factor of ``thin_by`` (directly passed to the - :py:class:`emcee.EnsembleSampler` class). - - .. 
_emcee documentation: https://emcee.readthedocs.io/en/stable/tutorials/autocorr/ - """ - state = initial_state or ensure_initial_state(sampler) - history = ensure_history_table(history_file) - - if reset_backend: - logger.debug("Resetting backend of sampler.") - sampler.backend.reset(sampler.nwalkers, sampler.ndim) - - acor_time = AcorTime(old=np.inf, new=np.inf) - accepted = NumAccepted(old=0, new=sampler.backend.accepted.sum()) - - with Progress(*_get_columns(it=sampler.iteration), console=console) as progress: - task = progress.add_task(description=description, total=num_steps) - while sampler.iteration < (num_steps or np.inf): - for state in sampler.sample( # noqa: B007, B020 - initial_state=state, - iterations=check_interval - sampler.iteration % check_interval, - thin_by=thin_by, - ): - progress.update(task, advance=1) - - acor_time.update(new=sampler.get_autocorr_time(tol=0).mean()) - accepted.update(new=sampler.backend.accepted.sum()) - - history = update_history_table( - history=history, - history_file=history_file, - iteration=sampler.iteration, - acor_time=acor_time.new, - accepted_frac=( - accepted.newly_accepted / (check_interval * sampler.nwalkers) - ), - max_log_prob=np.max(state.log_prob), - ) - - if num_steps is None and is_converged( - iteration=sampler.iteration, - acor_time=acor_time, - trust_factor=trust_factor, - relative_thresh=relative_thresh, - ): - logger.info(f"Sampling converged after {sampler.iteration} steps.") - break - - if history_file is not None: - history_file.with_suffix(".tmp").rename(history_file) - - -class DummyPool: - """Dummy class to allow for no multiprocessing.""" - - def __enter__(self) -> None: - """Enter the context manager.""" - ... - - def __exit__(self, *args) -> None: - """Exit the context manager.""" - ... - - -def get_pool(num_cores: int | None) -> Any | DummyPool: # type: ignore - """Get a ``multiprocess(ing)`` pool or ``DummyPool``. 
- - Returns a ``multiprocess(ing)`` pool with ``num_cores`` cores if ``num_cores`` is - not ``None``. Otherwise, a ``DummyPool`` is returned. - """ - return mp.Pool(num_cores) if num_cores is not None else DummyPool() - - -def init_sampler(settings: SampleCLI, ndim: int, pool: Any) -> emcee.EnsembleSampler: - """Initialize the ``emcee.EnsembleSampler`` with the given ``settings``.""" - nwalkers = ndim * settings.sampling.walkers_per_dim - backend = get_hdf5_backend( - file_path=settings.sampling.storage_file, - dataset=settings.sampling.dataset, - nwalkers=nwalkers, - ndim=ndim, - ) - return emcee.EnsembleSampler( - nwalkers=nwalkers, - ndim=ndim, - log_prob_fn=log_prob_fn, - kwargs={"inverse_temp": settings.sampling.inverse_temp}, - moves=[(emcee.moves.DEMove(), 0.8), (emcee.moves.DESnookerMove(), 0.2)], - backend=backend, - pool=pool, - blobs_dtype=[("log_prob", np.float64)], - parameter_names=list(MODEL.get_named_params().keys()), - ) - - -class SampleCLI(BaseCLI): - """Use MCMC to infer distributions over model parameters from data.""" - - graph: GraphConfig - model: ModelConfig = ModelConfig() - distributions: dict[str, DistributionConfig] = Field( - default={}, - description=( - "Mapping of model T-categories to predefined distributions over " - "diagnose times." - ), - ) - modalities: dict[str, ModalityConfig] = Field( - default={}, - description=( - "Maps names of diagnostic modalities to their specificity/sensitivity." - ), - ) - data: DataConfig - sampling: SamplingConfig - - def cli_cmd(self) -> None: - """Start the ``sample`` subcommand. - - First, it will construct the model from the ``graph`` and ``model`` arguments. - Then, it will add distributions over diagnose times via the dictionary from - the ``distributions`` argument. It will also set sensitivity and specificity of - diagnostic modalities via the dictionary provided through the ``modalities`` - argument. Finally, it will load the patient data as specified via the ``data`` - argument. 
- - When the model is constructed, an :py:class:`emcee.EnsembleSampler` is - initialized (see :py:func:`init_sampler`) and :py:func:`run_sampling` is - executed twice: once for the burn-in phase and once for the actual sampling - phase. The ``sampling`` argument provides all necessary settings for the - sampling. - """ - # as recommended in https://emcee.readthedocs.io/en/stable/tutorials/parallel/# - os.environ["OMP_NUM_THREADS"] = "1" - - logger.debug(self.model_dump_json(indent=2)) - - # ugly, but necessary for pickling - global MODEL - MODEL = construct_model(self.model, self.graph) - MODEL = add_distributions(MODEL, self.distributions) - MODEL = add_modalities(MODEL, self.modalities) - MODEL.load_patient_data(**self.data.get_load_kwargs()) - ndim = MODEL.get_num_dims() - - # emcee does not support numpy's new random number generator yet. - np.random.seed(self.sampling.seed) # noqa: NPY002 - - with get_pool(self.sampling.cores) as pool: - sampler = init_sampler(settings=self, ndim=ndim, pool=pool) - run_sampling( - description="Burn-in phase", - sampler=sampler, - num_steps=self.sampling.burnin_steps, - check_interval=self.sampling.check_interval, - trust_factor=self.sampling.trust_factor, - relative_thresh=self.sampling.relative_thresh, - history_file=self.sampling.history_file, - ) - run_sampling( - description="Sampling phase", - sampler=sampler, - num_steps=self.sampling.num_steps, - check_interval=self.sampling.num_steps, - reset_backend=True, - thin_by=self.sampling.thin_by, - ) - - -if __name__ == "__main__": - main = assemble_main(settings_cls=SampleCLI, prog_name="sample") - main() diff --git a/src/lyscripts/schedule.py b/src/lyscripts/schedule.py deleted file mode 100644 index 99fa334..0000000 --- a/src/lyscripts/schedule.py +++ /dev/null @@ -1,33 +0,0 @@ -r"""Generate inverse temperature schedules for thermodynamic integration. - -Thermodynamic integration is quite sensitive to the specific schedule which is used. 
-I noticed in my models, that within the interval :math:`[0, 0.1]`, the increase in the -expected log-likelihood is very steep. Hence, the inverse temperature :math:`\beta` -must be more densely spaced in the beginning. - -This can be achieved by using a power sequence: Generate :math:`n` linearly spaced -points in the interval :math:`[0, 1]` and then transform each point by computing -:math:`\beta_i^k` where :math:`k` could e.g. be 5. -""" - -from loguru import logger - -from lyscripts.cli import assemble_main -from lyscripts.configs import BaseCLI, ScheduleConfig - - -class ScheduleCLI(ScheduleConfig, BaseCLI): - """Generate an inverse temperature schedule for thermodynamic integration.""" - - def cli_cmd(self) -> None: - """Start the ``schedule`` command.""" - logger.debug(self.model_dump_json(indent=2)) - - for inv_temp in self.get_schedule(): - # print is necessary to allow piping the output - print(inv_temp) # noqa: T201 - - -if __name__ == "__main__": - main = assemble_main(settings_cls=ScheduleCLI, prog_name="schedule") - main() diff --git a/src/lyscripts/schema.py b/src/lyscripts/schema.py deleted file mode 100644 index 7470af3..0000000 --- a/src/lyscripts/schema.py +++ /dev/null @@ -1,66 +0,0 @@ -"""A fusion of all :py:mod:`configs`, allowing the creation of a JSON schema. - -This command is not intended to be used by the end user. Rather, it exists such that -the developers and maintainers can create a JSON schema from all the defined -:py:mod:`configs` an store that in the `source code repository`_. Subsequently, the -end user can point their IDE to this schema, hosted on GitHub to provide them with -auto-completion and validation of their YAML configuration files that they feed into -the lyscripts CLIs when they build pipelines or scripts with it. - -The `URL for the schema`_ can for example be used in the settings of VS Code like this: - -.. 
code:: json - - { - "yaml.schemas": { - "https://raw.githubusercontent.com/lycosystem/lyscripts/main/schemas/ly.json": "*.ly.yaml" - }, - } - -Which would enable auto-completion and validation for all files with the extension -``.ly.yaml`` in the workspace. - -.. _source code repository: https://github.com/lycosystem/lyscripts -.. _URL for the schema: https://raw.githubusercontent.com/lycosystem/lyscripts/main/schemas/ly.json -""" # noqa: E501 - -import json - -from lydata.utils import ModalityConfig -from pydantic import BaseModel, Field - -from lyscripts import configs - - -class SchemaSettings(BaseModel): - """Settings for generating a JSON schema for lyscripts configuration files.""" - - version: int = Field( - description=( - "For future compatibility reasons, every config file must have a " - "`version: 1` field at the top level." - ), - ge=1, - le=1, - ) - cross_validation: configs.CrossValidationConfig = None - data: configs.DataConfig = None - diagnosis: configs.DiagnosisConfig = None - distributions: dict[str, configs.DistributionConfig] = {} - graph: configs.GraphConfig = None - involvement: configs.InvolvementConfig = None - modalities: dict[str, ModalityConfig] = {} - model: configs.ModelConfig = None - sampling: configs.SamplingConfig = None - scenarios: list[configs.ScenarioConfig] = [] - schedule: configs.ScheduleConfig = None - - -def main() -> None: - """Generate a JSON schema for lyscripts configuration files.""" - schema = SchemaSettings.model_json_schema() - print(json.dumps(schema, indent=2)) # noqa: T201 - - -if __name__ == "__main__": - main() diff --git a/src/lyscripts/utils.py b/src/lyscripts/utils.py deleted file mode 100644 index b6b1261..0000000 --- a/src/lyscripts/utils.py +++ /dev/null @@ -1,199 +0,0 @@ -"""General utility functions for the lyscripts package.""" - -from pathlib import Path - -import numpy as np -import pandas as pd -import yaml -from emcee.backends import HDFBackend -from loguru import logger -from rich.console import 
Console -from scipy.special import factorial - -from lyscripts.decorators import ( - check_input_file_exists, - check_output_dir_exists, -) - -console = Console() - - -def binom_pmf(support: list[int] | np.ndarray, p: float = 0.5): - """Binomial PMF that is much faster than the one from scipy.""" - max_time = len(support) - 1 - if p > 1.0 or p < 0.0: - raise ValueError("Binomial prob must be btw. 0 and 1") - q = 1.0 - p - binom_coeff = factorial(max_time) / ( - factorial(support) * factorial(max_time - support) - ) - return binom_coeff * p**support * q ** (max_time - support) - - -def get_dict_depth(nested: dict) -> int: - """Get the depth of a nested dictionary. - - >>> get_dict_depth({"a": {"b": 1}}) - 2 - >>> varying_depth = {"a": {"b": 1}, "c": {"d": {"e": 2}}} - >>> get_dict_depth(varying_depth) - 3 - """ - if not isinstance(nested, dict): - return 0 - - max_depth = None - for _, value in nested.items(): - value_depth = get_dict_depth(value) - max_depth = max(max_depth or value_depth, value_depth) - - return 1 + (max_depth or 0) - - -def delete_private_keys(nested: dict) -> dict: - """Delete private keys from a nested dictionary. - - A 'private' key is a key whose name starts with an underscore. For example: - - >>> delete_private_keys({"patient": {"__doc__": "some patient info", "age": 61}}) - {'patient': {'age': 61}} - >>> delete_private_keys({"patient": {"age": 61}}) - {'patient': {'age': 61}} - """ - cleaned = {} - - if isinstance(nested, dict): - for key, value in nested.items(): - if not (isinstance(key, str) and key.startswith("_")): - cleaned[key] = delete_private_keys(value) - else: - cleaned = nested - - return cleaned - - -def flatten( - nested: dict, - prev_key: tuple = (), - max_depth: int | None = None, -) -> dict: - """Flatten ``nested`` dict by creating key tuples for each value at ``max_depth``. 
- - >>> nested = {"tumor": {"1": {"t_stage": 1, "size": 12.3}}} - >>> flatten(nested) - {('tumor', '1', 't_stage'): 1, ('tumor', '1', 'size'): 12.3} - >>> mapping = {"patient": {"#": {"age": {"func": int, "columns": ["age"]}}}} - >>> flatten(mapping, max_depth=3) - {('patient', '#', 'age'): {'func': , 'columns': ['age']}} - - Note that flattening an already flat dictionary will yield some weird results. - """ - result = {} - - for key, value in nested.items(): - is_dict = isinstance(value, dict) - has_reached_max_depth = max_depth is not None and len(prev_key) >= max_depth - 1 - - if is_dict and not has_reached_max_depth: - result.update(flatten(value, (*prev_key, key), max_depth)) - else: - result[(*prev_key, key)] = value - - return result - - -def unflatten(flat: dict) -> dict: - """Take a flat dictionary with tuples of keys and create nested dict from it. - - >>> flat = {('tumor', '1', 't_stage'): 1, ('tumor', '1', 'size'): 12.3} - >>> unflatten(flat) - {'tumor': {'1': {'t_stage': 1, 'size': 12.3}}} - >>> mapping = {('patient', '#', 'age'): {'func': int, 'columns': ['age']}} - >>> unflatten(mapping) - {'patient': {'#': {'age': {'func': , 'columns': ['age']}}}} - """ - result = {} - - for keys, value in flat.items(): - current = result - for key in keys[:-1]: - current = current.setdefault(key, {}) - - current[keys[-1]] = value - - return result - - -def get_modalities_subset( - defined_modalities: dict[str, list[float]], - selection: list[str], -) -> dict[str, list[float]]: - """Of the ``defined_modalities`` return only those mentioned in the ``selection``. 
- - >>> modalities = {"CT": [0.76, 0.81], "MRI": [0.63, 0.86]} - >>> get_modalities_subset(modalities, ["CT"]) - {'CT': [0.76, 0.81]} - """ - selected_modalities = {} - for mod in selection: - try: - selected_modalities[mod] = defined_modalities[mod] - except KeyError as key_err: - raise KeyError(f"Modality {mod} has not been defined yet") from key_err - return selected_modalities - - -def load_patient_data( - file_path: Path, - **read_csv_kwargs: dict, -) -> pd.DataFrame: - """Load patient data from a CSV file stored at ``file``.""" - if "header" not in read_csv_kwargs: - read_csv_kwargs["header"] = [0, 1, 2] - - data = pd.read_csv(file_path, **read_csv_kwargs) - logger.info(f"Loaded {len(data)} patient records from {file_path}") - return data - - -@check_input_file_exists -def load_yaml_params(file_path: Path) -> dict: - """Load parameters from a YAML ``file``.""" - with open(file_path, encoding="utf-8") as file: - loaded_params = yaml.safe_load(file) - logger.info(f"Loaded YAML parameters from {file_path}") - return loaded_params - - -@check_input_file_exists -def load_model_samples( - file_path: Path, - name: str = "mcmc", - flat: bool = True, - discard: int = 0, - thin: int = 1, -) -> np.ndarray: - """Load MCMC samples stored in HDF5 file at ``file_path`` under a key ``name``.""" - backend = HDFBackend(file_path, name=name, read_only=True) - samples = backend.get_chain(flat=flat, discard=discard, thin=thin) - logger.info(f"Loaded samples with shape {samples.shape} from {file_path}") - return samples - - -@check_output_dir_exists -def get_hdf5_backend( - file_path: Path, - dataset: str = "mcmc", - nwalkers: int | None = None, - ndim: int | None = None, - reset: bool = False, -) -> HDFBackend: - """Open an HDF5 file at ``file_path`` and return a backend.""" - backend = HDFBackend(file_path, name=dataset) - logger.info(f"Opened HDF5 file at {file_path}") - - if reset: - logger.info(f"Resetting backend at {file_path} to {nwalkers=} and {ndim=}") - 
backend.reset(nwalkers, ndim) - - return backend diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/_dummy_model.py b/tests/_dummy_model.py deleted file mode 100644 index 22eeab2..0000000 --- a/tests/_dummy_model.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Define loadable dummy model for testing.""" - -class DummyModel: - def __init__(self, was_externally_loaded: bool = False): - self.was_externally_loaded = was_externally_loaded - -model = DummyModel(was_externally_loaded=True) diff --git a/tests/_dummy_no_model.py b/tests/_dummy_no_model.py deleted file mode 100644 index 67b8a47..0000000 --- a/tests/_dummy_no_model.py +++ /dev/null @@ -1,3 +0,0 @@ -"""File that does not provide a `model`.""" - -no_model = 42 diff --git a/tests/compute/posteriors_test.py b/tests/compute/posteriors_test.py deleted file mode 100644 index 85f3b49..0000000 --- a/tests/compute/posteriors_test.py +++ /dev/null @@ -1,135 +0,0 @@ -"""Test utilities of the predict submodule.""" - -import numpy as np -import pytest -from lydata.utils import ModalityConfig - -from lyscripts.compute.posteriors import compute_posteriors -from lyscripts.compute.priors import compute_priors -from lyscripts.compute.utils import complete_pattern -from lyscripts.configs import ( - DiagnosisConfig, - DistributionConfig, - GraphConfig, - ModelConfig, - add_distributions, - construct_model, -) - -RNG = np.random.default_rng(42) - - -@pytest.fixture(params=["Unilateral", "Bilateral"]) -def model_config(request) -> ModelConfig: - """Create unilateral model config.""" - return ModelConfig(class_name=request.param) - - -@pytest.fixture -def graph_config() -> GraphConfig: - """Create simple graph.""" - return GraphConfig( - tumor={"T": ["I", "II", "III"]}, - lnl={"I": ["II"], "II": ["III"], "III": []}, - ) - - -@pytest.fixture -def dist_configs() -> dict[str, DistributionConfig]: - """Provide early and late distributions.""" - return { - "early": 
DistributionConfig(kind="frozen", func="binomial"), - "late": DistributionConfig(kind="parametric", func="binomial"), - } - - -@pytest.fixture -def modality_config() -> ModalityConfig: - """Create modality config.""" - return ModalityConfig(spec=0.9, sens=0.8) - - -@pytest.fixture -def diagnosis_config() -> DiagnosisConfig: - """Create a simple diagnosis config.""" - return DiagnosisConfig( - ipsi={"D": {"I": True, "II": True, "III": False}}, - contra={"D": {"I": False, "II": True, "III": False}}, - ) - - -@pytest.fixture -def samples( - model_config: ModelConfig, - graph_config: GraphConfig, - dist_configs: dict[str, DistributionConfig], -) -> np.ndarray: - """Generate some samples.""" - model = construct_model(model_config, graph_config) - model = add_distributions(model, dist_configs) - return RNG.uniform(size=(100, model.get_num_dims())) - - -@pytest.fixture -def priors( - model_config: ModelConfig, - graph_config: GraphConfig, - dist_configs: dict[str, DistributionConfig], - samples: np.ndarray, -) -> np.ndarray: - """Provide some priors.""" - return compute_priors( - model_config=model_config, - graph_config=graph_config, - dist_configs=dist_configs, - samples=samples, - t_stages=["late"], - t_stages_dist=[1.0], - ) - - -def test_compute_posterior( - model_config: ModelConfig, - graph_config: GraphConfig, - dist_configs: dict[str, DistributionConfig], - modality_config: ModalityConfig, - diagnosis_config: DiagnosisConfig, - priors: np.ndarray, -) -> None: - """Ensure that the diagnosis is correctly treated.""" - posteriors = compute_posteriors( - model_config=model_config, - graph_config=graph_config, - dist_configs=dist_configs, - modality_configs={"D": modality_config}, - priors=priors, - diagnosis=diagnosis_config.model_dump(), - ) - - assert np.all(posteriors >= 0), "Negative probabilities in posterior." - assert np.all(posteriors <= 1), "Probabilities above 1 in posterior." 
- - -def test_clean_pattern(): - """Test outdated utility function.""" - empty_pattern = {} - one_pos_pattern = {"ipsi": {"II": True}} - nums_pattern = {"ipsi": {"I": 1}, "contra": {"III": 0}} - lnls = ["I", "II", "III"] - - empty_cleaned = complete_pattern(empty_pattern, lnls) - one_pos_cleaned = complete_pattern(one_pos_pattern, lnls) - nums_cleaned = complete_pattern(nums_pattern, lnls) - - assert empty_cleaned == { - "ipsi": {"I": None, "II": None, "III": None}, - "contra": {"I": None, "II": None, "III": None}, - }, "Empty pattern does not get filled correctly." - assert one_pos_cleaned == { - "ipsi": {"I": None, "II": True, "III": None}, - "contra": {"I": None, "II": None, "III": None}, - }, "Pattern with one positive LNL not cleaned properly." - assert nums_cleaned == { - "ipsi": {"I": True, "II": None, "III": None}, - "contra": {"I": None, "II": None, "III": False}, - }, "Number pattern cleaned wrongly." diff --git a/tests/compute/prevalences_test.py b/tests/compute/prevalences_test.py deleted file mode 100644 index 3fe2151..0000000 --- a/tests/compute/prevalences_test.py +++ /dev/null @@ -1,41 +0,0 @@ -"""Test the computation of the prevalences.""" - -import pandas as pd -import pytest -from lydata import load_datasets - -from lyscripts.compute.prevalences import observe_prevalence -from lyscripts.configs import DiagnosisConfig, ScenarioConfig - - -@pytest.fixture -def scenario_config() -> ScenarioConfig: - """Create a simple scenario config.""" - return ScenarioConfig( - t_stages=["early"], - diagnosis=DiagnosisConfig( - ipsi={"max_llh": {"II": "involved", "III": False}}, - contra={"max_llh": {"II": 0}}, - ), - ) - - -@pytest.fixture -def data() -> pd.DataFrame: - """Load one of the lyDATA datasets.""" - data = next(load_datasets(year=2021, institution="usz")) - return data.ly.enhance() - - -def test_observe_prevalence( - data: pd.DataFrame, - scenario_config: ScenarioConfig, -) -> None: - """Ensure that observing the prevalence works.""" - portion = 
observe_prevalence( - data=data, - scenario_config=scenario_config, - ) - - assert portion.match == 66 - assert portion.total == 150 diff --git a/tests/configs_test.py b/tests/configs_test.py deleted file mode 100644 index 477083b..0000000 --- a/tests/configs_test.py +++ /dev/null @@ -1,125 +0,0 @@ -"""Test the configs module.""" - -from pathlib import Path - -import pytest -import yaml -from lydata.utils import ModalityConfig -from pydantic import ValidationError - -from lyscripts.configs import ( - DistributionConfig, - GraphConfig, - ModelConfig, - add_distributions, - add_modalities, - construct_model, - modalityconfig_from_model, -) - - -@pytest.fixture -def yaml_config_dict() -> dict: - """Load the YAML params file.""" - with open("tests/test_params_v1.yaml") as file: - return yaml.safe_load(file) - - -@pytest.fixture -def external_model_config() -> ModelConfig: - return ModelConfig(external_file=Path("tests/_dummy_model.py")) - - -@pytest.fixture -def model_config() -> ModelConfig: - return ModelConfig( - class_name="Bilateral", - constructor="trinary", - named_params=["spread", "TtoII_spread", "late_p"], - ) - - -@pytest.fixture -def graph_config() -> GraphConfig: - return GraphConfig( - tumor={"T": ["II", "III"]}, - lnl={"II": ["III"], "III": []}, - ) - - -@pytest.fixture -def distribution_configs() -> dict[str, DistributionConfig]: - return { - "early": DistributionConfig(kind="frozen", params={"p": 0.3}), - "late": DistributionConfig(kind="parametric", params={"p": 0.7}), - } - - -def test_model_from_external( - external_model_config: ModelConfig, - graph_config: GraphConfig, -): - """Check if loading model from external file works.""" - model = construct_model(external_model_config, graph_config) - assert model.was_externally_loaded - - -def test_no_model_from_external() -> None: - """Ensure a `ValidationError` is raised when no model is provided.""" - with pytest.raises(ValidationError): - ModelConfig(external_file=Path("tests/_dummy_no_model.py")) - - 
-def test_model_from_no_file() -> None: - """Ensure a `ValidationError` is raised when the file does not exist.""" - with pytest.raises(ValidationError): - ModelConfig(external_file=Path("tests/_no_file.py")) - - -def test_model_from_config( - model_config: ModelConfig, - graph_config: GraphConfig, - distribution_configs: dict[str, DistributionConfig], -): - """Check that loading the model works correctly. Especially the named params.""" - model = construct_model( - model_config=model_config, - graph_config=graph_config, - ) - model = add_distributions( - model=model, - configs=distribution_configs, - ) - assert model.ipsi.get_distribution(t_stage="late") == model.contra.get_distribution( - t_stage="late" - ) - assert model.get_num_dims() == len(model_config.named_params) - - -def test_config_recovery(yaml_config_dict: dict) -> None: - """Ensure the round trip config -> mode -> config works.""" - model = construct_model( - model_config=ModelConfig(**yaml_config_dict["model"]), - graph_config=GraphConfig(**yaml_config_dict["graph"]), - ) - model = add_modalities( - model=model, - modalities={ - name: ModalityConfig(**config) - for name, config in yaml_config_dict["modalities"].items() - }, - ) - - recovered_config = {} - recovered_config["model"] = ModelConfig.from_model(model).model_dump( - exclude_none=True - ) - recovered_config["graph"] = GraphConfig.from_model(model).model_dump() - recovered_config["modalities"] = { - name: modalityconfig_from_model(model, name).model_dump() - for name in model.get_all_modalities() - } - - yaml_config_dict.pop("distributions") - yaml_config_dict.pop("version") - assert recovered_config == yaml_config_dict diff --git a/tests/integration/.gitignore b/tests/integration/.gitignore deleted file mode 100644 index be15619..0000000 --- a/tests/integration/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.csv -*.hdf5 diff --git a/tests/integration/config/data.ly.yaml b/tests/integration/config/data.ly.yaml deleted file mode 100644 index 
dd296f1..0000000 --- a/tests/integration/config/data.ly.yaml +++ /dev/null @@ -1,6 +0,0 @@ -version: 1 - -data: - mapping: - early: early - late: late diff --git a/tests/integration/config/distributions.ly.yaml b/tests/integration/config/distributions.ly.yaml deleted file mode 100644 index 48ca2d5..0000000 --- a/tests/integration/config/distributions.ly.yaml +++ /dev/null @@ -1,15 +0,0 @@ -version: 1 - -distributions: - early: - func: binomial - kind: frozen - params: {p: 0.3} - late: - func: binomial - kind: parametric - params: {p: 0.7} - -t_stages_dist: - early: 0.6 - late: 0.4 diff --git a/tests/integration/config/graph.ly.yaml b/tests/integration/config/graph.ly.yaml deleted file mode 100644 index 3215b1d..0000000 --- a/tests/integration/config/graph.ly.yaml +++ /dev/null @@ -1,14 +0,0 @@ -version: 1 - -graph: - tumor: - T: [II, III] - lnl: - II: [III] - III: [] - -params: - TtoII_spread: 0.3 - TtoIII_spread: 0.1 - IItoIII_spread: 0.2 - late_p: 0.7 diff --git a/tests/integration/config/modalities.ly.yaml b/tests/integration/config/modalities.ly.yaml deleted file mode 100644 index 2ff7de5..0000000 --- a/tests/integration/config/modalities.ly.yaml +++ /dev/null @@ -1,7 +0,0 @@ -version: 1 - -modalities: - imaging: - spec: 0.85 - sens: 0.85 - kind: clinical diff --git a/tests/integration/config/model.ly.yaml b/tests/integration/config/model.ly.yaml deleted file mode 100644 index 609ae0a..0000000 --- a/tests/integration/config/model.ly.yaml +++ /dev/null @@ -1,11 +0,0 @@ -version: 1 - -model: - class_name: Unilateral - constructor: binary - max_time: 10 - named_params: - - TtoII_spread - - TtoIII_spread - - IItoIII_spread - - late_p diff --git a/tests/integration/config/sampling.ly.yaml b/tests/integration/config/sampling.ly.yaml deleted file mode 100644 index 16c1b31..0000000 --- a/tests/integration/config/sampling.ly.yaml +++ /dev/null @@ -1,10 +0,0 @@ -version: 1 - -sampling: - seed: 42 - walkers_per_dim: 2 - check_interval: 10 - trust_factor: 10 - 
relative_thresh: 0.1 - thin_by: 1 - num_steps: 10 diff --git a/tests/integration/config/scenarios.ly.yaml b/tests/integration/config/scenarios.ly.yaml deleted file mode 100644 index 725c145..0000000 --- a/tests/integration/config/scenarios.ly.yaml +++ /dev/null @@ -1,14 +0,0 @@ -version: 1 - -scenarios: - - t_stages: [early] - mode: "HMM" - diagnosis: - ipsi: - imaging: {II: involved} - - t_stages: [early, late] - t_stages_dist: [0.7, 0.3] - mode: "HMM" - diagnosis: - ipsi: - imaging: {III: 1} diff --git a/tests/integration/integration_test.py b/tests/integration/integration_test.py deleted file mode 100644 index 2441afe..0000000 --- a/tests/integration/integration_test.py +++ /dev/null @@ -1,444 +0,0 @@ -"""Test the ``generate`` CLI.""" - -import shutil -import sys -from pathlib import Path - -import h5py -import numpy as np -import pandas as pd -import pytest -from lydata import C -from lydata.utils import ModalityConfig -from pydantic import TypeAdapter - -from lyscripts.cli import assemble_main -from lyscripts.compute.prevalences import PrevalencesCLI -from lyscripts.compute.priors import PriorsCLI, compute_priors -from lyscripts.compute.utils import get_cached -from lyscripts.configs import ( - DistributionConfig, - GraphConfig, - ModelConfig, - SamplingConfig, - ScenarioConfig, -) -from lyscripts.data.generate import GenerateCLI -from lyscripts.sample import SampleCLI -from lyscripts.utils import load_patient_data, load_yaml_params - - -@pytest.fixture(scope="session") -def monkeymodule(): - """Create a session scoped monkeypatch fixture. - - This can be used to e.g. mock the command line arguments by setting the - ``sys.argv`` variable. - """ - with pytest.MonkeyPatch.context() as mp: - yield mp - - -@pytest.fixture(scope="session") -def data_file() -> Path: - """Provide the path to the generated data. - - Delete any file at the beginning of a session if it exists. 
- """ - res = Path("tests/integration/generated.csv") - res.parent.mkdir(exist_ok=True) - if res.exists(): - res.unlink() - return res - - -@pytest.fixture(scope="session") -def samples_file() -> Path: - """Provide the path to the generated samples. - - Delete any file at the beginning of a session if it exists. - """ - res = Path("tests/integration/samples.hdf5") - res.parent.mkdir(exist_ok=True) - if res.exists(): - res.unlink() - return res - - -def _get_config_file(name: str) -> Path: - return Path(f"tests/integration/config/{name}.ly.yaml") - - -@pytest.fixture(scope="session") -def model_config_file() -> Path: - """Provide the path to the model configuration file.""" - return _get_config_file("model") - - -@pytest.fixture(scope="session") -def graph_config_file() -> Path: - """Provide the path to the graph configuration file.""" - return _get_config_file("graph") - - -@pytest.fixture(scope="session") -def distributions_config_file() -> Path: - """Provide the path to the distributions configuration file.""" - return _get_config_file("distributions") - - -@pytest.fixture(scope="session") -def modalities_config_file() -> Path: - """Provide the path to the modalities configuration file.""" - return _get_config_file("modalities") - - -@pytest.fixture(scope="session") -def scenarios_config_file() -> Path: - """Provide the path to the scenarios configuration file.""" - return _get_config_file("scenarios") - - -@pytest.fixture(scope="session") -def sampling_config_file() -> Path: - """Provide the path to the sampling configuration file.""" - return _get_config_file("sampling") - - -@pytest.fixture(scope="session") -def data_config_file() -> Path: - """Provide the path to the data configuration file.""" - return _get_config_file("data") - - -@pytest.fixture(scope="session") -def model_config(model_config_file: Path) -> ModelConfig: - """Provide the model configuration.""" - yaml_config = load_yaml_params(model_config_file) - return ModelConfig(**yaml_config["model"]) 
- - -@pytest.fixture(scope="session") -def graph_config(graph_config_file: Path) -> GraphConfig: - """Provide the graph configuration.""" - yaml_config = load_yaml_params(graph_config_file) - return GraphConfig(**yaml_config["graph"]) - - -@pytest.fixture(scope="session") -def distributions_config( - distributions_config_file: Path, -) -> dict[str, DistributionConfig]: - """Provide the distributions configuration.""" - yaml_config = load_yaml_params(distributions_config_file) - type_adapter = TypeAdapter(dict[str, DistributionConfig]) - return type_adapter.validate_python(yaml_config["distributions"]) - - -@pytest.fixture(scope="session") -def modalities_config(modalities_config_file: Path) -> dict[str, ModalityConfig]: - """Provide the modalities configuration.""" - yaml_config = load_yaml_params(modalities_config_file) - type_adapter = TypeAdapter(dict[str, ModalityConfig]) - return type_adapter.validate_python(yaml_config["modalities"]) - - -@pytest.fixture(scope="session") -def scenarios_config(scenarios_config_file: Path) -> list[ScenarioConfig]: - """Provide a list of defined scenarios.""" - yaml_config = load_yaml_params(scenarios_config_file) - type_adapter = TypeAdapter(list[ScenarioConfig]) - return type_adapter.validate_python(yaml_config["scenarios"]) - - -@pytest.fixture(scope="session") -def sampling_config(sampling_config_file: Path) -> SamplingConfig: - """Provide the sampling configuration.""" - yaml_config = load_yaml_params(sampling_config_file) - return SamplingConfig(**yaml_config["sampling"]) - - -@pytest.fixture(scope="session") -def generated_data( - monkeymodule, - data_file: Path, - model_config_file: Path, - graph_config_file: Path, - distributions_config_file: Path, - modalities_config_file: Path, -) -> pd.DataFrame: - """Execute the generate CLI and provide the generated data as a fixture.""" - monkeymodule.setattr( - sys, - "argv", - [ - "generate", - "--configs", - str(model_config_file.resolve()), - "--configs", - 
str(graph_config_file.resolve()), - "--configs", - str(distributions_config_file.resolve()), - "--configs", - str(modalities_config_file.resolve()), - "--num-patients", - "200", - "--output-file", - str(data_file), - "--seed", - "42", - ], - ) - main = assemble_main(settings_cls=GenerateCLI, prog_name="generate") - main() - return load_patient_data(data_file) - - -@pytest.fixture(scope="session") -def drawn_samples( - monkeymodule, - generated_data: pd.DataFrame, - data_file: Path, - model_config_file: Path, - graph_config_file: Path, - distributions_config_file: Path, - modalities_config_file: Path, - sampling_config_file: Path, - samples_file: Path, -) -> np.ndarray: - """Execute the sampling CLI and provide the samples as a fixture.""" - monkeymodule.setattr( - sys, - "argv", - [ - "sample", - "--configs", - str(model_config_file.resolve()), - "--configs", - str(graph_config_file.resolve()), - "--configs", - str(distributions_config_file.resolve()), - "--configs", - str(modalities_config_file.resolve()), - "--configs", - str(sampling_config_file.resolve()), - "--sampling.storage-file", - str(samples_file.resolve()), - # mapping because generated data already has the correct T-stage column - '--data.mapping={"early": "early", "late": "late"}', - "--data.source", - str(data_file), - ], - ) - main = assemble_main(settings_cls=SampleCLI, prog_name="sample") - main() - _yaml_params = load_yaml_params(sampling_config_file) - _sampling_config = SamplingConfig( - storage_file=samples_file, **_yaml_params["sampling"] - ) - return _sampling_config.load() - - -@pytest.fixture(scope="session") -def cache_dir() -> Path: - """Provide the path to the cache directory as a fixture. - - Delete any directory at the beginning of a session if it exists. - """ - res = Path("tests/integration/.cache") - if res.exists(): - shutil.rmtree(res) - return res - - -@pytest.fixture(scope="session") -def priors_file() -> Path: - """Provide the path to the computed priors as a fixture. 
- - Delete any file at the beginning of a session if it exists. - """ - res = Path("tests/integration/priors.hdf5") - res.parent.mkdir(exist_ok=True) - if res.exists(): - res.unlink() - return res - - -@pytest.fixture(scope="session") -def prevalences_file() -> Path: - """Provide the path to the computed prevalences as a fixture. - - Delete any file at the beginning of a session if it exists. - """ - res = Path("tests/integration/prevalences.hdf5") - res.parent.mkdir(exist_ok=True) - if res.exists(): - res.unlink() - return res - - -@pytest.fixture(scope="session") -def computed_priors( - monkeymodule, - cache_dir: Path, - model_config_file: Path, - graph_config_file: Path, - distributions_config_file: Path, - scenarios_config_file: Path, - sampling_config_file: Path, - samples_file: Path, - priors_file: Path, - dataset: str = "000", -) -> np.ndarray: - """Execute the ``priors`` CLI and provide the computed arrays as a fixture.""" - monkeymodule.setattr( - sys, - "argv", - [ - "priors", - "--cache-dir", - str(cache_dir.resolve()), - "--configs", - str(model_config_file.resolve()), - "--configs", - str(graph_config_file.resolve()), - "--configs", - str(distributions_config_file.resolve()), - "--configs", - str(scenarios_config_file.resolve()), - "--configs", - str(sampling_config_file.resolve()), - "--sampling.storage-file", - str(samples_file.resolve()), - "--priors.file", - str(priors_file), - ], - ) - main = assemble_main(settings_cls=PriorsCLI, prog_name="priors") - main() - with h5py.File(priors_file, "r") as h5file: - return h5file[dataset][:] - - -@pytest.fixture(scope="session") -def computed_prevalences( - monkeymodule, - cache_dir: Path, - model_config_file: Path, - graph_config_file: Path, - distributions_config_file: Path, - scenarios_config_file: Path, - modalities_config_file: Path, - sampling_config_file: Path, - samples_file: Path, - prevalences_file: Path, - data_file: Path, - # to ensure the correct execution order, also require data and samples - 
generated_data: pd.DataFrame, - drawn_samples: np.ndarray, - dataset: str = "000", -) -> tuple[np.ndarray, int, int]: - """Provide the computed prevalences as a fixture.""" - monkeymodule.setattr( - sys, - "argv", - [ - "prevalences", - "--cache-dir", - str(cache_dir.resolve()), - "--configs", - str(graph_config_file.resolve()), - "--configs", - str(model_config_file.resolve()), - "--configs", - str(distributions_config_file.resolve()), - "--configs", - str(scenarios_config_file.resolve()), - "--configs", - str(modalities_config_file.resolve()), - "--configs", - str(sampling_config_file.resolve()), - "--sampling.storage-file", - str(samples_file.resolve()), - "--prevalences.file", - str(prevalences_file), - "--data.source", - str(data_file.resolve()), - "--data.mapping", - '{"early": "early", "late": "late"}', - ], - ) - main = assemble_main(settings_cls=PrevalencesCLI, prog_name="prevalences") - main() - with h5py.File(prevalences_file, "r") as h5file: - return ( - h5file[dataset][:], - h5file[dataset].attrs["num_match"], - h5file[dataset].attrs["num_total"], - ) - - -def test_generated_data(generated_data: pd.DataFrame) -> None: - """Test the generated data.""" - assert generated_data.shape == (200, 3) - assert ( - generated_data["imaging", "ipsi", "II"].sum() - > generated_data["imaging", "ipsi", "III"].sum() - ) - assert generated_data.ly.t_stage.isin(["early", "late"]).all() - assert all( - generated_data.ly.query(C("t_stage") == "early")["imaging", "ipsi"].mean() - < generated_data.ly.query(C("t_stage") == "late")["imaging", "ipsi"].mean() - ) - - -def test_scenarios(scenarios_config: list[ScenarioConfig]) -> None: - """Check the loaded scenarios.""" - for scenario in scenarios_config: - assert np.isclose(np.sum(scenario.t_stages_dist), 1.0) - - -def test_drawn_samples(drawn_samples: np.ndarray) -> None: - """Test the drawn samples.""" - assert drawn_samples.shape[-1] == 4 - - -def test_computed_priors( - cache_dir: Path, - model_config: ModelConfig, - 
graph_config: GraphConfig, - distributions_config: dict[str, DistributionConfig], - drawn_samples: np.ndarray, - scenarios_config: list[ScenarioConfig], - computed_priors: np.ndarray, -) -> None: - """Test the computed priors.""" - scenario = scenarios_config[0] - kwargs = scenario.model_dump(include={"t_stages", "t_stages_dist", "mode"}) - cached_compute_priors = get_cached(compute_priors, cache_dir) - - kwargs.update( - { - "model_config": model_config, - "graph_config": graph_config, - "dist_configs": distributions_config, - "samples": drawn_samples, - } - ) - assert cached_compute_priors._cached_func.check_call_in_cache(**kwargs) - cached_output = cached_compute_priors(**kwargs) - assert np.allclose(computed_priors, cached_output) - - assert computed_priors.shape[-1] == 4 - assert np.all(computed_priors >= 0.0) - assert np.all(computed_priors <= 1.0) - - -def test_computed_prevalences( - computed_prevalences: tuple[np.ndarray, int, int], -) -> None: - """Test the computed prevalences.""" - prevalences, num_match, num_total = computed_prevalences - num_match, num_total = int(num_match), int(num_total) - assert num_match == 64 - assert num_total == 123 diff --git a/tests/plot/baseline/sine.png b/tests/plot/baseline/sine.png deleted file mode 100644 index 1c12e3f..0000000 Binary files a/tests/plot/baseline/sine.png and /dev/null differ diff --git a/tests/plot/baseline/sine.svg b/tests/plot/baseline/sine.svg deleted file mode 100644 index 302bf38..0000000 --- a/tests/plot/baseline/sine.svg +++ /dev/null @@ -1,515 +0,0 @@ - - - diff --git a/tests/plot/baseline/sine_svg.png b/tests/plot/baseline/sine_svg.png deleted file mode 100644 index edf255e..0000000 Binary files a/tests/plot/baseline/sine_svg.png and /dev/null differ diff --git a/tests/plot/baseline/test_draw.png b/tests/plot/baseline/test_draw.png deleted file mode 100644 index 9decc57..0000000 Binary files a/tests/plot/baseline/test_draw.png and /dev/null differ diff --git a/tests/plot/data/beta_samples.hdf5 
b/tests/plot/data/beta_samples.hdf5 deleted file mode 100644 index b08fb84..0000000 Binary files a/tests/plot/data/beta_samples.hdf5 and /dev/null differ diff --git a/tests/plot/plot_utils_test.py b/tests/plot/plot_utils_test.py deleted file mode 100644 index 9140cf3..0000000 --- a/tests/plot/plot_utils_test.py +++ /dev/null @@ -1,284 +0,0 @@ -"""Testing of the utilities implemented for the plotting routines.""" - -from pathlib import Path - -import matplotlib.pyplot as plt -import matplotlib.testing.compare as mpl_comp -import numpy as np -import pytest - -from lyscripts.plots import ( - BetaPosterior, - Histogram, - ceil_to_step, - draw, - floor_to_step, - get_size, - save_figure, -) - - -@pytest.fixture -def beta_samples() -> str: - """Name of HDF5 file where some samples from a Beta distribution are stored.""" - return "./tests/plot/data/beta_samples.hdf5" - - -def test_floor_to_step(): - """Check correct rounding down to a given step size.""" - numbers = np.array([0.0, 3.0, 7.4, 2.01, np.pi, 12.7, 12.7, 17.3]) - steps = np.array([2, 2, 5, 2, 3, 3, 5, 0.17]) - exp_res = np.array([0.0, 2.0, 5.0, 2.0, 3.0, 12.0, 10.0, 17.17]) - - comp_res = np.zeros_like(exp_res) - for i, (num, step) in enumerate(zip(numbers, steps, strict=False)): - comp_res[i] = floor_to_step(num, step) - - assert all(np.isclose(comp_res, exp_res)), "Floor to step did not work properly." - - -def test_ceil_to_step(): - """Check correct rounding up to a given step size.""" - numbers = np.array([0.0, 3.0, 7.4, 2.01, np.pi, 12.7, 12.7, 17.3]) - steps = np.array([2, 2, 5, 2, 3, 3, 5, 0.17]) - exp_res = np.array([2.0, 4.0, 10.0, 4.0, 6.0, 15.0, 15.0, 17.34]) - - comp_res = np.zeros_like(exp_res) - for i, (num, step) in enumerate(zip(numbers, steps, strict=False)): - comp_res[i] = ceil_to_step(num, step) - - assert all(np.isclose(comp_res, exp_res)), "Ceil to step did not work properly." 
- - -def test_histogram_cls(beta_samples: str): - """Make sure the histogram data container works as intended.""" - str_filename = beta_samples - path_filename = Path(str_filename) - non_existent_filename = "non_existent.hdf5" - custom_label = "Lorem ipsum" - - hist_from_str = Histogram.from_hdf5(filename=str_filename, dataname="beta") - hist_from_path = Histogram.from_hdf5( - filename=path_filename, - dataname="beta", - scale=10.0, - label=custom_label, - ) - - with pytest.raises(FileNotFoundError): - Histogram.from_hdf5(filename=non_existent_filename, dataname="does_not_matter") - - assert np.all( - np.isclose(hist_from_str.values, 10.0 * hist_from_path.values) - ), "Scaling of data does not work correclty" - assert np.all( - np.isclose( - hist_from_str.left_percentile(50.0), - hist_from_str.right_percentile(50.0), - ) - ), "50% percentiles should be the same from the left and from the right." - assert np.all( - np.isclose( - hist_from_path.left_percentile(10.0), - hist_from_path.right_percentile(90.0), - ) - ), "10% from the left is not the same as 90% from the right" - assert ( - hist_from_str.kwargs["label"] == "beta | mega scan | 100 | ext" - ), "Label extraction did not work" - assert ( - hist_from_path.kwargs["label"] == custom_label - ), "Keyword override did not work" - - -def test_inverted_histogram_cls(beta_samples: str): - """Make sure the histogram data container works as intended.""" - str_filename = beta_samples - path_filename = Path(str_filename) - custom_label = "Lorem ipsum" - - hist_from_str = Histogram.from_hdf5(filename=str_filename, dataname="beta") - hist_from_path = Histogram.from_hdf5( - filename=path_filename, - dataname="beta", - scale=-100.0, - offset=100.0, - label=custom_label, - ) - - assert np.all( - np.isclose(100.0 - hist_from_str.values, hist_from_path.values) - ), "Scaling and offsetting of data does not work correclty" - assert np.all( - np.isclose( - hist_from_str.left_percentile(50.0), - hist_from_str.right_percentile(50.0), 
- ) - ), "50% percentiles should be the same from the left and from the right." - assert np.all( - np.isclose( - hist_from_path.left_percentile(10.0), - hist_from_path.right_percentile(90.0), - ) - ), "10% from the left is not the same as 90% from the right" - assert ( - hist_from_str.kwargs["label"] == "beta | mega scan | 100 | ext" - ), "Label extraction did not work" - assert ( - hist_from_path.kwargs["label"] == custom_label - ), "Keyword override did not work" - - -def test_posterior_cls(beta_samples: str): - """Test the container class for Beta posteriors.""" - str_filename = beta_samples - path_filename = Path(str_filename) - non_existent_filename = "non_existent.hdf5" - custom_label = "Lorem ipsum" - x_10 = np.linspace(0.0, 10.0, 100) - x_100 = np.linspace(0.0, 100.0, 100) - - post_from_str = BetaPosterior.from_hdf5(filename=str_filename, dataname="beta") - post_from_path = BetaPosterior.from_hdf5( - filename=path_filename, - dataname="beta", - scale=10.0, - label=custom_label, - ) - - with pytest.raises(FileNotFoundError): - BetaPosterior.from_hdf5( - filename=non_existent_filename, dataname="does_not_matter" - ) - - assert ( - post_from_str.num_success == post_from_path.num_success == 20 - ), "Number of successes not correctly extracted" - assert ( - post_from_str.num_total == post_from_path.num_total == 40 - ), "Total number of trials not correctly extracted" - assert ( - post_from_str.num_fail == post_from_path.num_fail == 20 - ), "Number of failures not correctly computed" - assert np.all( - np.isclose( - 10 * post_from_str.pdf(x_100), - post_from_path.pdf(x_10), - ) - ), "PDFs with different scaling do not match" - assert np.all( - np.isclose( - post_from_str.left_percentile(50.0), - post_from_str.right_percentile(50.0), - ) - ), "50% percentiles should be the same from the left and from the right." 
- assert np.all( - np.isclose( - post_from_path.left_percentile(10.0), - post_from_path.right_percentile(90.0), - ) - ), "10% from the left is not the same as 90% from the right" - - -@pytest.mark.mpl_image_compare -def test_draw(beta_samples: str): - """Check the drawing function.""" - filename = Path(beta_samples) - dataname = "beta" - hist = Histogram.from_hdf5(filename, dataname) - post = BetaPosterior.from_hdf5(filename, dataname) - fig, ax = plt.subplots() - ax = draw(axes=ax, contents=[hist, post], percent_lims=(2.0, 2.0)) - return fig - - -def test_draw_hist_kwargs(beta_samples: str): - """Make sure the `hist_kwargs` can override the defaults.""" - filename = Path(beta_samples) - dataname = "beta" - - hist = Histogram.from_hdf5(filename, dataname) - default_kwargs_path = "./tests/plot/results/default_kwargs" - fig, default_kwargs_ax = plt.subplots() - default_kwargs_ax = draw(default_kwargs_ax, contents=[hist]) - save_figure(default_kwargs_path, fig, ["png"]) - - bins_kwargs_path = "./tests/plot/results/bins_kwargs" - fig, bins_kwargs_ax = plt.subplots() - bins_kwargs_ax = draw(bins_kwargs_ax, contents=[hist], hist_kwargs={"bins": 70}) - save_figure(bins_kwargs_path, fig, ["png"]) - - global_kwargs_path = "./tests/plot/results/global_kwargs" - fig, global_kwargs_ax = plt.subplots() - global_kwargs_ax = draw( - global_kwargs_ax, contents=[hist], hist_kwargs={"alpha": 0.3} - ) - save_figure(global_kwargs_path, fig, ["png"]) - - hist = Histogram.from_hdf5(filename, dataname, alpha=0.3) - local_kwargs_path = "./tests/plot/results/local_kwargs" - fig, local_kwargs_ax = plt.subplots() - local_kwargs_ax = draw(local_kwargs_ax, contents=[hist], hist_kwargs={"alpha": 1.0}) - save_figure(local_kwargs_path, fig, ["png"]) - - assert ( - mpl_comp.compare_images( - expected=default_kwargs_path + ".png", - actual=bins_kwargs_path + ".png", - tol=0.001, - ) - is not None - ), "Changing bin number did not result in different plot" - - assert ( - mpl_comp.compare_images( - 
expected=default_kwargs_path + ".png", - actual=global_kwargs_path + ".png", - tol=0.001, - ) - is not None - ), "Changing global kwargs in `draw` did not result in different plot" - - assert ( - mpl_comp.compare_images( - expected=local_kwargs_path + ".png", - actual=global_kwargs_path + ".png", - tol=0.001, - ) - is None - ), "Overriding global with `Histogram` specific kwargs did not work" - - -def test_save_figure(capsys): - """Check that figures get stored correctly.""" - x = np.linspace(0.0, 2 * np.pi, 200) - y = np.sin(x) - fig, ax = plt.subplots(figsize=get_size()) - ax.plot(x, y) - output_path = "./tests/plot/results/sine" - formats = ["png", "svg"] - - save_figure(output_path, fig, formats) - - assert ( - mpl_comp.compare_images( - expected="./tests/plot/baseline/sine.png", - actual="./tests/plot/results/sine.png", - tol=0.0, - ) - is None - ), "PNG of figure was not stored correctly." - - # Commented out, because I recently got the following message from matplotlib: - # `SKIPPED (Don't know how to convert .svg files to png)` - # So, I am commenting out this test for now. - - # assert mpl_comp.compare_images( - # expected="./tests/plot/baseline/sine.svg", - # actual="./tests/plot/results/sine.svg", - # tol=0., - # ) is None, "SVG of figure was not stored correctly." - - # assert save_figure_capture.out == expected_output, ( - # "The output during the save figure procedure was wrong." 
- # ) diff --git a/tests/run_doctests.py b/tests/run_doctests.py deleted file mode 100644 index 241943e..0000000 --- a/tests/run_doctests.py +++ /dev/null @@ -1,12 +0,0 @@ -"""Script to run doctests in the modules of `lyscripts`.""" - -import doctest - -from lyscripts import plots, utils -from lyscripts.compute import prevalences, risks - -if __name__ == "__main__": - doctest.testmod(utils, verbose=True) - doctest.testmod(plots, verbose=True) - doctest.testmod(prevalences, verbose=True) - doctest.testmod(risks, verbose=True) diff --git a/tests/test.yaml b/tests/test.yaml deleted file mode 100644 index 552e4e2..0000000 --- a/tests/test.yaml +++ /dev/null @@ -1 +0,0 @@ -test: This is just for testing diff --git a/tests/test_backend.hdf5 b/tests/test_backend.hdf5 deleted file mode 100644 index 6312b3d..0000000 Binary files a/tests/test_backend.hdf5 and /dev/null differ diff --git a/tests/test_data.csv b/tests/test_data.csv deleted file mode 100644 index 3fbd2a1..0000000 --- a/tests/test_data.csv +++ /dev/null @@ -1,23 +0,0 @@ -synth_CT,synth_CT,synth_CT,tumor -ipsi,ipsi,ipsi,1 -a,b,c,t_stage -False,True,True,late -False,True,True,early -True,True,False,early -False,False,True,late -True,True,False,early -True,False,True,early -False,True,True,early -False,False,True,early -False,False,True,early -False,True,True,early -False,False,True,early -True,False,True,early -True,False,True,early -False,True,True,early -True,True,False,early -False,False,False,early -True,False,False,early -True,True,True,early -False,False,True,early -False,True,True,early diff --git a/tests/test_params_v0.yaml b/tests/test_params_v0.yaml deleted file mode 100644 index 492e0cb..0000000 --- a/tests/test_params_v0.yaml +++ /dev/null @@ -1,69 +0,0 @@ -######################################################################################## -# This dictionary defines which LNLs are present in the -# graph and how they are connected. 
In this case, the -# primary tumors spreads to the LNLs II, III and IV. -# Subsequently, level II spreads to III and III to IV. -graph: - tumor: - T: [a, b, c] - lnl: - a: [b, c] - b: [c] - c: [] - -######################################################################################## -# Below are some parameters necessary for the model setup: -model: - first_binom_prob: 0.3 # defines shape of time prior of first T-stage - max_t: 10 # max number of time steps to evolve system - t_stages: [early, late] # T-stages to consider in the data - class: MidlineBilateral # model class to use (see `lymph-model` docs) - # keyword arguments to pass to the model constructor - kwargs: - base_symmetric: false - trans_symmetric: true - use_mixing: true - -######################################################################################## -# Choose how to create the synthetic dataset. The currently set values -# reflect what one can see in the data (e.g. on https://lyprox.org) -synthetic: - t_stages_dist: - early: 0.6 # 60% of synethetic patients are early T-stage... - late: 0.4 # ...and 40% late T-stage - midline_ext_prob: 0.3 # create 30% of patients with midline extension - - # Use these made-up specificity & sensitivity values - # to create the fake observations - modalities: - synth_CT: [0.85, 0.85] - -######################################################################################## -# Under this key we define the specficity and sensitivity for a range -# of diagnostic modalities. They are taken from literature and during -# sampling, everything that's not commented our here will be used for -# inference. -modalities: - synth_CT: [0.87, 0.83] - -# this lists the above defined modalities that will be used for -# inference and evaluation. -# The wird nested way of writing them down here is so that DVC can unpack the dict -# directly into the command. 
-inference_modalities: - modalities: - - synth_CT - -######################################################################################## -# This defines the sampler settings and the thermodynamic integration path -sampling: - walkers_per_dim: 5 # num of parallel walkers per parameter space dimension - burnin: 10 # burn-in steps to discard - nsteps: 10 # do this many serious steps per sampling round - thin_by: 1 # draw this many samples for one step in `nsteps` - # only used when sampling until convergence - kwargs: - max_steps: 10 - check_interval: 20 - trust_threshold: 30.0 - rel_acor_threshold: 0.075 diff --git a/tests/test_params_v1.yaml b/tests/test_params_v1.yaml deleted file mode 100644 index 913dd8c..0000000 --- a/tests/test_params_v1.yaml +++ /dev/null @@ -1,50 +0,0 @@ -version: 1 - -######################################################################################## -# This dictionary defines which LNLs are present in the -# graph and how they are connected. In this case, the -# primary tumors spreads to the LNLs II, III and IV. -# Subsequently, level II spreads to III and III to IV. -graph: - tumor: - T: [a, b, c] - lnl: - a: [b, c] - b: [c] - c: [] - -######################################################################################## -# Below are some parameters necessary for the model setup: -model: - class_name: Midline - constructor: binary - max_time: 10 - kwargs: - is_symmetric: - tumor_spread: false - lnl_spread: true - use_mixing: true - use_central: false - use_midext_evo: true - -# marginalize over diagnose times. 
Keys are T-stages -distributions: - early: - kind: frozen - func: binomial - params: {p: 0.3} - late: - kind: parametric - func: binomial - params: {p: 0.3} - -# diagnostic modalities and their sensitivity/specificity values -modalities: - CT: - spec: 0.76 - sens: 0.81 - kind: clinical - FNA: - spec: 0.98 - sens: 0.80 - kind: pathological diff --git a/tests/test_sample_params.yaml b/tests/test_sample_params.yaml deleted file mode 100644 index ff917d3..0000000 --- a/tests/test_sample_params.yaml +++ /dev/null @@ -1,27 +0,0 @@ -version: 1 - -# graph -graph: - tumor: - T: [II, III] - lnl: - II: [III] - III: [] - -# model setup -model: - class: Unilateral - distributions: # marginalize over diagnose times. Keys are T-stages - early: - frozen: binomial - max_time: 10 - kwargs: {p: 0.3} - late: - parametric: binomial - max_time: 10 - modalities: [CT, FNA] # List of modalities, defined further down - -# definition of sensitivities and specificities -modalities: - CT: [0.76, 0.81] - FNA: [0.98, 0.80] diff --git a/tests/utils_test.py b/tests/utils_test.py deleted file mode 100644 index ebeaad1..0000000 --- a/tests/utils_test.py +++ /dev/null @@ -1,90 +0,0 @@ -"""Test the core utility functions of the package.""" - -from pathlib import Path -from typing import Any - -import pytest -import yaml -from pydantic import TypeAdapter - -from lyscripts.configs import DeprecatedModelConfig, DistributionConfig, ModelConfig -from lyscripts.utils import ( - flatten, - get_modalities_subset, -) - - -def test_flatten(): - """Check if the dictionary flattening works.""" - nested = { - "A": {"a": 1, "b": 2, "c": 3}, - "B": {"a": 4, "b": 5, "c": 6}, - "C": {"a": {"i": 7, "ii": 8}}, - } - exp_flattened = { - ("A", "a"): 1, - ("A", "b"): 2, - ("A", "c"): 3, - ("B", "a"): 4, - ("B", "b"): 5, - ("B", "c"): 6, - ("C", "a", "i"): 7, - ("C", "a", "ii"): 8, - } - - actual_flattened = flatten(nested) - assert actual_flattened == exp_flattened, "Dictionary was not flattened properly." 
- - -def test_get_modalities_subset(): - """Test the extraction of a modality subset.""" - modalities = { - "CT": [0.76, 0.81], - "MRI": [0.63, 0.86], - "PET": [0.79, 0.83], - "path": [1.0, 1.0], - } - selected = ["CT", "path"] - exp_subset = { - "CT": [0.76, 0.81], - "path": [1.0, 1.0], - } - - actual_subset = get_modalities_subset(modalities, selected) - assert actual_subset == exp_subset, "Extraction of modalities did not work." - - -@pytest.fixture -def v0_config() -> dict[str, Any]: - """Return a deprecated model configuration.""" - config_path = Path("tests/test_params_v0.yaml") - with open(config_path) as config_file: - return yaml.safe_load(config_file) - - -@pytest.fixture -def v1_config() -> dict[str, Any]: - """Return a deprecated model configuration.""" - config_path = Path("tests/test_params_v1.yaml") - with open(config_path) as config_file: - return yaml.safe_load(config_file) - - -def test_translate_deprecated_model_config( - v0_config: dict[str, Any], - v1_config: dict[str, Any], -): - """Test the translation of the deprecated model configuration.""" - adapter = TypeAdapter(dict[str | int, DistributionConfig]) - - old_model_config = DeprecatedModelConfig(**v0_config["model"]) - exp_model_config = ModelConfig(**v1_config["model"]) - exp_dist_configs = adapter.validate_python(v1_config["distributions"]) - - trans_model_config, trans_dist_configs = old_model_config.translate() - - assert ( # noqa - exp_model_config.model_dump(exclude="kwargs") - == trans_model_config.model_dump(exclude="kwargs") - ) - assert exp_dist_configs == trans_dist_configs