diff --git a/.chglog/CHANGELOG.tpl.md b/.chglog/CHANGELOG.tpl.md deleted file mode 100755 index 05e21ad..0000000 --- a/.chglog/CHANGELOG.tpl.md +++ /dev/null @@ -1,42 +0,0 @@ -{{ if .Versions -}} - -## [Unreleased] - -{{ if .Unreleased.CommitGroups -}} -{{ range .Unreleased.CommitGroups -}} -### {{ .Title }} -{{ range .Commits -}} -- {{ if .Scope }}**{{ .Scope }}:** {{ end }}{{ .Subject }} -{{ end }} -{{ end -}} -{{ end -}} -{{ end -}} - -{{ range .Versions }} - -## {{ if .Tag.Previous }}[{{ .Tag.Name }}]{{ else }}{{ .Tag.Name }}{{ end }} - {{ datetime "2006-01-02" .Tag.Date }} -{{ range .CommitGroups -}} -### {{ .Title }} -{{ range .Commits -}} -- {{ if .Scope }}**{{ .Scope }}:** {{ end }}{{ .Subject }} -{{ end }} -{{ end -}} - -{{- if .NoteGroups -}} -{{ range .NoteGroups -}} -### {{ .Title }} -{{ range .Notes }} -{{ .Body }} -{{ end }} -{{ end -}} -{{ end -}} -{{ end -}} - -{{- if .Versions }} -[Unreleased]: {{ .Info.RepositoryURL }}/compare/{{ $latest := index .Versions 0 }}{{ $latest.Tag.Name }}...HEAD -{{ range .Versions -}} -{{ if .Tag.Previous -}} -[{{ .Tag.Name }}]: {{ $.Info.RepositoryURL }}/compare/{{ .Tag.Previous.Name }}...{{ .Tag.Name }} -{{ end -}} -{{ end -}} -{{ end -}} diff --git a/.chglog/config.yml b/.chglog/config.yml deleted file mode 100755 index 0ea5514..0000000 --- a/.chglog/config.yml +++ /dev/null @@ -1,45 +0,0 @@ -style: github -template: CHANGELOG.tpl.md -info: - title: CHANGELOG - repository_url: https://github.com/lycosystem/lyscripts -options: - commits: - filters: - Type: - - feat - - fix - - perf - - refac - - docs - - chore - - test - issues: - prefix: - - # - refs: - actions: - - closes - - fixes - merges: - pattern: "^merge:" - reverts: - pattern: "^revert:" - commit_groups: - title_maps: - feat: Features - fix: Bug Fixes - perf: Performance Improvements - refac: Code Refactoring - docs: Documentation - chore: Maintenance - test: Testing - header: - pattern: "^(\\w*)(?:\\(([\\w\\$\\.\\-\\*\\s]*)\\))?\\:\\s(.*)$" - pattern_maps: 
- - Type - - Scope - - Subject - notes: - keywords: - - BREAKING CHANGE diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index 3f47cab..0000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: Build - -on: - release: - types: [ created ] - - workflow_dispatch: - -jobs: - build: - name: Build package from source - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - fetch-depth: 0 - - name: Install Python 3 - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - name: Install build tools - run: | - python3 -m pip install build --user - - name: Build package - run: | - python3 -m build - - name: Upload to CI runner - uses: actions/upload-artifact@v4 - with: - name: built-package - path: dist/ - - pypi-publish: - name: Publish built package on PyPI - runs-on: ubuntu-latest - needs: - - build - - # Specifying a GitHub environment is optional, but strongly encouraged - environment: - name: pypi - url: https://pypi.org/p/lyscripts - permissions: - # IMPORTANT: this permission is mandatory for Trusted Publishing - id-token: write - steps: - # retrieve your distributions here - - name: Download from CI runner - uses: actions/download-artifact@v4 - with: - name: built-package - path: dist/ - - name: Publish on PyPI - uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/testpypi.yml b/.github/workflows/testpypi.yml deleted file mode 100644 index 9029f11..0000000 --- a/.github/workflows/testpypi.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: Test Build - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - workflow_dispatch: - -jobs: - build: - name: Build package from source - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - fetch-depth: 0 - - name: Install Python 3 - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - name: Install 
build tools - run: | - python3 -m pip install build --user - - name: Build package - run: | - python3 -m build - - name: Upload to CI runner - uses: actions/upload-artifact@v4 - with: - name: built-package - path: dist/ - - testpypi-publish: - name: Publish built package on TestPyPI - runs-on: ubuntu-latest - needs: - - build - - # Specifying a GitHub environment is optional, but strongly encouraged - environment: - name: testpypi - url: https://test.pypi.org/p/lyscripts - permissions: - # IMPORTANT: this permission is mandatory for Trusted Publishing - id-token: write - steps: - # retrieve your distributions here - - name: Download from CI runner - uses: actions/download-artifact@v4 - with: - name: built-package - path: dist/ - - name: Publish on PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - repository-url: https://test.pypi.org/legacy/ diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml deleted file mode 100644 index 84ebe00..0000000 --- a/.github/workflows/tests.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: tests - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - - workflow_dispatch: - -jobs: - tests: - name: Run tests & report coverage - runs-on: ubuntu-latest - permissions: - pull-requests: write - contents: write - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install .[tests] - - # Below, we first run pytest in the `tests/` folder. Because we use a `src` - # layout, this will fail if the package is not installed correctly. - - name: Test package is installable - run: pytest --cov=lyscripts --cov-config=pyproject.toml tests - env: - COVERAGE_FILE: .coverage.is_installable - - # Now, we execute all doctests in the `src` tree. 
This will NOT run with - # the installed code, but it doesn't matter, because we already know it is - # installable from the step above. - - name: Run doctests - if: success() || failure() # run these even if previous step fails - run: pytest --cov=lyscripts --cov-config=pyproject.toml --doctest-modules src - env: - COVERAGE_FILE: .coverage.doctests - GITHUB_TOKEN: ${{ secrets.LYCOSYSTEM_READALL }} - - # Lastly, we collect all files that start with `.coverage` into one file and - # create a report either as a comment on the PR or in a separate branch if its - # a commit to the main branch. From that branch we can put badges and coverage - # reports into e.g. our main README.md - - name: Add coverage comment - if: success() || failure() # run these even if previous step fails - uses: py-cov-action/python-coverage-comment-action@v3 - with: - GITHUB_TOKEN: ${{ github.token }} - MERGE_COVERAGE_FILES: true diff --git a/.gitignore b/.gitignore deleted file mode 100644 index d737ce6..0000000 --- a/.gitignore +++ /dev/null @@ -1,137 +0,0 @@ -# VS Code -.vscode/ - -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -_version.py - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# uv -# This could be tracked, but is not really necessary for library development -uv.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# Test result folders -/tests/plot/results diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 3b059e6..0000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,38 +0,0 @@ -default_install_hook_types: [pre-commit, commit-msg] - -repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 - hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-toml - - id: check-yaml - - id: check-json -- repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.0 - hooks: - - id: ruff - args: [ --fix ] - - id: ruff-format -- repo: https://github.com/compilerla/conventional-pre-commit - rev: v4.2.0 - hooks: - - id: conventional-pre-commit - stages: [commit-msg] - args: - - build # changes of the build system or dependencies - - change # commit alters the implementation of an existing feature - - chore # technical or maintenance task not related to feature or user story - - ci # edits to the continuous integration scripts/configuration - - deprecate # a feature or functionality will be deprecated - - docs # add, update of revise the documentation - - feat # a new feature was implemented (bump MINOR version) - - fix # an issue or bug has been fixed (bump PATCH version) - - perf # performance improvements that do not alter existing behavior - - refac # update shuffles code around but does not alter functionality - - remove # a feature or functionality is removed - - style # source code is improved w.r.t. 
its code quality - - test # commits enhance or add to the test suite - - merge # merge one branch into another. Should be ignored by git-chglog - - revert # revert previous commit(s). Should be ignored by git-chglog diff --git a/.readthedocs.yml b/.readthedocs.yml deleted file mode 100644 index bb033d9..0000000 --- a/.readthedocs.yml +++ /dev/null @@ -1,25 +0,0 @@ -# .readthedocs.yml -# Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required -version: 2 - -build: - os: ubuntu-22.04 - tools: - python: "3.10" - -# Build documentation in the docs/ directory with Sphinx -sphinx: - builder: html - configuration: docs/source/conf.py - fail_on_warning: false - -# tell RTD to install the package with the docs optional requirements -python: - install: - - method: pip - path: . - extra_requirements: - - docs diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index 84abe31..0000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,999 +0,0 @@ -# Changelog - -All notable changes to this project will be documented in this file. - -## [1.0.2] - 2026-04-08 - -### Bug Fixes - -- Correct TI evaluation and leave out uncertainty reporting. Fixes [#81]. - -### Miscellaneous Tasks - -- Change email addresses. - -## [1.0.1] - 2025-09-10 - -### Bug Fixes - -- Convert dtypes during joining using lydata's `cast_dtypes()`. - -## [1.0.0] - 2025-09-04 - -### Bug Fixes - -- Change `info` to `core` for mid-level lydata columns -- Use bug-fixed lydata `.ly.enhance()` method. - -### Documentation - -- Update documentation for `integrate` and `evidence` commands. -- Improve `data collect` description. - -### Features - -- Add `integrate` command for thermodynamic integration. Thanks [@noemibuehrer]! -- Add command spawning webserver for interactive data collection. - -### Miscellaneous Tasks - -- Add missing links to changelog. -- Add CITATION.cff. - -### Testing - -- Update tests for new lydata.
- -### Build - -- Add uvicorn, fastapi to deps. -- Require at least lydata 0.4.0. - -### Change - -- Make compatible with new lyDATA version. -- Centralize inverse temperature schedule generation. -- Store selected log-level globally. -- Disable properties in collector. - -## [1.0.0rc3] - 2025-07-22 - -### Documentation - -- Fix `join` command's example call. - -### Features - -- Add `data fetch` command. Fixes [#75]. - -### Change - -- Access data only via lydata for compatibility.\ - We have changed the lydata 2nd level headers slightly for the - patient and tumor info (see - https://github.com/lycosystem/lydata/issues/21 for more info).\ - Since the lydata package was already updated to be compatible with - that change, we simply need to route every access the lyscripts - make to the data through lydata package and hence be compatible - too. -- Make CLI work with new lydata format.\ - This is also related to https://github.com/lycosystem/lydata/issues/21 - -## [1.0.0rc2] - 2025-06-26 - -### Bug Fixes - -- Divide by midext prevalence. Fixes [#72].\ - This fixes a bug we reintroduced where we didn't compute observed and - model prevalence in an analogous and comparable way. - -### Documentation - -- Fix build badge in README. -- Fix outdated links to rmnldwg. - -### Miscellaneous Tasks - -- Update pre-commit & ruff rules. - -### Testing - -- Fix the dataset used for testing prevalences. - -### Build - -- Switch to `src` layout. Fixes [#74]. - -### Ci - -- Use tests action with coverage. - -## [1.0.0rc1] - 2025-05-27 - -### Bug Fixes - -- Use 'fork' start method under MacOS. -- Specify config file encoding for other OSes. -- Cast scenario pattern to bool if possible. Fixes [#70].\ - Since we allow defining a pattern with keywords like `"involved"` or `1` - instead of only `True`, we also need to make sure that is correctly cast - to its boolean value for lydata's `C` objects. - -### Documentation - -- Add warning for Windows & MacOS regarding multiprocess(ing). 
- -### Miscellaneous Tasks - -- Add link to changelog. - -### Testing - -- Ensure observed prevalence is correct. Related [#70]. - -### Build - -- Exclude buggy pydantic-settings. A bug is present in -version 2.9.0 and 2.9.1 of pydantic-settings. So, these versions must be -excluded. - -### Ci - -- Update release scripts to use [OIDC](https://docs.pypi.org/trusted-publishers/). This is more secure. - -### Remove - -- Unnecessary custom help formatter. - -## [1.0.0.a7] - 2025-04-15 - -### Bug Fixes - -- Make config -> model -> config round trip test pass. -- Remove `thin_by` factor in wrong place. -- Pass involvement & diagnosis correctly to risks and prevalences. - -### Features - -- Create Modality config from model. -- Create Graph config from model. -- Create Model config from model. - -Note that the three features above come with certain limitations. It is not possible -to export all aspects of a model to a configuration. Especially the distributions -over diagnosis times cannot be converted to a `DistributionConfig`. - -### Miscellaneous Tasks - -- Fix changelog version link. -- Update ly schema. - -### Testing - -- Round trip config -> model -> config. - -### Merge - -- Branch 'main' into 'dev'. -- Branch 'configs-from-model' into 'dev'.\ - Constructing configs from models is only partially possible with the - current implementation of the `lymph` model. - -## [1.0.0.a6] - 2025-03-12 - -### Bug Fixes - -- Better handle midline model. \ - This means disabling the evolution over midline extension. Also, since the new - version of `lymph-model`, the `midext_prob` parameter is not expected to be the - first one anymore when passed to `set_params()`. -- Pass only ipsilateral diagnosis to unilateral model. -- Pass diagnose & involvement correctly to models as dict. - -### Testing - -- Ensure unilateral model receives correct diagnosis. -- Test that diagnosis is used correctly in posteriors. - -### Build - -- Bump lydata & lymph-model dependency.
- -## [1.0.0.a5] - 2025-02-05 - -### Bug Fixes - -- Provide rich `console` to compute progress bars. -- Correctly build deprecated models. - -### Features - -- Enable sampling only named parameter subset. - -### Testing - -- Check model construction & named params. -- Update integration test config YAML files. -- Add external model symbol check. -- Adding dists works correctly.\ - Previously, it could happen that in a `Bilateral` or `Midline` model the - individual submodel's distributions were not synced. - -### Build - -- Bump lymph-model. - -### Change - -- Require every YAML file to have `version`. -- Better version-related error and docs. -- Make in-/output names more consistent. - -### Remove - -- Drop CLI required argument `version`. - -## [1.0.0.a4] - 2025-01-23 - -### Bug Fixes - -- Update data loading to new lydata API. -- Add sampling config back to sample CLI. -- Finish `data filter` command. -- Correctly log number of excluded patients in `lyproxify`. -- Allow extra args in CLI cmds. -- Logging during progress bar. - -### Documentation - -- Deactivate help of removed commands. -- Link only to stable versions. -- Fix intersphinx links. -- Update link to schedule module. -- Configure how pydantic models are displayed. -- Add more info about schema. -- Better explain sampling. -- Add proper info to `cli_cmd()` methods. - -### Features - -- Add mandatory `version` field to command settings.\ - This will allow to differentiate between old and new configs and create - the models accordingly. -- Add translation of old to new model configs. -- Add dynamic YAML config source. -- Configure logging nicely. -- Update `data enhance` command. -- Update `data join` command. -- Update `data filter` command. -- Update `data split` command. -- Capture lydata logging output. -- Update YAML schema for CLIs. -- Update `data lyproxify` cmd.
-- Allow sampling only specified params.\ - Via a new CLI arg named `param_names` one may restrict the parameters - sampled to a named subset. In combination with the fact that any Python - model may be loaded, this results in an enormously flexible sampler. -- Update inv temp `schedule` cmd. -- Allow providing start state to sampling func. - -### Testing - -- Replace subprocess calls with monkeypatch.\ - This allows for better debugging during test calls. -- Load generated data correctly.\ - The synthetic data for testing already has "early" and "late" as - T-stages. Thus, the mapping needed to be adapted. - -### Build - -- pydantic-settings >= 2.7 needed. - -### Change - -- Make `version` field in command settings required. -- Use pydantic for subcommands. -- Use loguru over default logging. -- Rename `data` field to `input`. -- Use pydantic for plot utils, too. -- Use rich logging handler. - -### Refac - -- Slightly change CLI inheritances. -- Sort configs alphabetically. -- Make sampling more reusable. - -### Remove - -- Unused utility functions. -- Plotting scripts except histogram/betaposterp helpers. - -## [1.0.0.a3] - 2024-11-15 - -### Bug Fixes - -- (**plot**) Turn off label by passing `None`. -- (**plot**) Don't fail on wrong wrong in `.draw()`. -- Polish dataclass configuration. -- Make pydantic work with subparsers. -- (**data**) Correct argument name of save function. -- (**comp**) Add distributions & fix dir type for priors. -- (**config**) Don't thin samples twice. - -### Documentation - -- (**configs**) Add to sphinx docs. -- Add intersphinx link to lydata. -- (**config**) Improve `construct_model` docstring. -- Correct copyright year. -- Clean up refs to deleted modules. -- Fix links to documentation in readme. - -### Features - -- (**config**) Write new methods to assemble model. -- (**plot**) Add func `split_legends()`. Related [#60].\ - This allows the user to separate many plot's labels into a number of - different legends. 
-- (**configs**) Add graph config. -- (**sample**) Better config management.\ - The new sample command can merge configurations from multiple YAML files - as well as the command line. It does a better job at reporting progress - when the log-level is set to debug and it more consistently stores - samples and metric logs during burn-in. -- (**sample**) Add JSON schema for params.\ - With this JSON schema one can get auto-completion in most popular - code editors for the configs of most commands. -- (**config**) Allow loading external model.\ - It is now possible to not only specify a model using a fixed YAML - schema, but also via a plain Python file that defines a global `model` - variable. -- (**sample**) Add inverse temperature.\ - With this addition, it is now in principle possible to do thermodynamic - integration. But it is not yet fully fleshed out for a nice user - experience. -- Add tiny script to generate JSON schema.\ - The script creates a JSON schema based on all CLI commands' settings. -- (**comp**) Rewrite posteriors command that uses pydantic and joblib -- (**comp**) Rewrite risks command that uses pydantic and joblib -- (**sample**) Add iterations/second column to burnin progress display. -- (**sample**) Show total iterations to sampling progress display. -- (**config**) Allow converting diagnosis to involvement. -- (**compute**) Rewrite prevalence command that uses pydantic and joblib - -### Miscellaneous Tasks - -- Bump pre-commit hooks. -- Add ruff linting rules. -- Switch to ruff, drop pycln & isort. -- Run ruff & clean up codebase. -- Ignore some ruff rules in tests dir. - -### Styling - -- More cleanup to satisfy ruff. -- Improve docstrings and code style a bit. - -### Testing - -- Add some basic testing for config. -- (**data**) Basic integration test for `generate`. -- Add sampling step to integration tests. -- (**config**) Check the external loading feature. -- Extend & unify integration test for priors. 
- -### Build - -- Remove upper cap in dependencies. -- Bump lymph-model to 1.2.3. -- Remove `dev` from optional dependencies.\ - This is because a lot of dev tools like ruff, pre-commit, ... are - installed globally (e.g. with pipx) instead of per venv. -- Bump `lydata` dependency to 0.2.0. - -### Change - -- (**plot**) Improve beta post & hist. Fixes [#60].\ - The histograms and beta posteriors are now better implemented, allowing - a user to extend the `draw()` function's abilities by adding classes - similar to `Histogram` and `BetaPosterior`. -- (**plot**) More flexible labels. -- Use pydantic over dacite. -- Switch to pydantic config for sampling (WIP). -- (**compute**) Use pydantic & joblib over dataclasses and custom caching. -- ⚠ **BREAKING** (**compute**) Add priors joblib cache -- ⚠ **BREAKING** (**data**) Replace the `generate` cmd.\ - This was just supposed to be a little script to generate data for an - integration test, but it turns out that it could just be used to update - the old `generate` command. - BREAKING CHANGES: `generate` command is better configurable -- (**config**) Merge sample/sampling configs. -- Use lydata's `ModalityConfig`.\ - Since the [lydata](https://github.com/lycosystem/lydata) package is - evolving quickly, I added it as a dependency and moved the first bit of - code over there. -- Enable use of lydata to load patient data. -- (**comp**) HDF5 file storage more versatile. -- (**sample**) Store history in .tmp file.\ - This serves as an indication about whether or not a burn-in sampling round - has been interrupted. The sampler may then continue from where it left off. -- ⚠ **BREAKING** (**compute**) Update prevalence computation. -- More useful YAML load/merge logging. -- Improve logging of some utilities. - -### Remove - -- Outdated streamlit app. -- Temporary test file. -- Delete remaining streamlit code.
-- ⚠ **BREAKING** Unused HDF5 cache and scenarios.\ - Both these things are superseded by better stuff based on pydantic. - -## [1.0.0.a2] - 2024-04-28 - -### 🚀 Features - -- *(sample)* Allow no multiprocessing (0 cores) -- *(scenario)* Add `from_dict` classmethod -- *(plot)* Add `offset` to hist and beta dist - -### 🐛 Bug Fixes - -- Use correct header rows, fixes [#57] -- *(compute)* Observe bilateral prevalence -- *(scenario)* Dataclass with property issue -- Ensure sides in scenario diagnose/involvement -- *(compute)* Correct obs and pred prevalences - -### 📚 Documentation - -- *(data)* Refactor lyproxify docstrings -- Update badge to link to RTD, fixes [#53] - -### 🧪 Testing - -- Fix testing setup & add new histogram test - -### Change - -- *(data)* Make copy before edit in-place -- *(scenario)* Make scenario dataclass - -### Merge - -- Branch '57-lyproxify-loads-wrong-number-of-header-rows' into 'dev' - -## [1.0.0.a1] - 2024-04-03 - -### 🚀 Features - -- *(data)* Add simple data filter command, fixes [#51] -- Customize log handler for better filename -- *(sample)* Allow custom T-stage mapping -- *(sample)* Allow to load `side` data -- *(utils)* Allow `Unilateral.binary` in params -- *(sample)* Display time elapsed during burnin -- *(predict)* Add cmd to precompute state dists -- *(post)* Start with posterior cmd (WIP) -- *(precompute)* Add `priors` cmd, related to [#54] -- *(precompute)* Work on posterior (WIP) -- *(utils)* Allow keywords in modalities def -- *(post)* Compute for multiple scenarios -- *(predict)* Update prevalences cmd (WIP) -- Add class for storing scenarios -- *(precompute)* Priors from list of scenarios -- *(predict)* Finish prevalences cmd -- *(data)* Implement custom pandas accessor -- *(scenario)* Track laterality as well -- *(compute)* Risk works with lymph v1, too, now - -### 🐛 Bug Fixes - -- [**breaking**] Use modern lydata cols for t_stage matching -- Wrong lnls in predict prevalences -- [**breaking**] Update prev
prediction to new lymph API -- *(sample)* Match T-stage mapping with lymph API -- *(data)* Stop dtype change during `concat` -- *(sample)* Skip 0 iter convergence check -- *(sample)* Missing import -- *(sample)* Only pass `side` to unilateral model -- *(sample)* Display converged message nicely -- *(utils)* Correct default args for `get_chain()` -- Wrong posterior shape -- *(predict)* Even out some bugs -- Don't pycln accessor import -- *(data)* Enhance failed due to copy on write - -### 📚 Documentation - -- Start with basic sphinx setup -- Start organizing top-level cmds with sphinx -- Include all modules in docs -- Update docstrings to reST format -- Add document files for precompute subcmd -- Allow links to lymph docs -- Fix `temp_schedule` docstring -- Shorten titles -- *(predict)* Update prevalence module docstring -- Refactor docs for new `compute` subcommand -- Fix typos and missing modules - -### 🧪 Testing - -- *(data)* Ensure new joining works correctly -- *(sample)* Check some sampling methods -- Fix typos in tests -- Update failing tests - -### ⚙️ Miscellaneous Tasks - -- Add readthedocs config file -- Remove pdoc action -- Update changelog - -### Build - -- Bump lymph-model to v1.0 -- Bump lymph version & add sphinx deps - -### Change - -- *(sample)* [**breaking**] Start on new sample command (WIP) -- *(sample)* [**breaking**] Reimplement sampling command -- *(precompute)* Use HDF5 cache -- *(precompute)* Make recursive.
Related: [#54] -- *(prevs)* Start on updated `prevalences` cmd -- Replace 'diagnose' & bump lymph to 1.2.0 -- Simplify scenario handling (WIP) -- *(precompute)* Posteriors only from priors -- *(scenario)* Shorten hash to 6 digits - -### Merge - -- Branch 'main' into 'dev' -- Branch '51-filter-command' into 'dev' -- Branch '53-use-sphinx-for-documentation' into 'dev' -- Branch '54-add-precompute-commands' into 'dev' - -### Refac - -- *(sample)* Better progress tracking -- *(precompute)* Comp state dist in own submod -- *(utils)* Move funcs out of `precompute` -- Bundle adding scenario args to parser -- *(compute)* Predict & precompute -> compute - -### Remove - -- [**breaking**] Midline_ext in create_patient_row for now - -## [1.0.0.a0] - 2023-12-20 - -### Bug Fixes - -- Update imports to new lymph version -- Remove references to `clean` command - -### Miscellaneous Tasks - -- [**breaking**] Lyprox to lymph convert not necessary anymore - -### Build - -- Bump lymph-model version to `>=1.0.0.a4` -- Bump type hints to Python 3.10 - -### Change - -- [**breaking**] `evaluate`: command does not depend on lymph model anymore -- Simplify `log_state()` decorator -- [**breaking**] Change model initialization in some places -- [**breaking**] Use decorated function name for `log_state()` message - -### Refac - -- [**breaking**] Deduplicate data loading functions: `load_csv_table()` was removed and `load_data_for_model()` renamed to `load_patient_data()` -- Change function names & remove logger from decorated function calls - -### Remove - -- [**breaking**] Delete unnecessary functions - -## [0.7.3] - 2023-08-29 - -### Bug Fixes - -- **data:** `enhance` command is now deterministic, fixes [#40] -- **plot:** correct color keyword arguments & swap arguments in `save_figure` function, fixes [#45] -- **sample:** use global numpy random state, fixes [#31] - -### Maintenance - -- fix upper version bound of lymph-model package - -### Testing - -- **sample:** add test for determinism of
sampling, related to [#31] - -## [0.7.2] - 2023-07-31 - -### Bug Fixes - -- `enhance`: fix bug introduced in [0.7.1] - -## [0.7.1] - 2023-07-31 - -### Bug Fixes - -- `enhance`: negative sublevels don't overwrite superlevels anymore. Fixes [#44]. - -### Maintenance - -- bump pre-commit hooks - -## [0.7.0] - 2023-06-26 - -### Bug Fixes - -- add modalities from params in synthetic data generation - -### Features - -- add extensible & versatile logging decorator -- add `--log-level` option to top-level lyscripts command -- add log-level to `log_state` decorator - -### Other - -- all commands now use the logging library for status updates/outputs. This fixes [#2]. - -## [0.6.9] - 2023-06-21 - -### Bug Fixes - -- change the indentation length in the generated markdown data documentation to 4 spaces. Fixes [#41]. - -## [0.6.8] - 2023-05-30 - -### Bug Fixes - -- flattening error in `lyproxify` -- more robust lyproxify working again - -### Documentation - -- add detail to docstring of `lyproxify` func - -### Features - -- add func to generate md docs from column map -- add two new dict modifying functions - -## [0.6.7] - 2023-05-23 - -### Bug Fixes - -- make flatten/unflatten funcs more consistent -- add `max_depth` option for `flatten` function -- bump isort version to avoid error - -### Features - -- add `unflatten` function - -## [0.6.6] - 2022-12-01 - -### Bug Fixes - -- pull another function out of a `rich` context, this time in the `join` command. Related to [#33]. - -## [0.6.5] - 2022-12-01 - -### Bug Fixes - -- swap arguments in the `save_figure` call of the `corner` command -- pull a function using [`rich`] to report its status out of an enclosing [`rich`] context. This fixes [#33]. - -## [0.6.4] - 2022-12-01 - -### Bug Fixes - -- `hist_kwargs` now overrides the default plot settings for `Histogram`.
This fixes [#30] - -### Features - -- the `lyscripts sample` command now has an argument `--seed` with the aim of making sampling runs reproducible via a random number generator seed. However, it seems as if the [`emcee`] package does not properly support this as runs using the same seed still produce different results. Related to, but not resolving [#31]. - -## [0.6.3] - 2022-11-25 - -### Bug Fixes - -- `lyproxify`: apply re-indexing only *after* excluding patients -- fix `SettingWithCopyWarning` during re-indexing in `lyproxify` - -## [0.6.2] - 2022-11-25 - -### Bug Fixes - -- `lyproxify` cleans empty header cell names - -### Documentation - -- update lyproxify's `main` docstring -- improve `report_state` & `exclude_patients` documentation -- update top-level `lyproxify` help in README.md - -### Features - -- allow muting `report_state` decorator globally for a decorated function, while also allowing to override the verbosity per function call -- allow adding an index column during `lyproxify` -- add options to `lyproxify` for dropping rows and columns before starting transformation of raw data -- the `report_state` decorator can now be configured to exit the program when encountering an unexpected exception - -## [0.6.1] - 2022-11-24 - -### Features - -- add new command under `lyscripts data` to preprocess any raw data into a format that can be parsed by [LyProX]. 
Fixes [#25] - -## [0.6.0] - 2022-11-23 - -### Bug Fixes - -- display errors and stop, but don't reraise -- add & update main entry point for script use - -### Code Refactoring - -- use `lyscripts.utils` consistently across data commands -- use `lyscripts.utils` for `evaluate` script -- pull out method to compare prevalence for one sample -- write modular functions for loading YAML, CSV and HDF5 data -- make `lyscripts data join` command a bit more readable -- further modularize `lyscripts data ...` scripts -- standardize CSV saving process -- start to add `utils` for data commands -- put data commands in separate submodule, fixes [#5] (**BREAKING CHANGE!**) - -### Documentation - -- expand documentation on data, plot & predict subcommands -- enrich the module documentation of predict scripts -- update docstrings of data commands - -### Features - -- add YAML scenario output to prevalence app -- working version of prevalence app -- add prevalence plot to app -- allow constructing the `lyscripts.plot.utils.Histogram` and `lyscripts.plot.utils.Posterior` from plain data without HDF5 file (**BREAKING CHANGE!**) -- `lyscripts.temp_schedule` output does not have pretty border anymore, making copy & paste easier -- use generators for risk & prevalence prediction, fixes [#23] -- add more params widgets for prevalence app -- add t_stage, midline_ext, ... to prevalence app -- add `LyScriptsError` for passing up messages -- make smart decorators for status reporting -- implement setup of prevalence app -- start implementing streamlit apps - -### Testing - -- add GitHub action for tests -- fix missing import for corner doctests -- generally, the module is now partially covered by unit tests - -## [0.5.11] - 2022-11-06 - -### Bug Fixes - -- remove useless import & rename prediction `utils` -- histogram & respective posterior have same color -- fix two bugs detected during integration test: - 1. The custom enumerate with optional progress bar did not enumerate - 2. 
Function checking if midline extension matches did not work for some lymph classes -- fix wrong import in submodules `plot` & `predict` -- correct relative imports & remove unused functions -- fix small inconsistency in script call - -### Code Refactoring - -- use function for loading YAML in all scripts -- further modularize tasks, e.g. params loading -- rename test modules -- put function saving figures in separate utility -- replace common main tasks with `util` functions -- pull out function to extract model param labels -- make `utils` public and hence documented -- greatly simplify histogram plotting script -- pull shared functions into `_utils` -- update [`rich_argparse`] & add highlighting (fixes [#20]) -- outsource rich enumeration of predictions -- make prevalence prediction much cleaner -- risk & prevalence share method to clean pattern - -### Documentation - -- update help in predict's docstrings - -### Features - -- write decorators for general tasks -- add nice helper functions to plot (fixes [#21]) -- risk & prevalence can use thinned samples -- write neat recursive functions to flatten dictionaries - -### Testing - -- implement more `utils` tests -- add test to the `save_figure` utility -- add test to new params loader -- add small doctest to `get_size` plot utility -- add checks for plotting utils -- write simple tests for prevalence prediction -- add doctest & pytest for predict `utils` - -## [0.5.10] - 2022-10-13 - -### Bug Fixes - -- pick correct consensus method for enhancement ([#17]) -- sample does not crash when `pools` not given ([#16]) -- add thinning to convergence sampling, too ([#15]) - -### Documentation - -- fix typos & add favicon to docs - -## [0.5.9] - 2022-09-16 - -### Documentation - -- don't use relative path for social card - -### Features - -- `sample` command has a new optional argument `--pools` with which one can adjust the number of multiprocessing pools used during the sampling procedure. 
Fixes [#13] - -## [0.5.8] - 2022-09-12 - -### Bug Fixes - -- The function `get_midline_ext_prob` in the prevalence prediction now -does not throw an error anymore when unilateral data is provided, but -returns `None` instead. Fixes [#11] - -### Features - -- add entry points to CLI. This enables one to call `lyscripts ...` directly, instead of having to use `python -m lyscripts ...` all the time. - -### Documentation - -- add social card to README -- remove `python -m` prefix from command usage in docstrings - -## [0.5.7] - 2022-08-29 - -### Bug Fixes - -- fix `enhance`'s issue with varying LNLs across modalities ([#8]) - -### Features - -- add progress bar to `enhance` script - -## [0.5.6] - 2022-08-29 - -### Bug Fixes - -- can choose list of defined mods in params. This allows one to choose different lists of modalities for e.g. the `enhance` script and the `sampling` one. - -### Documentation - -- correct typos in the changed docstrings -- update docstring of changed scripts - -## [0.5.5] - 2022-08-25 - -### Bug Fixes - -- clean script was using deprecated lymph.utils. This script has now been incorporated into these scripts. - -### Documentation - -- update README and add docstrings about `enhance` - -### Features - -- add enhancement scipt that computes additional diagnostic modalities, combining existing ones. 
- -## [0.5.4] - 2022-08-24 - -### Documentation - -- add call signature to docs every subcommand's `main()` -- add badges, installation & usage to README -- fix pdoc issue with importing `__main__` files - -### Maintenance - -- make pyproject.toml look nice on PyPI -- tell git to ignore docs dir -- set up git-chglog for creating changelogs -- add pre-commit hook to check commit msg - -## [0.5.3] - 2022-08-22 - - -[1.0.2]: https://github.com/lycosystem/lyscripts/compare/1.0.1...1.0.2 -[1.0.1]: https://github.com/lycosystem/lyscripts/compare/1.0.0...1.0.1 -[1.0.0]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc3...1.0.0 -[1.0.0rc3]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc2...1.0.0rc3 -[1.0.0rc2]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc1...1.0.0rc2 -[1.0.0rc1]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a7...1.0.0rc1 -[1.0.0.a7]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a6...1.0.0.a7 -[1.0.0.a6]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a5...1.0.0.a6 -[1.0.0.a5]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a4...1.0.0.a5 -[1.0.0.a4]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a3...1.0.0.a4 -[1.0.0.a3]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a2...1.0.0.a3 -[1.0.0.a2]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a1...1.0.0.a2 -[1.0.0.a1]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a0...1.0.0.a1 -[1.0.0.a0]: https://github.com/lycosystem/lyscripts/compare/0.7.3...1.0.0.a0 -[0.7.3]: https://github.com/lycosystem/lyscripts/compare/0.7.2...0.7.3 -[0.7.2]: https://github.com/lycosystem/lyscripts/compare/0.7.1...0.7.2 -[0.7.1]: https://github.com/lycosystem/lyscripts/compare/0.7.0...0.7.1 -[0.7.0]: https://github.com/lycosystem/lyscripts/compare/0.6.9...0.7.0 -[0.6.9]: https://github.com/lycosystem/lyscripts/compare/0.6.8...0.6.9 -[0.6.8]: https://github.com/lycosystem/lyscripts/compare/0.6.7...0.6.8 -[0.6.7]: 
https://github.com/lycosystem/lyscripts/compare/0.6.6...0.6.7 -[0.6.6]: https://github.com/lycosystem/lyscripts/compare/0.6.5...0.6.6 -[0.6.5]: https://github.com/lycosystem/lyscripts/compare/0.6.4...0.6.5 -[0.6.4]: https://github.com/lycosystem/lyscripts/compare/0.6.3...0.6.4 -[0.6.3]: https://github.com/lycosystem/lyscripts/compare/0.6.2...0.6.3 -[0.6.2]: https://github.com/lycosystem/lyscripts/compare/0.6.1...0.6.2 -[0.6.1]: https://github.com/lycosystem/lyscripts/compare/0.6.0...0.6.1 -[0.6.0]: https://github.com/lycosystem/lyscripts/compare/0.5.11...0.6.0 -[0.5.11]: https://github.com/lycosystem/lyscripts/compare/0.5.10...0.5.11 -[0.5.10]: https://github.com/lycosystem/lyscripts/compare/0.5.9...0.5.10 -[0.5.9]: https://github.com/lycosystem/lyscripts/compare/0.5.8...0.5.9 -[0.5.8]: https://github.com/lycosystem/lyscripts/compare/0.5.7...0.5.8 -[0.5.7]: https://github.com/lycosystem/lyscripts/compare/0.5.6...0.5.7 -[0.5.6]: https://github.com/lycosystem/lyscripts/compare/0.5.5...0.5.6 -[0.5.5]: https://github.com/lycosystem/lyscripts/compare/0.5.4...0.5.5 -[0.5.4]: https://github.com/lycosystem/lyscripts/compare/0.5.3...0.5.4 -[0.5.3]: https://github.com/lycosystem/lyscripts/compare/0.5.2...0.5.3 - -[#2]: https://github.com/lycosystem/lyscripts/issues/2 -[#5]: https://github.com/lycosystem/lyscripts/issues/5 -[#8]: https://github.com/lycosystem/lyscripts/issues/8 -[#11]: https://github.com/lycosystem/lyscripts/issues/11 -[#13]: https://github.com/lycosystem/lyscripts/issues/13 -[#15]: https://github.com/lycosystem/lyscripts/issues/15 -[#16]: https://github.com/lycosystem/lyscripts/issues/16 -[#17]: https://github.com/lycosystem/lyscripts/issues/17 -[#20]: https://github.com/lycosystem/lyscripts/issues/20 -[#21]: https://github.com/lycosystem/lyscripts/issues/21 -[#23]: https://github.com/lycosystem/lyscripts/issues/23 -[#25]: https://github.com/lycosystem/lyscripts/issues/25 -[#30]: https://github.com/lycosystem/lyscripts/issues/30 -[#31]: 
https://github.com/lycosystem/lyscripts/issues/31 -[#33]: https://github.com/lycosystem/lyscripts/issues/33 -[#40]: https://github.com/lycosystem/lyscripts/issues/40 -[#41]: https://github.com/lycosystem/lyscripts/issues/41 -[#44]: https://github.com/lycosystem/lyscripts/issues/44 -[#45]: https://github.com/lycosystem/lyscripts/issues/45 -[#51]: https://github.com/lycosystem/lyscripts/issues/51 -[#53]: https://github.com/lycosystem/lyscripts/issues/53 -[#54]: https://github.com/lycosystem/lyscripts/issues/54 -[#57]: https://github.com/lycosystem/lyscripts/issues/57 -[#70]: https://github.com/lycosystem/lyscripts/issues/70 -[#72]: https://github.com/lycosystem/lyscripts/issues/72 -[#74]: https://github.com/lycosystem/lyscripts/issues/74 -[#75]: https://github.com/lycosystem/lyscripts/issues/75 -[#81]: https://github.com/lycosystem/lyscripts/issues/81 - -[`emcee`]: https://emcee.readthedocs.io/en/stable/ -[`rich`]: https://rich.readthedocs.io/en/latest/ -[`rich_argparse`]: https://github.com/hamdanal/rich_argparse -[LyProX]: https://lyprox.org -[@noemibuehrer]: https://github.com/noemibuehrer diff --git a/CITATION.cff b/CITATION.cff deleted file mode 100755 index 70404a2..0000000 --- a/CITATION.cff +++ /dev/null @@ -1,25 +0,0 @@ -# This CITATION.cff file was generated with cffinit. -# Visit https://bit.ly/cffinit to generate yours today! - -cff-version: 1.2.0 -title: lyscripts -message: >- - If you use this software, please cite it using the - metadata from this file. -type: software -authors: - - given-names: Roman - family-names: Ludwig - orcid: 'https://orcid.org/0000-0001-9434-328X' - affiliation: University Hospital Zurich -repository-code: 'https://github.com/lycosystem/lyscripts' -url: 'https://lyscripts.readthedocs.io' -abstract: >- - Scripts for reproducible research on lymphatic tumor - progression in head and neck cancer. 
-keywords: - - cancer - - metastasis - - lymphatic system - - head and neck -license: MIT diff --git a/LICENSE b/LICENSE deleted file mode 100644 index f68adb1..0000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2022 Roman Ludwig - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/README.md b/README.md index e64323b..70b6bf9 100644 --- a/README.md +++ b/README.md @@ -1,72 +1,70 @@ -social card +# Repository Coverage + +[Full report](https://htmlpreview.github.io/?https://github.com/lycosystem/lyscripts/blob/python-coverage-comment-action-data/htmlcov/index.html) + +| Name | Stmts | Miss | Cover | Missing | +|------------------------------------------- | -------: | -------: | ------: | --------: | +| src/lyscripts/\_\_init\_\_.py | 30 | 7 | 77% |58-59, 67-73 | +| src/lyscripts/\_\_main\_\_.py | 3 | 3 | 0% | 3-6 | +| src/lyscripts/\_version.py | 11 | 0 | 100% | | +| src/lyscripts/cli.py | 45 | 26 | 42% |65-71, 85-91, 104-120 | +| src/lyscripts/compute/\_\_init\_\_.py | 5 | 1 | 80% | 22 | +| src/lyscripts/compute/\_\_main\_\_.py | 5 | 5 | 0% | 3-8 | +| src/lyscripts/compute/evidence.py | 65 | 41 | 37% |45, 60-61, 72-95, 124-192, 196-197 | +| src/lyscripts/compute/posteriors.py | 46 | 19 | 59% |97-137, 141-142 | +| src/lyscripts/compute/prevalences.py | 82 | 7 | 91% |60-62, 96-101, 235-236 | +| src/lyscripts/compute/priors.py | 35 | 2 | 94% | 110-111 | +| src/lyscripts/compute/risks.py | 51 | 33 | 35% |47-65, 81-135, 139-140 | +| src/lyscripts/compute/utils.py | 118 | 6 | 95% |95, 146, 177, 188, 240, 250 | +| src/lyscripts/configs.py | 280 | 41 | 85% |90, 122, 165, 173, 217, 271, 277-278, 286, 472-474, 482, 491-492, 527-536, 547, 581-584, 589, 660, 699-712, 755 | +| src/lyscripts/data/\_\_init\_\_.py | 6 | 1 | 83% | 53 | +| src/lyscripts/data/\_\_main\_\_.py | 18 | 18 | 0% | 3-36 | +| src/lyscripts/data/collect/\_\_init\_\_.py | 58 | 27 | 53% |56-58, 64, 76, 92-116, 137-144 | +| src/lyscripts/data/enhance.py | 19 | 6 | 68% |40-48, 52-53 | +| src/lyscripts/data/fetch.py | 21 | 7 | 67% |42-52, 56-57 | +| src/lyscripts/data/filter.py | 48 | 30 | 38% |43-66, 76-94, 98-99 | +| src/lyscripts/data/generate.py | 35 | 4 | 89% |58, 63, 95-96 | +| src/lyscripts/data/join.py | 22 | 10 | 55% |61-77, 81-82 | +| src/lyscripts/data/lyproxify.py | 
123 | 67 | 46% |31-34, 39-46, 90-119, 132-142, 173, 250-282, 293-307, 340-341 | +| src/lyscripts/data/split.py | 29 | 14 | 52% |33-65, 72-73 | +| src/lyscripts/data/utils.py | 9 | 0 | 100% | | +| src/lyscripts/decorators.py | 41 | 4 | 90% | 53-55, 70 | +| src/lyscripts/evaluate.py | 70 | 52 | 26% |29-35, 43-70, 87, 102-103, 114-134, 139-197, 201-205 | +| src/lyscripts/integrate.py | 46 | 22 | 52% |46-53, 104-154, 162-163 | +| src/lyscripts/plots.py | 160 | 18 | 89% |46-47, 56, 185-186, 336, 341, 370-392, 399 | +| src/lyscripts/sample.py | 137 | 12 | 91% |35-36, 74, 132-135, 175, 191, 304, 425-426 | +| src/lyscripts/schedule.py | 11 | 5 | 55% |24-28, 32-33 | +| src/lyscripts/schema.py | 22 | 3 | 86% | 61-62, 66 | +| src/lyscripts/utils.py | 84 | 5 | 94% |25, 141-142, 196-197 | +| **TOTAL** | **1735** | **496** | **71%** | | + + +## Setup coverage badge + +Below are examples of the badges you can use in your main branch `README` file. + +### Direct image + +[![Coverage badge](https://raw.githubusercontent.com/lycosystem/lyscripts/python-coverage-comment-action-data/badge.svg)](https://htmlpreview.github.io/?https://github.com/lycosystem/lyscripts/blob/python-coverage-comment-action-data/htmlcov/index.html) + +This is the one to use if your repository is private or if you don't want to customize anything. 
+ +### [Shields.io](https://shields.io) Json Endpoint -[![MIT license](https://img.shields.io/badge/license-MIT-blue.svg?style=flat)](https://github.com/lycosystem/lyscripts/blob/main/LICENSE) -[![GitHub repo](https://img.shields.io/badge/lycosystem%2Flymph-grey.svg?style=flat&logo=github)](https://github.com/lycosystem/lyscripts) -[![build badge](https://github.com/lycosystem/lyscripts/actions/workflows/release.yml/badge.svg?style=flat)](https://pypi.org/project/lyscripts/) -[![docs badge](https://readthedocs.org/projects/lyscripts/badge/?version=latest)](https://lyscripts.readthedocs.io/en/latest/?badge=latest) -[![tests badge](https://github.com/lycosystem/lyscripts/actions/workflows/tests.yml/badge.svg?style=flat)](https://lyscripts.readthedocs.io/en/latest/?badge=latest) [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/lycosystem/lyscripts/python-coverage-comment-action-data/endpoint.json)](https://htmlpreview.github.io/?https://github.com/lycosystem/lyscripts/blob/python-coverage-comment-action-data/htmlcov/index.html) -## What are these `lyscripts`? +Using this one will allow you to [customize](https://shields.io/endpoint) the look of your badge. +It won't work with private repositories. It won't be refreshed more than once per five minutes. -This package provides convenient scripts for performing inference and learning regarding the lymphatic spread of head & neck cancer. Essentially, it provides a *command line interface* (CLI) to the [lymph](https://github.com/lycosystem/lymph) library and the [lydata](https://github.com/lycosystem/lydata) repository that stores lymphatic progression data. +### [Shields.io](https://shields.io) Dynamic Badge -We are making these "convenience" scripts public, because doing so is one necessary requirement to making our research easily and fully reproducible. 
There exists another repository, [lynference](https://github.com/lycosystem/lynference), where we stored the pipelines that produced our published results in a persistent way. +[![Coverage badge](https://img.shields.io/badge/dynamic/json?color=brightgreen&label=coverage&query=%24.message&url=https%3A%2F%2Fraw.githubusercontent.com%2Flycosystem%2Flyscripts%2Fpython-coverage-comment-action-data%2Fendpoint.json)](https://htmlpreview.github.io/?https://github.com/lycosystem/lyscripts/blob/python-coverage-comment-action-data/htmlcov/index.html) -## Installation +This one will always be the same color. It won't work for private repos. I'm not even sure why we included it. -These scripts can be installed via `pip`: +## What is that? -```bash -pip install lyscripts -``` - -or installed from source by cloning this repo - -```bash -git clone https://github.com/lycosystem/lyscripts.git -cd lyscripts -pip install . -``` - -## Usage - -This package is intended to be mainly used as a collection of Python scripts that would be scattered throughout my projects, if I didn't bundle them here. Hence, they're mostly command line tools that do basic and repetitive stuff. - -### As a Command Line Tool - -Simply run - -``` -lyscripts --help -``` - -in your terminal to display the help text for the main command. It will list all subcommands that are avialable, which you can then also call with `lyscripts --help` to get more information on its use and the available arguments. - -For example, one subcommand is `lyscripts data collect`, which will launch a small web server that allows a user to enter patient records on lymphatic involvement in head and neck cancer one row at a time and construct a standardized CSV file from it. - - - -You can also refer to the [documentation] for a written-down version of all these help texts and even more context on how and why to use the provided commands. 
- -### As a Library - -Head over to the [documentation] for more information on the individual modules, classes, and functions that are implemented in this package. - -[documentation]: https://lyscripts.readthedocs.io - -### Configuration YAML Schema - -Most of the CLI commands allow passing a list of `--configs` in the form of YAML files. If for a particular CLI argument no value is passed directly, the program looks for the corresponding value in the merged YAML files (if multiple files are provided, later ones may overwrite earlier ones). - -For these YAML files we provide a unified schema containing all possible fields that any of the CLIs may accept. It is located at `schemas/ly.json` in this repository. So, one could configure e.g. VS Code to consider this schema for all `*.ly.yaml` files. Here is how that could look like in the JSON settings of VS Code: - -```json -{ - "yaml.schemas": { - "https://raw.githubusercontent.com/lycosystem/lyscripts/main/schemas/ly.json": "*.ly.yaml" - } -} -``` - -Subsequently, all files ending in `.ly.yaml` will have helpful autocompletion on the allowed/expected types available. +This branch is part of the +[python-coverage-comment-action](https://github.com/marketplace/actions/python-coverage-comment) +GitHub Action. All the files in this branch are automatically generated and may be +overwritten at any moment. 
\ No newline at end of file diff --git a/badge.svg b/badge.svg new file mode 100644 index 0000000..6f50e5c --- /dev/null +++ b/badge.svg @@ -0,0 +1 @@ +Coverage: 71%Coverage71% \ No newline at end of file diff --git a/data.json b/data.json new file mode 100644 index 0000000..ed45deb --- /dev/null +++ b/data.json @@ -0,0 +1 @@ +{"coverage": 71.41210374639769, "raw_data": {"meta": {"format": 3, "version": "7.13.5", "timestamp": "2026-04-08T14:17:37.564076", "branch_coverage": false, "show_contexts": false}, "files": {"src/lyscripts/__init__.py": {"executed_lines": [7, 8, 10, 11, 12, 13, 20, 21, 22, 23, 25, 26, 27, 28, 29, 33, 35, 38, 41, 45, 56, 61, 76], "summary": {"covered_lines": 23, "num_statements": 30, "percent_covered": 76.66666666666667, "percent_covered_display": "77", "missing_lines": 7, "excluded_lines": 0, "percent_statements_covered": 76.66666666666667, "percent_statements_covered_display": "77"}, "missing_lines": [58, 59, 67, 69, 70, 71, 73], "excluded_lines": [], "functions": {"LyscriptsCLI.__init__": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 2, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [58, 59], "excluded_lines": [], "start_line": 56}, "LyscriptsCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 5, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [67, 69, 70, 71, 73], "excluded_lines": [], "start_line": 61}, "": {"executed_lines": [7, 8, 10, 11, 12, 13, 20, 21, 22, 23, 25, 26, 27, 28, 29, 33, 35, 38, 41, 45, 56, 61, 76], "summary": {"covered_lines": 23, "num_statements": 23, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, 
"percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"LyscriptsCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 7, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 7, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [58, 59, 67, 69, 70, 71, 73], "excluded_lines": [], "start_line": 38}, "": {"executed_lines": [7, 8, 10, 11, 12, 13, 20, 21, 22, 23, 25, 26, 27, 28, 29, 33, 35, 38, 41, 45, 56, 61, 76], "summary": {"covered_lines": 23, "num_statements": 23, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/__main__.py": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 5, 6], "excluded_lines": [], "functions": {"": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 5, 6], "excluded_lines": [], "start_line": 1}}, "classes": {"": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 5, 6], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/_version.py": {"executed_lines": [3, 5, 14, 
15, 16, 17, 18, 19, 21, 22, 24], "summary": {"covered_lines": 11, "num_statements": 11, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "functions": {"": {"executed_lines": [3, 5, 14, 15, 16, 17, 18, 19, 21, 22, 24], "summary": {"covered_lines": 11, "num_statements": 11, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"": {"executed_lines": [3, 5, 14, 15, 16, 17, 18, 19, 21, 22, 24], "summary": {"covered_lines": 11, "num_statements": 11, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/cli.py": {"executed_lines": [11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 25, 39, 41, 48, 50, 53, 74, 98, 101], "summary": {"covered_lines": 19, "num_statements": 45, "percent_covered": 42.22222222222222, "percent_covered_display": "42", "missing_lines": 26, "excluded_lines": 0, "percent_statements_covered": 42.22222222222222, "percent_statements_covered_display": "42"}, "missing_lines": [65, 66, 67, 68, 69, 71, 85, 86, 88, 89, 90, 91, 104, 105, 106, 107, 110, 111, 112, 113, 114, 115, 116, 117, 118, 120], "excluded_lines": [], "functions": {"assemble_main": {"executed_lines": [39, 50], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 25}, 
"assemble_main.main": {"executed_lines": [41, 48], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 39}, "somewhat_safely_get_loglevel": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 6, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 6, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [65, 66, 67, 68, 69, 71], "excluded_lines": [], "start_line": 53}, "configure_logging": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 6, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 6, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [85, 86, 88, 89, 90, 91], "excluded_lines": [], "start_line": 74}, "InterceptHandler.emit": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 14, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 14, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [104, 105, 106, 107, 110, 111, 112, 113, 114, 115, 116, 117, 118, 120], "excluded_lines": [], "start_line": 101}, "": {"executed_lines": [11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 25, 53, 74, 98, 101], "summary": {"covered_lines": 15, "num_statements": 15, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"InterceptHandler": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 14, "percent_covered": 
0.0, "percent_covered_display": "0", "missing_lines": 14, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [104, 105, 106, 107, 110, 111, 112, 113, 114, 115, 116, 117, 118, 120], "excluded_lines": [], "start_line": 98}, "": {"executed_lines": [11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 25, 39, 41, 48, 50, 53, 74, 98, 101], "summary": {"covered_lines": 19, "num_statements": 31, "percent_covered": 61.29032258064516, "percent_covered_display": "61", "missing_lines": 12, "excluded_lines": 0, "percent_statements_covered": 61.29032258064516, "percent_statements_covered_display": "61"}, "missing_lines": [65, 66, 67, 68, 69, 71, 85, 86, 88, 89, 90, 91], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/compute/__init__.py": {"executed_lines": [6, 8, 11, 20], "summary": {"covered_lines": 4, "num_statements": 5, "percent_covered": 80.0, "percent_covered_display": "80", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 80.0, "percent_statements_covered_display": "80"}, "missing_lines": [22], "excluded_lines": [], "functions": {"ComputeCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [22], "excluded_lines": [], "start_line": 20}, "": {"executed_lines": [6, 8, 11, 20], "summary": {"covered_lines": 4, "num_statements": 4, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"ComputeCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, 
"percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [22], "excluded_lines": [], "start_line": 11}, "": {"executed_lines": [6, 8, 11, 20], "summary": {"covered_lines": 4, "num_statements": 4, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/compute/__main__.py": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 5, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 4, 6, 7, 8], "excluded_lines": [], "functions": {"": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 5, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 4, 6, 7, 8], "excluded_lines": [], "start_line": 1}}, "classes": {"": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 5, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 4, 6, 7, 8], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/compute/evidence.py": {"executed_lines": [7, 9, 10, 12, 13, 14, 15, 16, 17, 18, 20, 21, 28, 31, 48, 64, 98, 101, 102, 103, 106, 110, 115, 195], "summary": {"covered_lines": 24, "num_statements": 65, "percent_covered": 36.92307692307692, "percent_covered_display": "37", "missing_lines": 41, "excluded_lines": 0, "percent_statements_covered": 36.92307692307692, "percent_statements_covered_display": "37"}, "missing_lines": [45, 60, 61, 72, 74, 75, 80, 81, 82, 84, 85, 
90, 92, 93, 95, 124, 126, 128, 130, 132, 137, 138, 141, 142, 150, 156, 158, 168, 169, 172, 173, 178, 179, 181, 186, 187, 189, 190, 192, 196, 197], "excluded_lines": [], "functions": {"comp_bic": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [45], "excluded_lines": [], "start_line": 31}, "compute_evidence": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 2, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [60, 61], "excluded_lines": [], "start_line": 48}, "compute_ti_results": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 12, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 12, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [72, 74, 75, 80, 81, 82, 84, 85, 90, 92, 93, 95], "excluded_lines": [], "start_line": 64}, "EvidenceCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 24, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 24, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [124, 126, 128, 130, 132, 137, 138, 141, 142, 150, 156, 158, 168, 169, 172, 173, 178, 179, 181, 186, 187, 189, 190, 192], "excluded_lines": [], "start_line": 115}, "": {"executed_lines": [7, 9, 10, 12, 13, 14, 15, 16, 17, 18, 20, 21, 28, 31, 48, 64, 98, 101, 102, 103, 106, 110, 115, 195], "summary": {"covered_lines": 24, "num_statements": 26, "percent_covered": 92.3076923076923, "percent_covered_display": "92", "missing_lines": 2, "excluded_lines": 0, 
"percent_statements_covered": 92.3076923076923, "percent_statements_covered_display": "92"}, "missing_lines": [196, 197], "excluded_lines": [], "start_line": 1}}, "classes": {"EvidenceCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 24, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 24, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [124, 126, 128, 130, 132, 137, 138, 141, 142, 150, 156, 158, 168, 169, 172, 173, 178, 179, 181, 186, 187, 189, 190, 192], "excluded_lines": [], "start_line": 98}, "": {"executed_lines": [7, 9, 10, 12, 13, 14, 15, 16, 17, 18, 20, 21, 28, 31, 48, 64, 98, 101, 102, 103, 106, 110, 115, 195], "summary": {"covered_lines": 24, "num_statements": 41, "percent_covered": 58.53658536585366, "percent_covered_display": "59", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 58.53658536585366, "percent_statements_covered_display": "59"}, "missing_lines": [45, 60, 61, 72, 74, 75, 80, 81, 82, 84, 85, 90, 92, 93, 95, 196, 197], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/compute/posteriors.py": {"executed_lines": [9, 11, 12, 13, 14, 15, 17, 18, 19, 20, 29, 32, 51, 52, 53, 54, 55, 57, 58, 60, 66, 75, 78, 81, 87, 91, 140], "summary": {"covered_lines": 27, "num_statements": 46, "percent_covered": 58.69565217391305, "percent_covered_display": "59", "missing_lines": 19, "excluded_lines": 0, "percent_statements_covered": 58.69565217391305, "percent_statements_covered_display": "59"}, "missing_lines": [97, 99, 102, 104, 105, 106, 107, 109, 110, 111, 113, 122, 123, 125, 135, 136, 137, 141, 142], "excluded_lines": [], "functions": {"compute_posteriors": {"executed_lines": [51, 52, 53, 54, 55, 57, 58, 60, 66, 75], "summary": {"covered_lines": 10, "num_statements": 10, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, 
"percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 32}, "PosteriorsCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 17, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [97, 99, 102, 104, 105, 106, 107, 109, 110, 111, 113, 122, 123, 125, 135, 136, 137], "excluded_lines": [], "start_line": 91}, "": {"executed_lines": [9, 11, 12, 13, 14, 15, 17, 18, 19, 20, 29, 32, 78, 81, 87, 91, 140], "summary": {"covered_lines": 17, "num_statements": 19, "percent_covered": 89.47368421052632, "percent_covered_display": "89", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 89.47368421052632, "percent_statements_covered_display": "89"}, "missing_lines": [141, 142], "excluded_lines": [], "start_line": 1}}, "classes": {"PosteriorsCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 17, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [97, 99, 102, 104, 105, 106, 107, 109, 110, 111, 113, 122, 123, 125, 135, 136, 137], "excluded_lines": [], "start_line": 78}, "": {"executed_lines": [9, 11, 12, 13, 14, 15, 17, 18, 19, 20, 29, 32, 51, 52, 53, 54, 55, 57, 58, 60, 66, 75, 78, 81, 87, 91, 140], "summary": {"covered_lines": 27, "num_statements": 29, "percent_covered": 93.10344827586206, "percent_covered_display": "93", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 93.10344827586206, "percent_statements_covered_display": "93"}, "missing_lines": [141, 142], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/compute/prevalences.py": {"executed_lines": [8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 30, 42, 45, 56, 57, 59, 64, 65, 
66, 68, 74, 75, 80, 81, 83, 89, 103, 105, 108, 110, 111, 112, 113, 114, 115, 116, 119, 143, 144, 145, 147, 148, 149, 151, 158, 164, 167, 173, 178, 180, 181, 184, 186, 187, 188, 189, 191, 192, 193, 195, 204, 205, 207, 217, 222, 223, 224, 225, 234], "summary": {"covered_lines": 75, "num_statements": 82, "percent_covered": 91.46341463414635, "percent_covered_display": "91", "missing_lines": 7, "excluded_lines": 0, "percent_statements_covered": 91.46341463414635, "percent_statements_covered_display": "91"}, "missing_lines": [60, 61, 62, 96, 101, 235, 236], "excluded_lines": [], "functions": {"compute_prevalences": {"executed_lines": [56, 57, 59, 64, 65, 66, 68, 74, 75, 80, 81, 83, 89, 103, 105], "summary": {"covered_lines": 15, "num_statements": 20, "percent_covered": 75.0, "percent_covered_display": "75", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 75.0, "percent_statements_covered_display": "75"}, "missing_lines": [60, 61, 62, 96, 101], "excluded_lines": [], "start_line": 45}, "generate_query_from_diagnosis": {"executed_lines": [110, 111, 112, 113, 114, 115, 116], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 108}, "observe_prevalence": {"executed_lines": [143, 144, 145, 147, 148, 149, 151, 158], "summary": {"covered_lines": 8, "num_statements": 8, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 119}, "PrevalencesCLI.cli_cmd": {"executed_lines": [180, 181, 184, 186, 187, 188, 189, 191, 192, 193, 195, 204, 205, 207, 217, 222, 223, 224, 225], "summary": {"covered_lines": 19, "num_statements": 19, 
"percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 178}, "": {"executed_lines": [8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 30, 42, 45, 108, 119, 164, 167, 173, 178, 234], "summary": {"covered_lines": 26, "num_statements": 28, "percent_covered": 92.85714285714286, "percent_covered_display": "93", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 92.85714285714286, "percent_statements_covered_display": "93"}, "missing_lines": [235, 236], "excluded_lines": [], "start_line": 1}}, "classes": {"PrevalencesCLI": {"executed_lines": [180, 181, 184, 186, 187, 188, 189, 191, 192, 193, 195, 204, 205, 207, 217, 222, 223, 224, 225], "summary": {"covered_lines": 19, "num_statements": 19, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 164}, "": {"executed_lines": [8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 30, 42, 45, 56, 57, 59, 64, 65, 66, 68, 74, 75, 80, 81, 83, 89, 103, 105, 108, 110, 111, 112, 113, 114, 115, 116, 119, 143, 144, 145, 147, 148, 149, 151, 158, 164, 167, 173, 178, 234], "summary": {"covered_lines": 56, "num_statements": 63, "percent_covered": 88.88888888888889, "percent_covered_display": "89", "missing_lines": 7, "excluded_lines": 0, "percent_statements_covered": 88.88888888888889, "percent_statements_covered_display": "89"}, "missing_lines": [60, 61, 62, 96, 101, 235, 236], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/compute/priors.py": {"executed_lines": [7, 9, 10, 11, 12, 14, 15, 16, 23, 26, 42, 43, 44, 46, 52, 53, 60, 63, 66, 68, 84, 85, 86, 88, 89, 90, 92, 93, 94, 96, 105, 106, 109], "summary": 
{"covered_lines": 33, "num_statements": 35, "percent_covered": 94.28571428571429, "percent_covered_display": "94", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 94.28571428571429, "percent_statements_covered_display": "94"}, "missing_lines": [110, 111], "excluded_lines": [], "functions": {"compute_priors": {"executed_lines": [42, 43, 44, 46, 52, 53, 60], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 26}, "PriorsCLI.cli_cmd": {"executed_lines": [84, 85, 86, 88, 89, 90, 92, 93, 94, 96, 105, 106], "summary": {"covered_lines": 12, "num_statements": 12, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 68}, "": {"executed_lines": [7, 9, 10, 11, 12, 14, 15, 16, 23, 26, 63, 66, 68, 109], "summary": {"covered_lines": 14, "num_statements": 16, "percent_covered": 87.5, "percent_covered_display": "88", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 87.5, "percent_statements_covered_display": "88"}, "missing_lines": [110, 111], "excluded_lines": [], "start_line": 1}}, "classes": {"PriorsCLI": {"executed_lines": [84, 85, 86, 88, 89, 90, 92, 93, 94, 96, 105, 106], "summary": {"covered_lines": 12, "num_statements": 12, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 63}, "": {"executed_lines": [7, 9, 10, 11, 12, 14, 15, 16, 23, 26, 42, 43, 44, 46, 52, 53, 60, 63, 66, 68, 109], "summary": {"covered_lines": 21, 
"num_statements": 23, "percent_covered": 91.30434782608695, "percent_covered_display": "91", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 91.30434782608695, "percent_statements_covered_display": "91"}, "missing_lines": [110, 111], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/compute/risks.py": {"executed_lines": [8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 29, 32, 68, 71, 77, 79, 138], "summary": {"covered_lines": 18, "num_statements": 51, "percent_covered": 35.294117647058826, "percent_covered_display": "35", "missing_lines": 33, "excluded_lines": 0, "percent_statements_covered": 35.294117647058826, "percent_statements_covered_display": "35"}, "missing_lines": [47, 48, 49, 50, 52, 53, 55, 61, 65, 81, 82, 85, 87, 88, 89, 90, 91, 93, 94, 95, 97, 106, 107, 109, 119, 120, 122, 132, 133, 134, 135, 139, 140], "excluded_lines": [], "functions": {"compute_risks": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 9, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 9, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [47, 48, 49, 50, 52, 53, 55, 61, 65], "excluded_lines": [], "start_line": 32}, "RisksCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 22, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 22, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [81, 82, 85, 87, 88, 89, 90, 91, 93, 94, 95, 97, 106, 107, 109, 119, 120, 122, 132, 133, 134, 135], "excluded_lines": [], "start_line": 79}, "": {"executed_lines": [8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 29, 32, 68, 71, 77, 79, 138], "summary": {"covered_lines": 18, "num_statements": 20, "percent_covered": 90.0, "percent_covered_display": "90", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 90.0, 
"percent_statements_covered_display": "90"}, "missing_lines": [139, 140], "excluded_lines": [], "start_line": 1}}, "classes": {"RisksCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 22, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 22, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [81, 82, 85, 87, 88, 89, 90, 91, 93, 94, 95, 97, 106, 107, 109, 119, 120, 122, 132, 133, 134, 135], "excluded_lines": [], "start_line": 68}, "": {"executed_lines": [8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 29, 32, 68, 71, 77, 79, 138], "summary": {"covered_lines": 18, "num_statements": 29, "percent_covered": 62.06896551724138, "percent_covered_display": "62", "missing_lines": 11, "excluded_lines": 0, "percent_statements_covered": 62.06896551724138, "percent_statements_covered_display": "62"}, "missing_lines": [47, 48, 49, 50, 52, 53, 55, 61, 65, 139, 140], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/compute/utils.py": {"executed_lines": [3, 4, 5, 6, 8, 9, 10, 11, 12, 14, 24, 28, 29, 36, 40, 47, 49, 55, 57, 58, 59, 60, 62, 63, 66, 68, 69, 70, 71, 72, 73, 74, 77, 92, 94, 97, 98, 99, 101, 104, 106, 107, 108, 111, 112, 115, 118, 121, 128, 145, 148, 149, 151, 153, 155, 156, 158, 159, 161, 163, 165, 166, 168, 169, 171, 173, 175, 176, 178, 180, 182, 184, 186, 187, 189, 191, 194, 210, 211, 212, 213, 214, 215, 216, 217, 219, 222, 239, 242, 243, 244, 246, 247, 248, 249, 252, 254, 257, 259, 260, 261, 263, 264, 265, 266, 267, 269, 271, 272, 273, 275, 276], "summary": {"covered_lines": 112, "num_statements": 118, "percent_covered": 94.91525423728814, "percent_covered_display": "95", "missing_lines": 6, "excluded_lines": 0, "percent_statements_covered": 94.91525423728814, "percent_statements_covered_display": "95"}, "missing_lines": [95, 146, 177, 188, 240, 250], "excluded_lines": [], "functions": {"is_hdf5_compatible": {"executed_lines": [49], "summary": 
{"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 47}, "to_hdf5_attrs": {"executed_lines": [57, 58, 59, 60, 62, 63], "summary": {"covered_lines": 6, "num_statements": 6, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 55}, "from_hdf5_attrs": {"executed_lines": [68, 69, 70, 71, 72, 73, 74], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 66}, "extract_modalities": {"executed_lines": [92, 94, 97, 98, 99, 101], "summary": {"covered_lines": 6, "num_statements": 7, "percent_covered": 85.71428571428571, "percent_covered_display": "86", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 85.71428571428571, "percent_statements_covered_display": "86"}, "missing_lines": [95], "excluded_lines": [], "start_line": 77}, "ensure_parent_dir": {"executed_lines": [106, 107, 108], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 104}, "HDF5FileStorage._get_dataset": {"executed_lines": [145, 148, 149], "summary": {"covered_lines": 3, "num_statements": 4, "percent_covered": 75.0, "percent_covered_display": "75", "missing_lines": 1, "excluded_lines": 0, 
"percent_statements_covered": 75.0, "percent_statements_covered_display": "75"}, "missing_lines": [146], "excluded_lines": [], "start_line": 128}, "HDF5FileStorage.load": {"executed_lines": [153, 155, 156, 158, 159], "summary": {"covered_lines": 5, "num_statements": 5, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 151}, "HDF5FileStorage.get_attrs": {"executed_lines": [163, 165, 166, 168, 169], "summary": {"covered_lines": 5, "num_statements": 5, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 161}, "HDF5FileStorage.save": {"executed_lines": [173, 175, 176, 178, 180], "summary": {"covered_lines": 5, "num_statements": 6, "percent_covered": 83.33333333333333, "percent_covered_display": "83", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 83.33333333333333, "percent_statements_covered_display": "83"}, "missing_lines": [177], "excluded_lines": [], "start_line": 171}, "HDF5FileStorage.set_attrs": {"executed_lines": [184, 186, 187, 189, 191], "summary": {"covered_lines": 5, "num_statements": 6, "percent_covered": 83.33333333333333, "percent_covered_display": "83", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 83.33333333333333, "percent_statements_covered_display": "83"}, "missing_lines": [188], "excluded_lines": [], "start_line": 182}, "reduce_pattern": {"executed_lines": [210, 211, 212, 213, 214, 215, 216, 217, 219], "summary": {"covered_lines": 9, "num_statements": 9, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": 
"100"}, "missing_lines": [], "excluded_lines": [], "start_line": 194}, "complete_pattern": {"executed_lines": [239, 242, 243, 244, 246, 247, 248, 249, 252, 254], "summary": {"covered_lines": 10, "num_statements": 12, "percent_covered": 83.33333333333333, "percent_covered_display": "83", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 83.33333333333333, "percent_statements_covered_display": "83"}, "missing_lines": [240, 250], "excluded_lines": [], "start_line": 222}, "get_cached": {"executed_lines": [259, 260, 261, 263, 264, 275, 276], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 257}, "get_cached.log_cache_info_wrapper": {"executed_lines": [265, 266, 267, 269, 271, 272, 273], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 264}, "": {"executed_lines": [3, 4, 5, 6, 8, 9, 10, 11, 12, 14, 24, 28, 29, 36, 40, 47, 55, 66, 77, 104, 111, 112, 115, 118, 121, 128, 151, 161, 171, 182, 194, 222, 257], "summary": {"covered_lines": 33, "num_statements": 33, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"BaseComputeCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, 
"percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 24}, "HDF5FileStorage": {"executed_lines": [145, 148, 149, 153, 155, 156, 158, 159, 163, 165, 166, 168, 169, 173, 175, 176, 178, 180, 184, 186, 187, 189, 191], "summary": {"covered_lines": 23, "num_statements": 26, "percent_covered": 88.46153846153847, "percent_covered_display": "88", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 88.46153846153847, "percent_statements_covered_display": "88"}, "missing_lines": [146, 177, 188], "excluded_lines": [], "start_line": 115}, "": {"executed_lines": [3, 4, 5, 6, 8, 9, 10, 11, 12, 14, 24, 28, 29, 36, 40, 47, 49, 55, 57, 58, 59, 60, 62, 63, 66, 68, 69, 70, 71, 72, 73, 74, 77, 92, 94, 97, 98, 99, 101, 104, 106, 107, 108, 111, 112, 115, 118, 121, 128, 151, 161, 171, 182, 194, 210, 211, 212, 213, 214, 215, 216, 217, 219, 222, 239, 242, 243, 244, 246, 247, 248, 249, 252, 254, 257, 259, 260, 261, 263, 264, 265, 266, 267, 269, 271, 272, 273, 275, 276], "summary": {"covered_lines": 89, "num_statements": 92, "percent_covered": 96.73913043478261, "percent_covered_display": "97", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 96.73913043478261, "percent_statements_covered_display": "97"}, "missing_lines": [95, 240, 250], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/configs.py": {"executed_lines": [12, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 39, 44, 46, 48, 51, 56, 59, 63, 69, 72, 78, 82, 87, 89, 92, 94, 96, 102, 104, 107, 110, 115, 120, 128, 131, 135, 139, 145, 148, 153, 159, 161, 162, 164, 167, 168, 170, 171, 176, 179, 182, 186, 187, 189, 190, 202, 204, 205, 206, 208, 209, 211, 214, 216, 219, 220, 222, 225, 228, 232, 236, 240, 244, 253, 258, 259, 261, 270, 273, 275, 276, 280, 281, 282, 283, 285, 288, 296, 298, 299, 306, 316, 321, 325, 332, 336, 341, 343, 348, 349, 350, 355, 357, 359, 360, 362, 363, 364, 366, 367, 368, 370, 372, 379, 380, 381, 
387, 390, 393, 396, 400, 404, 412, 416, 420, 424, 431, 435, 441, 445, 449, 457, 463, 470, 477, 485, 495, 502, 505, 509, 513, 517, 525, 539, 541, 542, 544, 545, 550, 553, 557, 562, 566, 570, 571, 573, 575, 576, 578, 580, 586, 588, 594, 596, 597, 598, 599, 600, 601, 604, 625, 626, 628, 629, 630, 636, 637, 640, 647, 648, 649, 651, 653, 654, 655, 656, 657, 658, 662, 663, 664, 665, 667, 669, 670, 673, 679, 680, 681, 683, 684, 685, 687, 688, 691, 715, 718, 732, 747, 748, 750, 752, 753, 754, 759, 761, 763, 767, 768, 774, 776, 778, 788, 791, 793, 803, 804, 813, 818, 819], "summary": {"covered_lines": 239, "num_statements": 280, "percent_covered": 85.35714285714286, "percent_covered_display": "85", "missing_lines": 41, "excluded_lines": 0, "percent_statements_covered": 85.35714285714286, "percent_statements_covered_display": "85"}, "missing_lines": [90, 122, 165, 173, 217, 271, 277, 278, 286, 472, 473, 474, 482, 491, 492, 527, 528, 529, 531, 532, 533, 535, 536, 547, 581, 582, 584, 589, 660, 699, 700, 702, 703, 704, 706, 707, 708, 710, 711, 712, 755], "excluded_lines": [], "functions": {"DataConfig.load": {"executed_lines": [89, 92], "summary": {"covered_lines": 2, "num_statements": 3, "percent_covered": 66.66666666666667, "percent_covered_display": "67", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 66.66666666666667, "percent_statements_covered_display": "67"}, "missing_lines": [90], "excluded_lines": [], "start_line": 87}, "DataConfig.get_load_kwargs": {"executed_lines": [96], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 94}, "check_pattern": {"executed_lines": [104], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, 
"excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 102}, "DiagnosisConfig.to_involvement": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [122], "excluded_lines": [], "start_line": 120}, "retrieve_graph_representation": {"executed_lines": [161, 162, 164, 167, 168, 170, 171], "summary": {"covered_lines": 7, "num_statements": 9, "percent_covered": 77.77777777777777, "percent_covered_display": "78", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 77.77777777777777, "percent_statements_covered_display": "78"}, "missing_lines": [165, 173], "excluded_lines": [], "start_line": 159}, "GraphConfig.from_model": {"executed_lines": [189, 190], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 187}, "has_model_symbol": {"executed_lines": [204, 205, 206, 208, 209, 211], "summary": {"covered_lines": 6, "num_statements": 6, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 202}, "get_symmetry_kwargs": {"executed_lines": [216, 219, 220, 222], "summary": {"covered_lines": 4, "num_statements": 5, "percent_covered": 80.0, "percent_covered_display": "80", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 80.0, "percent_statements_covered_display": "80"}, "missing_lines": [217], 
"excluded_lines": [], "start_line": 214}, "ModelConfig.from_model": {"executed_lines": [261, 270, 273, 275, 276, 280, 281, 282, 283, 285, 288], "summary": {"covered_lines": 11, "num_statements": 15, "percent_covered": 73.33333333333333, "percent_covered_display": "73", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 73.33333333333333, "percent_statements_covered_display": "73"}, "missing_lines": [271, 277, 278, 286], "excluded_lines": [], "start_line": 259}, "modalityconfig_from_model": {"executed_lines": [298, 299], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 296}, "DeprecatedModelConfig.model_post_init": {"executed_lines": [343, 348, 349, 350, 355], "summary": {"covered_lines": 5, "num_statements": 5, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 341}, "DeprecatedModelConfig.translate": {"executed_lines": [359, 360, 362, 363, 364, 366, 367, 368, 370, 372, 379, 380, 381, 387], "summary": {"covered_lines": 14, "num_statements": 14, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 357}, "SamplingConfig.load": {"executed_lines": [463], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], 
"start_line": 457}, "geometric_schedule": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [472, 473, 474], "excluded_lines": [], "start_line": 470}, "linear_schedule": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [482], "excluded_lines": [], "start_line": 477}, "power_schedule": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 2, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [491, 492], "excluded_lines": [], "start_line": 485}, "ScheduleConfig.get_schedule": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 8, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 8, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [527, 528, 529, 531, 532, 533, 535, 536], "excluded_lines": [], "start_line": 525}, "map_to_optional_bool": {"executed_lines": [541, 542, 544, 545], "summary": {"covered_lines": 4, "num_statements": 5, "percent_covered": 80.0, "percent_covered_display": "80", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 80.0, "percent_statements_covered_display": "80"}, "missing_lines": [547], "excluded_lines": [], "start_line": 539}, "ScenarioConfig.model_post_init": {"executed_lines": [575, 576], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, 
"excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 573}, "ScenarioConfig.interpolate": {"executed_lines": [580], "summary": {"covered_lines": 1, "num_statements": 4, "percent_covered": 25.0, "percent_covered_display": "25", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 25.0, "percent_statements_covered_display": "25"}, "missing_lines": [581, 582, 584], "excluded_lines": [], "start_line": 578}, "ScenarioConfig.normalize": {"executed_lines": [588], "summary": {"covered_lines": 1, "num_statements": 2, "percent_covered": 50.0, "percent_covered_display": "50", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 50.0, "percent_statements_covered_display": "50"}, "missing_lines": [589], "excluded_lines": [], "start_line": 586}, "_construct_model_from_external": {"executed_lines": [596, 597, 598, 599, 600, 601], "summary": {"covered_lines": 6, "num_statements": 6, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 594}, "construct_model": {"executed_lines": [625, 626, 628, 629, 630, 636, 637], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 604}, "add_distributions": {"executed_lines": [647, 648, 649, 651, 653, 654, 655, 656, 657, 658, 662, 663, 664, 665, 667, 669, 670], "summary": {"covered_lines": 17, "num_statements": 18, "percent_covered": 94.44444444444444, "percent_covered_display": "94", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 94.44444444444444, 
"percent_statements_covered_display": "94"}, "missing_lines": [660], "excluded_lines": [], "start_line": 640}, "add_modalities": {"executed_lines": [679, 680, 681, 683, 684, 685, 687, 688], "summary": {"covered_lines": 8, "num_statements": 8, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 673}, "add_data": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 11, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 11, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [699, 700, 702, 703, 704, 706, 707, 708, 710, 711, 712], "excluded_lines": [], "start_line": 691}, "DynamicYamlConfigSettingsSource.__init__": {"executed_lines": [747, 748], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 732}, "DynamicYamlConfigSettingsSource._read_file": {"executed_lines": [752, 753, 754, 759], "summary": {"covered_lines": 4, "num_statements": 5, "percent_covered": 80.0, "percent_covered_display": "80", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 80.0, "percent_statements_covered_display": "80"}, "missing_lines": [755], "excluded_lines": [], "start_line": 750}, "DynamicYamlConfigSettingsSource.__call__": {"executed_lines": [763, 767, 768, 774], "summary": {"covered_lines": 4, "num_statements": 4, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], 
"start_line": 761}, "DynamicYamlConfigSettingsSource.__repr__": {"executed_lines": [778], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 776}, "BaseCLI.settings_customise_sources": {"executed_lines": [813, 818, 819], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 804}, "": {"executed_lines": [12, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 39, 44, 46, 48, 51, 56, 59, 63, 69, 72, 78, 82, 87, 94, 102, 107, 110, 115, 120, 128, 131, 135, 139, 145, 148, 153, 159, 176, 179, 182, 186, 187, 202, 214, 225, 228, 232, 236, 240, 244, 253, 258, 259, 296, 306, 316, 321, 325, 332, 336, 341, 357, 390, 393, 396, 400, 404, 412, 416, 420, 424, 431, 435, 441, 445, 449, 457, 470, 477, 485, 495, 502, 505, 509, 513, 517, 525, 539, 550, 553, 557, 562, 566, 570, 571, 573, 578, 586, 594, 604, 640, 673, 691, 715, 718, 732, 750, 761, 776, 788, 791, 793, 803, 804], "summary": {"covered_lines": 123, "num_statements": 123, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"CrossValidationConfig": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], 
"excluded_lines": [], "start_line": 56}, "DataConfig": {"executed_lines": [89, 92, 96], "summary": {"covered_lines": 3, "num_statements": 4, "percent_covered": 75.0, "percent_covered_display": "75", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 75.0, "percent_statements_covered_display": "75"}, "missing_lines": [90], "excluded_lines": [], "start_line": 69}, "DiagnosisConfig": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [122], "excluded_lines": [], "start_line": 107}, "DistributionConfig": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 128}, "InvolvementConfig": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 145}, "GraphConfig": {"executed_lines": [189, 190], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 176}, "ModelConfig": {"executed_lines": [261, 270, 273, 275, 276, 280, 281, 282, 283, 285, 288], "summary": {"covered_lines": 11, "num_statements": 15, "percent_covered": 73.33333333333333, "percent_covered_display": "73", "missing_lines": 4, 
"excluded_lines": 0, "percent_statements_covered": 73.33333333333333, "percent_statements_covered_display": "73"}, "missing_lines": [271, 277, 278, 286], "excluded_lines": [], "start_line": 225}, "DeprecatedModelConfig": {"executed_lines": [343, 348, 349, 350, 355, 359, 360, 362, 363, 364, 366, 367, 368, 370, 372, 379, 380, 381, 387], "summary": {"covered_lines": 19, "num_statements": 19, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 306}, "SamplingConfig": {"executed_lines": [463], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 390}, "ScheduleConfig": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 8, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 8, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [527, 528, 529, 531, 532, 533, 535, 536], "excluded_lines": [], "start_line": 502}, "ScenarioConfig": {"executed_lines": [575, 576, 580, 588], "summary": {"covered_lines": 4, "num_statements": 8, "percent_covered": 50.0, "percent_covered_display": "50", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 50.0, "percent_statements_covered_display": "50"}, "missing_lines": [581, 582, 584, 589], "excluded_lines": [], "start_line": 550}, "DynamicYamlConfigSettingsSource": {"executed_lines": [747, 748, 752, 753, 754, 759, 763, 767, 768, 774, 778], "summary": {"covered_lines": 11, "num_statements": 12, "percent_covered": 91.66666666666667, "percent_covered_display": "92", "missing_lines": 1, 
"excluded_lines": 0, "percent_statements_covered": 91.66666666666667, "percent_statements_covered_display": "92"}, "missing_lines": [755], "excluded_lines": [], "start_line": 718}, "BaseCLI": {"executed_lines": [813, 818, 819], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 788}, "": {"executed_lines": [12, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 39, 44, 46, 48, 51, 56, 59, 63, 69, 72, 78, 82, 87, 94, 102, 104, 107, 110, 115, 120, 128, 131, 135, 139, 145, 148, 153, 159, 161, 162, 164, 167, 168, 170, 171, 176, 179, 182, 186, 187, 202, 204, 205, 206, 208, 209, 211, 214, 216, 219, 220, 222, 225, 228, 232, 236, 240, 244, 253, 258, 259, 296, 298, 299, 306, 316, 321, 325, 332, 336, 341, 357, 390, 393, 396, 400, 404, 412, 416, 420, 424, 431, 435, 441, 445, 449, 457, 470, 477, 485, 495, 502, 505, 509, 513, 517, 525, 539, 541, 542, 544, 545, 550, 553, 557, 562, 566, 570, 571, 573, 578, 586, 594, 596, 597, 598, 599, 600, 601, 604, 625, 626, 628, 629, 630, 636, 637, 640, 647, 648, 649, 651, 653, 654, 655, 656, 657, 658, 662, 663, 664, 665, 667, 669, 670, 673, 679, 680, 681, 683, 684, 685, 687, 688, 691, 715, 718, 732, 750, 761, 776, 788, 791, 793, 803, 804], "summary": {"covered_lines": 185, "num_statements": 207, "percent_covered": 89.3719806763285, "percent_covered_display": "89", "missing_lines": 22, "excluded_lines": 0, "percent_statements_covered": 89.3719806763285, "percent_statements_covered_display": "89"}, "missing_lines": [165, 173, 217, 472, 473, 474, 482, 491, 492, 547, 660, 699, 700, 702, 703, 704, 706, 707, 708, 710, 711, 712], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/__init__.py": {"executed_lines": [23, 25, 36, 39, 51], "summary": {"covered_lines": 5, 
"num_statements": 6, "percent_covered": 83.33333333333333, "percent_covered_display": "83", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 83.33333333333333, "percent_statements_covered_display": "83"}, "missing_lines": [53], "excluded_lines": [], "functions": {"DataCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [53], "excluded_lines": [], "start_line": 51}, "": {"executed_lines": [23, 25, 36, 39, 51], "summary": {"covered_lines": 5, "num_statements": 5, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"DataCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [53], "excluded_lines": [], "start_line": 39}, "": {"executed_lines": [23, 25, 36, 39, 51], "summary": {"covered_lines": 5, "num_statements": 5, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/__main__.py": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 18, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 18, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 5, 6, 7, 10, 13, 15, 20, 21, 25, 26, 27, 
28, 29, 31, 32, 35, 36], "excluded_lines": [], "functions": {"main": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 10, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 10, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [15, 20, 21, 25, 26, 27, 28, 29, 31, 32], "excluded_lines": [], "start_line": 13}, "": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 8, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 8, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 5, 6, 7, 10, 13, 35, 36], "excluded_lines": [], "start_line": 1}}, "classes": {"": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 18, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 18, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [3, 5, 6, 7, 10, 13, 15, 20, 21, 25, 26, 27, 28, 29, 31, 32, 35, 36], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/collect/__init__.py": {"executed_lines": [21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 47, 48, 49, 50, 53, 54, 61, 62, 67, 68, 79, 80, 123, 126, 130, 135], "summary": {"covered_lines": 31, "num_statements": 58, "percent_covered": 53.44827586206897, "percent_covered_display": "53", "missing_lines": 27, "excluded_lines": 0, "percent_statements_covered": 53.44827586206897, "percent_statements_covered_display": "53"}, "missing_lines": [56, 57, 58, 64, 76, 92, 94, 95, 96, 101, 103, 104, 105, 106, 108, 109, 110, 112, 113, 114, 115, 116, 137, 138, 141, 142, 144], "excluded_lines": [], "functions": {"serve_index_html": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, 
"percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [56, 57, 58], "excluded_lines": [], "start_line": 54}, "serve_schema": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [64], "excluded_lines": [], "start_line": 62}, "serve_collector_js": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [76], "excluded_lines": [], "start_line": 68}, "process": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 17, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [92, 94, 95, 96, 101, 103, 104, 105, 106, 108, 109, 110, 112, 113, 114, 115, 116], "excluded_lines": [], "start_line": 80}, "CollectorCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 5, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [137, 138, 141, 142, 144], "excluded_lines": [], "start_line": 135}, "": {"executed_lines": [21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 47, 48, 49, 50, 53, 54, 61, 62, 67, 68, 79, 80, 123, 126, 130, 135], "summary": {"covered_lines": 31, "num_statements": 31, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": 
[], "excluded_lines": [], "start_line": 1}}, "classes": {"CollectorCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 5, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [137, 138, 141, 142, 144], "excluded_lines": [], "start_line": 123}, "": {"executed_lines": [21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 47, 48, 49, 50, 53, 54, 61, 62, 67, 68, 79, 80, 123, 126, 130, 135], "summary": {"covered_lines": 31, "num_statements": 53, "percent_covered": 58.490566037735846, "percent_covered_display": "58", "missing_lines": 22, "excluded_lines": 0, "percent_statements_covered": 58.490566037735846, "percent_statements_covered_display": "58"}, "missing_lines": [56, 57, 58, 64, 76, 92, 94, 95, 96, 101, 103, 104, 105, 106, 108, 109, 110, 112, 113, 114, 115, 116], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/enhance.py": {"executed_lines": [9, 11, 12, 13, 15, 16, 17, 20, 24, 25, 26, 33, 51], "summary": {"covered_lines": 13, "num_statements": 19, "percent_covered": 68.42105263157895, "percent_covered_display": "68", "missing_lines": 6, "excluded_lines": 0, "percent_statements_covered": 68.42105263157895, "percent_statements_covered_display": "68"}, "missing_lines": [40, 42, 43, 48, 52, 53], "excluded_lines": [], "functions": {"EnhanceCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 4, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [40, 42, 43, 48], "excluded_lines": [], "start_line": 33}, "": {"executed_lines": [9, 11, 12, 13, 15, 16, 17, 20, 24, 25, 26, 33, 51], "summary": {"covered_lines": 13, "num_statements": 15, "percent_covered": 86.66666666666667, "percent_covered_display": "87", 
"missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 86.66666666666667, "percent_statements_covered_display": "87"}, "missing_lines": [52, 53], "excluded_lines": [], "start_line": 1}}, "classes": {"EnhanceCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 4, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [40, 42, 43, 48], "excluded_lines": [], "start_line": 20}, "": {"executed_lines": [9, 11, 12, 13, 15, 16, 17, 20, 24, 25, 26, 33, 51], "summary": {"covered_lines": 13, "num_statements": 15, "percent_covered": 86.66666666666667, "percent_covered_display": "87", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 86.66666666666667, "percent_statements_covered_display": "87"}, "missing_lines": [52, 53], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/fetch.py": {"executed_lines": [3, 5, 6, 7, 8, 10, 11, 14, 17, 24, 31, 38, 40, 55], "summary": {"covered_lines": 14, "num_statements": 21, "percent_covered": 66.66666666666667, "percent_covered_display": "67", "missing_lines": 7, "excluded_lines": 0, "percent_statements_covered": 66.66666666666667, "percent_statements_covered_display": "67"}, "missing_lines": [42, 43, 45, 51, 52, 56, 57], "excluded_lines": [], "functions": {"FetchCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 5, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [42, 43, 45, 51, 52], "excluded_lines": [], "start_line": 40}, "": {"executed_lines": [3, 5, 6, 7, 8, 10, 11, 14, 17, 24, 31, 38, 40, 55], "summary": {"covered_lines": 14, "num_statements": 16, "percent_covered": 87.5, "percent_covered_display": "88", "missing_lines": 2, "excluded_lines": 0, 
"percent_statements_covered": 87.5, "percent_statements_covered_display": "88"}, "missing_lines": [56, 57], "excluded_lines": [], "start_line": 1}}, "classes": {"FetchCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 5, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [42, 43, 45, 51, 52], "excluded_lines": [], "start_line": 14}, "": {"executed_lines": [3, 5, 6, 7, 8, 10, 11, 14, 17, 24, 31, 38, 40, 55], "summary": {"covered_lines": 14, "num_statements": 16, "percent_covered": 87.5, "percent_covered_display": "88", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 87.5, "percent_statements_covered_display": "88"}, "missing_lines": [56, 57], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/filter.py": {"executed_lines": [7, 8, 10, 11, 12, 13, 15, 16, 17, 20, 24, 28, 35, 38, 39, 41, 68, 97], "summary": {"covered_lines": 18, "num_statements": 48, "percent_covered": 37.5, "percent_covered_display": "38", "missing_lines": 30, "excluded_lines": 0, "percent_statements_covered": 37.5, "percent_statements_covered_display": "38"}, "missing_lines": [43, 44, 45, 46, 47, 49, 54, 55, 56, 57, 58, 60, 61, 62, 63, 64, 66, 76, 78, 79, 84, 85, 87, 88, 89, 91, 92, 94, 98, 99], "excluded_lines": [], "functions": {"FilterCLI.model_post_init": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 17, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [43, 44, 45, 46, 47, 49, 54, 55, 56, 57, 58, 60, 61, 62, 63, 64, 66], "excluded_lines": [], "start_line": 41}, "FilterCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 11, "percent_covered": 0.0, "percent_covered_display": "0", 
"missing_lines": 11, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [76, 78, 79, 84, 85, 87, 88, 89, 91, 92, 94], "excluded_lines": [], "start_line": 68}, "": {"executed_lines": [7, 8, 10, 11, 12, 13, 15, 16, 17, 20, 24, 28, 35, 38, 39, 41, 68, 97], "summary": {"covered_lines": 18, "num_statements": 20, "percent_covered": 90.0, "percent_covered_display": "90", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 90.0, "percent_statements_covered_display": "90"}, "missing_lines": [98, 99], "excluded_lines": [], "start_line": 1}}, "classes": {"FilterCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 28, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 28, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [43, 44, 45, 46, 47, 49, 54, 55, 56, 57, 58, 60, 61, 62, 63, 64, 66, 76, 78, 79, 84, 85, 87, 88, 89, 91, 92, 94], "excluded_lines": [], "start_line": 20}, "": {"executed_lines": [7, 8, 10, 11, 12, 13, 15, 16, 17, 20, 24, 28, 35, 38, 39, 41, 68, 97], "summary": {"covered_lines": 18, "num_statements": 20, "percent_covered": 90.0, "percent_covered_display": "90", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 90.0, "percent_statements_covered_display": "90"}, "missing_lines": [98, 99], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/generate.py": {"executed_lines": [11, 12, 13, 14, 16, 17, 26, 29, 33, 34, 41, 49, 51, 53, 55, 56, 57, 60, 62, 65, 67, 76, 78, 79, 80, 81, 82, 84, 89, 91, 94], "summary": {"covered_lines": 31, "num_statements": 35, "percent_covered": 88.57142857142857, "percent_covered_display": "89", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 88.57142857142857, "percent_statements_covered_display": "89"}, "missing_lines": [58, 63, 95, 96], "excluded_lines": [], "functions": 
{"GenerateCLI.model_post_init": {"executed_lines": [55, 56, 57, 60, 62, 65], "summary": {"covered_lines": 6, "num_statements": 8, "percent_covered": 75.0, "percent_covered_display": "75", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 75.0, "percent_statements_covered_display": "75"}, "missing_lines": [58, 63], "excluded_lines": [], "start_line": 53}, "GenerateCLI.cli_cmd": {"executed_lines": [76, 78, 79, 80, 81, 82, 84, 89, 91], "summary": {"covered_lines": 9, "num_statements": 9, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 67}, "": {"executed_lines": [11, 12, 13, 14, 16, 17, 26, 29, 33, 34, 41, 49, 51, 53, 67, 94], "summary": {"covered_lines": 16, "num_statements": 18, "percent_covered": 88.88888888888889, "percent_covered_display": "89", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 88.88888888888889, "percent_statements_covered_display": "89"}, "missing_lines": [95, 96], "excluded_lines": [], "start_line": 1}}, "classes": {"GenerateCLI": {"executed_lines": [55, 56, 57, 60, 62, 65, 76, 78, 79, 80, 81, 82, 84, 89, 91], "summary": {"covered_lines": 15, "num_statements": 17, "percent_covered": 88.23529411764706, "percent_covered_display": "88", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 88.23529411764706, "percent_statements_covered_display": "88"}, "missing_lines": [58, 63], "excluded_lines": [], "start_line": 29}, "": {"executed_lines": [11, 12, 13, 14, 16, 17, 26, 29, 33, 34, 41, 49, 51, 53, 67, 94], "summary": {"covered_lines": 16, "num_statements": 18, "percent_covered": 88.88888888888889, "percent_covered_display": "89", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 88.88888888888889, "percent_statements_covered_display": "89"}, "missing_lines": [95, 96], 
"excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/join.py": {"executed_lines": [3, 5, 6, 7, 9, 10, 11, 14, 17, 18, 20, 80], "summary": {"covered_lines": 12, "num_statements": 22, "percent_covered": 54.54545454545455, "percent_covered_display": "55", "missing_lines": 10, "excluded_lines": 0, "percent_statements_covered": 54.54545454545455, "percent_statements_covered_display": "55"}, "missing_lines": [61, 63, 64, 67, 68, 69, 71, 77, 81, 82], "excluded_lines": [], "functions": {"JoinCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 8, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 8, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [61, 63, 64, 67, 68, 69, 71, 77], "excluded_lines": [], "start_line": 20}, "": {"executed_lines": [3, 5, 6, 7, 9, 10, 11, 14, 17, 18, 20, 80], "summary": {"covered_lines": 12, "num_statements": 14, "percent_covered": 85.71428571428571, "percent_covered_display": "86", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 85.71428571428571, "percent_statements_covered_display": "86"}, "missing_lines": [81, 82], "excluded_lines": [], "start_line": 1}}, "classes": {"JoinCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 8, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 8, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [61, 63, 64, 67, 68, 69, 71, 77], "excluded_lines": [], "start_line": 14}, "": {"executed_lines": [3, 5, 6, 7, 9, 10, 11, 14, 17, 18, 20, 80], "summary": {"covered_lines": 12, "num_statements": 14, "percent_covered": 85.71428571428571, "percent_covered_display": "86", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 85.71428571428571, "percent_statements_covered_display": "86"}, "missing_lines": [81, 82], "excluded_lines": [], 
"start_line": 1}}}, "src/lyscripts/data/lyproxify.py": {"executed_lines": [10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 29, 37, 49, 52, 53, 57, 67, 74, 78, 80, 122, 126, 145, 162, 163, 164, 165, 167, 169, 176, 199, 200, 201, 202, 203, 204, 205, 206, 208, 210, 213, 285, 310, 327, 328, 330, 331, 332, 334, 335, 336, 339], "summary": {"covered_lines": 56, "num_statements": 123, "percent_covered": 45.52845528455285, "percent_covered_display": "46", "missing_lines": 67, "excluded_lines": 1, "percent_statements_covered": 45.52845528455285, "percent_statements_covered_display": "46"}, "missing_lines": [31, 32, 34, 39, 40, 41, 43, 44, 46, 90, 92, 96, 102, 103, 104, 105, 106, 108, 109, 110, 111, 113, 114, 116, 117, 119, 132, 134, 135, 136, 141, 142, 173, 250, 252, 253, 255, 256, 258, 259, 260, 261, 262, 263, 264, 265, 267, 268, 271, 272, 276, 281, 282, 293, 294, 295, 297, 298, 299, 300, 302, 303, 304, 306, 307, 340, 341], "excluded_lines": [146], "functions": {"ensure_python_file": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [31, 32, 34], "excluded_lines": [], "start_line": 29}, "ensure_column_map": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 6, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 6, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [39, 40, 41, 43, 44, 46], "excluded_lines": [], "start_line": 37}, "LyproxifyCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 17, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [90, 92, 96, 102, 103, 
104, 105, 106, 108, 109, 110, 111, 113, 114, 116, 117, 119], "excluded_lines": [], "start_line": 80}, "clean_header": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 6, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 6, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [132, 134, 135, 136, 141, 142], "excluded_lines": [], "start_line": 126}, "get_instruction_depth": {"executed_lines": [162, 163, 164, 165, 167, 169], "summary": {"covered_lines": 6, "num_statements": 7, "percent_covered": 85.71428571428571, "percent_covered_display": "86", "missing_lines": 1, "excluded_lines": 1, "percent_statements_covered": 85.71428571428571, "percent_statements_covered_display": "86"}, "missing_lines": [173], "excluded_lines": [146], "start_line": 145}, "generate_markdown_docs": {"executed_lines": [199, 200, 201, 202, 203, 204, 205, 206, 208, 210], "summary": {"covered_lines": 10, "num_statements": 10, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 176}, "transform_to_lyprox": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 20, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 20, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [250, 252, 253, 255, 256, 258, 259, 260, 261, 262, 263, 264, 265, 267, 268, 271, 272, 276, 281, 282], "excluded_lines": [], "start_line": 213}, "leftright_to_ipsicontra": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 12, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 12, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": 
[293, 294, 295, 297, 298, 299, 300, 302, 303, 304, 306, 307], "excluded_lines": [], "start_line": 285}, "exclude_patients": {"executed_lines": [327, 328, 330, 331, 332, 334, 335, 336], "summary": {"covered_lines": 8, "num_statements": 8, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 310}, "": {"executed_lines": [10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 29, 37, 49, 52, 53, 57, 67, 74, 78, 80, 122, 126, 145, 176, 213, 285, 310, 339], "summary": {"covered_lines": 32, "num_statements": 34, "percent_covered": 94.11764705882354, "percent_covered_display": "94", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 94.11764705882354, "percent_statements_covered_display": "94"}, "missing_lines": [340, 341], "excluded_lines": [], "start_line": 1}}, "classes": {"LyproxifyCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 17, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [90, 92, 96, 102, 103, 104, 105, 106, 108, 109, 110, 111, 113, 114, 116, 117, 119], "excluded_lines": [], "start_line": 49}, "ParsingError": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 122}, "": {"executed_lines": [10, 11, 12, 13, 15, 16, 17, 18, 19, 21, 22, 23, 24, 26, 29, 37, 49, 52, 53, 57, 67, 74, 78, 80, 122, 126, 145, 162, 163, 164, 165, 167, 169, 176, 199, 200, 201, 202, 203, 204, 205, 206, 208, 210, 213, 285, 310, 327, 328, 330, 331, 332, 
334, 335, 336, 339], "summary": {"covered_lines": 56, "num_statements": 106, "percent_covered": 52.83018867924528, "percent_covered_display": "53", "missing_lines": 50, "excluded_lines": 1, "percent_statements_covered": 52.83018867924528, "percent_statements_covered_display": "53"}, "missing_lines": [31, 32, 34, 39, 40, 41, 43, 44, 46, 132, 134, 135, 136, 141, 142, 173, 250, 252, 253, 255, 256, 258, 259, 260, 261, 262, 263, 264, 265, 267, 268, 271, 272, 276, 281, 282, 293, 294, 295, 297, 298, 299, 300, 302, 303, 304, 306, 307, 340, 341], "excluded_lines": [146], "start_line": 1}}}, "src/lyscripts/data/split.py": {"executed_lines": [3, 4, 6, 7, 8, 9, 11, 12, 13, 15, 18, 22, 23, 25, 71], "summary": {"covered_lines": 15, "num_statements": 29, "percent_covered": 51.724137931034484, "percent_covered_display": "52", "missing_lines": 14, "excluded_lines": 0, "percent_statements_covered": 51.724137931034484, "percent_statements_covered_display": "52"}, "missing_lines": [33, 35, 36, 38, 40, 46, 50, 51, 54, 59, 61, 65, 72, 73], "excluded_lines": [], "functions": {"SplitCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 12, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 12, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [33, 35, 36, 38, 40, 46, 50, 51, 54, 59, 61, 65], "excluded_lines": [], "start_line": 25}, "": {"executed_lines": [3, 4, 6, 7, 8, 9, 11, 12, 13, 15, 18, 22, 23, 25, 71], "summary": {"covered_lines": 15, "num_statements": 17, "percent_covered": 88.23529411764706, "percent_covered_display": "88", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 88.23529411764706, "percent_statements_covered_display": "88"}, "missing_lines": [72, 73], "excluded_lines": [], "start_line": 1}}, "classes": {"SplitCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 12, "percent_covered": 0.0, 
"percent_covered_display": "0", "missing_lines": 12, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [33, 35, 36, 38, 40, 46, 50, 51, 54, 59, 61, 65], "excluded_lines": [], "start_line": 18}, "": {"executed_lines": [3, 4, 6, 7, 8, 9, 11, 12, 13, 15, 18, 22, 23, 25, 71], "summary": {"covered_lines": 15, "num_statements": 17, "percent_covered": 88.23529411764706, "percent_covered_display": "88", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 88.23529411764706, "percent_statements_covered_display": "88"}, "missing_lines": [72, 73], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/data/utils.py": {"executed_lines": [3, 5, 6, 8, 11, 12, 14, 15, 16], "summary": {"covered_lines": 9, "num_statements": 9, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "functions": {"save_table_to_csv": {"executed_lines": [14, 15, 16], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 12}, "": {"executed_lines": [3, 5, 6, 8, 11, 12], "summary": {"covered_lines": 6, "num_statements": 6, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"": {"executed_lines": [3, 5, 6, 8, 11, 12, 14, 15, 16], "summary": {"covered_lines": 9, "num_statements": 9, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 
100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/decorators.py": {"executed_lines": [8, 9, 10, 11, 12, 13, 16, 18, 19, 20, 23, 30, 33, 34, 36, 37, 38, 39, 41, 42, 51, 57, 59, 62, 65, 66, 68, 69, 72, 74, 77, 80, 81, 83, 84, 86, 88], "summary": {"covered_lines": 37, "num_statements": 41, "percent_covered": 90.2439024390244, "percent_covered_display": "90", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 90.2439024390244, "percent_statements_covered_display": "90"}, "missing_lines": [53, 54, 55, 70], "excluded_lines": [], "functions": {"assemble_signature": {"executed_lines": [18, 19, 20], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 16}, "log_state": {"executed_lines": [30, 59], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 23}, "log_state.log_decorator": {"executed_lines": [33, 34, 57], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 30}, "log_state.log_decorator.wrapper": {"executed_lines": [36, 37, 38, 39, 41, 42, 51], "summary": {"covered_lines": 7, "num_statements": 10, "percent_covered": 70.0, "percent_covered_display": "70", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 70.0, 
"percent_statements_covered_display": "70"}, "missing_lines": [53, 54, 55], "excluded_lines": [], "start_line": 34}, "check_input_file_exists": {"executed_lines": [65, 66, 74], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 62}, "check_input_file_exists.inner": {"executed_lines": [68, 69, 72], "summary": {"covered_lines": 3, "num_statements": 4, "percent_covered": 75.0, "percent_covered_display": "75", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 75.0, "percent_statements_covered_display": "75"}, "missing_lines": [70], "excluded_lines": [], "start_line": 66}, "check_output_dir_exists": {"executed_lines": [80, 81, 88], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 77}, "check_output_dir_exists.inner": {"executed_lines": [83, 84, 86], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 81}, "": {"executed_lines": [8, 9, 10, 11, 12, 13, 16, 23, 62, 77], "summary": {"covered_lines": 10, "num_statements": 10, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"": {"executed_lines": [8, 9, 10, 11, 12, 13, 16, 18, 19, 
20, 23, 30, 33, 34, 36, 37, 38, 39, 41, 42, 51, 57, 59, 62, 65, 66, 68, 69, 72, 74, 77, 80, 81, 83, 84, 86, 88], "summary": {"covered_lines": 37, "num_statements": 41, "percent_covered": 90.2439024390244, "percent_covered_display": "90", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 90.2439024390244, "percent_statements_covered_display": "90"}, "missing_lines": [53, 54, 55, 70], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/evaluate.py": {"executed_lines": [8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 24, 38, 73, 90, 106, 137, 200], "summary": {"covered_lines": 18, "num_statements": 70, "percent_covered": 25.714285714285715, "percent_covered_display": "26", "missing_lines": 52, "excluded_lines": 0, "percent_statements_covered": 25.714285714285715, "percent_statements_covered_display": "26"}, "missing_lines": [29, 35, 43, 48, 50, 57, 63, 70, 87, 102, 103, 114, 115, 117, 118, 123, 124, 125, 127, 128, 129, 131, 132, 134, 139, 141, 142, 143, 144, 145, 148, 149, 156, 162, 164, 174, 175, 178, 179, 180, 183, 184, 186, 191, 192, 194, 195, 197, 201, 202, 204, 205], "excluded_lines": [], "functions": {"_add_parser": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 2, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [29, 35], "excluded_lines": [], "start_line": 24}, "_add_arguments": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 6, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 6, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [43, 48, 50, 57, 63, 70], "excluded_lines": [], "start_line": 38}, "comp_bic": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, 
"excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [87], "excluded_lines": [], "start_line": 73}, "compute_evidence": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 2, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [102, 103], "excluded_lines": [], "start_line": 90}, "compute_ti_results": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 13, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 13, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [114, 115, 117, 118, 123, 124, 125, 127, 128, 129, 131, 132, 134], "excluded_lines": [], "start_line": 106}, "main": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 24, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 24, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [139, 141, 142, 143, 144, 145, 148, 149, 156, 162, 164, 174, 175, 178, 179, 180, 183, 184, 186, 191, 192, 194, 195, 197], "excluded_lines": [], "start_line": 137}, "": {"executed_lines": [8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 24, 38, 73, 90, 106, 137, 200], "summary": {"covered_lines": 18, "num_statements": 22, "percent_covered": 81.81818181818181, "percent_covered_display": "82", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 81.81818181818181, "percent_statements_covered_display": "82"}, "missing_lines": [201, 202, 204, 205], "excluded_lines": [], "start_line": 1}}, "classes": {"": {"executed_lines": [8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 24, 38, 73, 90, 106, 137, 200], "summary": {"covered_lines": 18, "num_statements": 70, "percent_covered": 25.714285714285715, 
"percent_covered_display": "26", "missing_lines": 52, "excluded_lines": 0, "percent_statements_covered": 25.714285714285715, "percent_statements_covered_display": "26"}, "missing_lines": [29, 35, 43, 48, 50, 57, 63, 70, 87, 102, 103, 114, 115, 117, 118, 123, 124, 125, 127, 128, 129, 131, 132, 134, 139, 141, 142, 143, 144, 145, 148, 149, 156, 162, 164, 174, 175, 178, 179, 180, 183, 184, 186, 191, 192, 194, 195, 197, 201, 202, 204, 205], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/integrate.py": {"executed_lines": [9, 11, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 35, 38, 66, 69, 70, 71, 78, 84, 85, 86, 90, 161], "summary": {"covered_lines": 24, "num_statements": 46, "percent_covered": 52.17391304347826, "percent_covered_display": "52", "missing_lines": 22, "excluded_lines": 0, "percent_statements_covered": 52.17391304347826, "percent_statements_covered_display": "52"}, "missing_lines": [46, 47, 53, 104, 106, 110, 111, 112, 113, 114, 117, 119, 122, 124, 125, 126, 134, 144, 153, 154, 162, 163], "excluded_lines": [], "functions": {"init_ti_sampler": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [46, 47, 53], "excluded_lines": [], "start_line": 38}, "IntegrateCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 17, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [104, 106, 110, 111, 112, 113, 114, 117, 119, 122, 124, 125, 126, 134, 144, 153, 154], "excluded_lines": [], "start_line": 90}, "": {"executed_lines": [9, 11, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 35, 38, 66, 69, 70, 71, 78, 84, 85, 86, 90, 161], "summary": {"covered_lines": 24, "num_statements": 26, 
"percent_covered": 92.3076923076923, "percent_covered_display": "92", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 92.3076923076923, "percent_statements_covered_display": "92"}, "missing_lines": [162, 163], "excluded_lines": [], "start_line": 1}}, "classes": {"IntegrateCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 17, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 17, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [104, 106, 110, 111, 112, 113, 114, 117, 119, 122, 124, 125, 126, 134, 144, 153, 154], "excluded_lines": [], "start_line": 66}, "": {"executed_lines": [9, 11, 12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 35, 38, 66, 69, 70, 71, 78, 84, 85, 86, 90, 161], "summary": {"covered_lines": 24, "num_statements": 29, "percent_covered": 82.75862068965517, "percent_covered_display": "83", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 82.75862068965517, "percent_statements_covered_display": "83"}, "missing_lines": [46, 47, 53, 162, 163], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/plots.py": {"executed_lines": [3, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 30, 37, 38, 41, 50, 59, 61, 64, 66, 69, 74, 75, 76, 77, 78, 81, 84, 87, 88, 89, 91, 92, 96, 97, 101, 102, 106, 109, 110, 112, 115, 118, 120, 121, 123, 125, 126, 135, 136, 137, 138, 139, 140, 142, 144, 146, 148, 150, 152, 154, 155, 157, 158, 160, 163, 166, 167, 169, 170, 179, 180, 181, 182, 183, 184, 190, 198, 199, 201, 202, 204, 206, 208, 216, 218, 225, 227, 234, 239, 240, 241, 243, 244, 246, 247, 249, 252, 268, 269, 270, 271, 273, 274, 275, 276, 279, 281, 282, 288, 289, 290, 291, 294, 303, 306, 307, 310, 311, 314, 335, 338, 340, 343, 345, 354, 355, 357, 358, 360, 363, 395, 396, 397, 402, 403, 404, 410, 411], "summary": {"covered_lines": 142, "num_statements": 160, "percent_covered": 88.75, "percent_covered_display": "89", 
"missing_lines": 18, "excluded_lines": 9, "percent_statements_covered": 88.75, "percent_statements_covered_display": "89"}, "missing_lines": [46, 47, 56, 185, 186, 336, 341, 370, 375, 377, 378, 380, 381, 382, 383, 385, 392, 399], "excluded_lines": [25, 26, 27, 94, 95, 99, 100, 104, 105], "functions": {"floor_at_decimal": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 2, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [46, 47], "excluded_lines": [], "start_line": 41}, "ceil_at_decimal": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [56], "excluded_lines": [], "start_line": 50}, "floor_to_step": {"executed_lines": [61], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 59}, "ceil_to_step": {"executed_lines": [66], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 64}, "clean_and_check": {"executed_lines": [74, 75, 76, 77, 78], "summary": {"covered_lines": 5, "num_statements": 5, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], 
"start_line": 69}, "AbstractDistribution.draw": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 1, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [94], "start_line": 92}, "AbstractDistribution.left_percentile": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 1, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [99], "start_line": 97}, "AbstractDistribution.right_percentile": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 1, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [104], "start_line": 102}, "AbstractDistribution._get_label": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 106}, "AbstractDistribution.label": {"executed_lines": [112], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 110}, "Histogram.values": {"executed_lines": [123], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, 
"excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 121}, "Histogram.from_hdf5": {"executed_lines": [135, 136, 137, 138, 139, 140], "summary": {"covered_lines": 6, "num_statements": 6, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 126}, "Histogram.left_percentile": {"executed_lines": [144], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 142}, "Histogram.right_percentile": {"executed_lines": [148], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 146}, "Histogram.draw": {"executed_lines": [152, 154, 155, 157, 158, 160], "summary": {"covered_lines": 6, "num_statements": 6, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 150}, "BetaPosterior.from_hdf5": {"executed_lines": [179, 180, 181, 182, 183, 184, 190], "summary": {"covered_lines": 7, "num_statements": 9, "percent_covered": 77.77777777777777, "percent_covered_display": "78", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 77.77777777777777, "percent_statements_covered_display": "78"}, "missing_lines": [185, 186], 
"excluded_lines": [], "start_line": 170}, "BetaPosterior._get_label": {"executed_lines": [199], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 198}, "BetaPosterior.num_fail": {"executed_lines": [204], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 202}, "BetaPosterior.pdf": {"executed_lines": [208], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 206}, "BetaPosterior.left_percentile": {"executed_lines": [218], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 216}, "BetaPosterior.right_percentile": {"executed_lines": [227], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 225}, "BetaPosterior.draw": {"executed_lines": [239, 240, 241, 243, 244, 246, 247, 249], "summary": {"covered_lines": 8, "num_statements": 8, "percent_covered": 100.0, 
"percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 234}, "get_size": {"executed_lines": [268, 269, 270, 271, 273, 274, 275, 276], "summary": {"covered_lines": 8, "num_statements": 8, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 252}, "get_label": {"executed_lines": [281, 282, 288, 289, 290, 291], "summary": {"covered_lines": 6, "num_statements": 6, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 279}, "get_xlims": {"executed_lines": [303, 306, 307, 310, 311], "summary": {"covered_lines": 5, "num_statements": 5, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 294}, "draw": {"executed_lines": [335, 338, 340, 343, 345, 354, 355, 357, 358, 360], "summary": {"covered_lines": 10, "num_statements": 12, "percent_covered": 83.33333333333333, "percent_covered_display": "83", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 83.33333333333333, "percent_statements_covered_display": "83"}, "missing_lines": [336, 341], "excluded_lines": [], "start_line": 314}, "split_legends": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 10, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 10, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, 
"missing_lines": [370, 375, 377, 378, 380, 381, 382, 383, 385, 392], "excluded_lines": [], "start_line": 363}, "use_mpl_stylesheet": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 1, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [399], "excluded_lines": [], "start_line": 397}, "save_figure": {"executed_lines": [410, 411], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 404}, "": {"executed_lines": [3, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 30, 37, 38, 41, 50, 59, 64, 69, 81, 84, 87, 88, 89, 91, 92, 96, 97, 101, 102, 106, 109, 110, 115, 118, 120, 121, 125, 126, 142, 146, 150, 163, 166, 167, 169, 170, 198, 201, 202, 206, 216, 225, 234, 252, 279, 294, 314, 363, 395, 396, 397, 402, 403, 404], "summary": {"covered_lines": 68, "num_statements": 68, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 6, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [25, 26, 27, 95, 100, 105], "start_line": 1}}, "classes": {"AbstractDistribution": {"executed_lines": [112], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 3, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [94, 99, 104], "start_line": 84}, "Histogram": {"executed_lines": [123, 135, 136, 137, 138, 139, 140, 144, 148, 152, 154, 155, 157, 158, 160], "summary": {"covered_lines": 15, "num_statements": 15, 
"percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 115}, "BetaPosterior": {"executed_lines": [179, 180, 181, 182, 183, 184, 190, 199, 204, 208, 218, 227, 239, 240, 241, 243, 244, 246, 247, 249], "summary": {"covered_lines": 20, "num_statements": 22, "percent_covered": 90.9090909090909, "percent_covered_display": "91", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 90.9090909090909, "percent_statements_covered_display": "91"}, "missing_lines": [185, 186], "excluded_lines": [], "start_line": 163}, "": {"executed_lines": [3, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 30, 37, 38, 41, 50, 59, 61, 64, 66, 69, 74, 75, 76, 77, 78, 81, 84, 87, 88, 89, 91, 92, 96, 97, 101, 102, 106, 109, 110, 115, 118, 120, 121, 125, 126, 142, 146, 150, 163, 166, 167, 169, 170, 198, 201, 202, 206, 216, 225, 234, 252, 268, 269, 270, 271, 273, 274, 275, 276, 279, 281, 282, 288, 289, 290, 291, 294, 303, 306, 307, 310, 311, 314, 335, 338, 340, 343, 345, 354, 355, 357, 358, 360, 363, 395, 396, 397, 402, 403, 404, 410, 411], "summary": {"covered_lines": 106, "num_statements": 122, "percent_covered": 86.88524590163935, "percent_covered_display": "87", "missing_lines": 16, "excluded_lines": 6, "percent_statements_covered": 86.88524590163935, "percent_statements_covered_display": "87"}, "missing_lines": [46, 47, 56, 336, 341, 370, 375, 377, 378, 380, 381, 382, 383, 385, 392, 399], "excluded_lines": [25, 26, 27, 95, 100, 105], "start_line": 1}}}, "src/lyscripts/sample.py": {"executed_lines": [19, 21, 22, 23, 25, 27, 29, 30, 31, 32, 34, 38, 40, 41, 42, 43, 44, 45, 46, 47, 49, 60, 63, 66, 68, 69, 71, 73, 75, 78, 81, 83, 84, 85, 86, 89, 92, 93, 95, 97, 98, 100, 101, 103, 106, 109, 110, 112, 114, 115, 117, 118, 120, 123, 126, 138, 143, 144, 145, 149, 150, 151, 153, 156, 165, 166, 178, 187, 
188, 190, 193, 196, 212, 218, 220, 228, 260, 261, 263, 264, 265, 267, 268, 270, 271, 272, 273, 278, 280, 281, 283, 294, 300, 301, 303, 307, 310, 314, 319, 325, 328, 330, 331, 337, 350, 353, 354, 355, 362, 368, 369, 371, 388, 390, 394, 395, 396, 397, 398, 401, 403, 404, 405, 414, 424], "summary": {"covered_lines": 125, "num_statements": 137, "percent_covered": 91.24087591240875, "percent_covered_display": "91", "missing_lines": 12, "excluded_lines": 5, "percent_statements_covered": 91.24087591240875, "percent_statements_covered_display": "91"}, "missing_lines": [35, 36, 74, 132, 133, 134, 135, 175, 191, 304, 425, 426], "excluded_lines": [312, 313, 316, 317, 318], "functions": {"CompletedItersColumn.__init__": {"executed_lines": [68, 69], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 66}, "CompletedItersColumn.render": {"executed_lines": [73, 75], "summary": {"covered_lines": 2, "num_statements": 3, "percent_covered": 66.66666666666667, "percent_covered_display": "67", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 66.66666666666667, "percent_statements_covered_display": "67"}, "missing_lines": [74], "excluded_lines": [], "start_line": 71}, "ItersPerSecondColumn.render": {"executed_lines": [83, 84, 85, 86], "summary": {"covered_lines": 4, "num_statements": 4, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 81}, "AcorTime.update": {"executed_lines": [97, 98], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, 
"percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 95}, "AcorTime.relative_diff": {"executed_lines": [103], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 101}, "NumAccepted.update": {"executed_lines": [114, 115], "summary": {"covered_lines": 2, "num_statements": 2, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 112}, "NumAccepted.newly_accepted": {"executed_lines": [120], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 118}, "log_prob_fn": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 4, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 4, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [132, 133, 134, 135], "excluded_lines": [], "start_line": 126}, "ensure_initial_state": {"executed_lines": [143, 144, 145, 149, 150, 151, 153], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 138}, "ensure_history_table": {"executed_lines": [165, 166], "summary": 
{"covered_lines": 2, "num_statements": 3, "percent_covered": 66.66666666666667, "percent_covered_display": "67", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 66.66666666666667, "percent_statements_covered_display": "67"}, "missing_lines": [175], "excluded_lines": [], "start_line": 156}, "update_history_table": {"executed_lines": [187, 188, 190, 193], "summary": {"covered_lines": 4, "num_statements": 5, "percent_covered": 80.0, "percent_covered_display": "80", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 80.0, "percent_statements_covered_display": "80"}, "missing_lines": [191], "excluded_lines": [], "start_line": 178}, "is_converged": {"executed_lines": [212], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 196}, "_get_columns": {"executed_lines": [220], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 218}, "run_sampling": {"executed_lines": [260, 261, 263, 264, 265, 267, 268, 270, 271, 272, 273, 278, 280, 281, 283, 294, 300, 301, 303], "summary": {"covered_lines": 19, "num_statements": 20, "percent_covered": 95.0, "percent_covered_display": "95", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 95.0, "percent_statements_covered_display": "95"}, "missing_lines": [304], "excluded_lines": [], "start_line": 228}, "DummyPool.__enter__": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 1, 
"percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [312], "start_line": 310}, "DummyPool.__exit__": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 1, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [316], "start_line": 314}, "get_pool": {"executed_lines": [325], "summary": {"covered_lines": 1, "num_statements": 1, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 319}, "init_sampler": {"executed_lines": [330, 331, 337], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 328}, "SampleCLI.cli_cmd": {"executed_lines": [388, 390, 394, 395, 396, 397, 398, 401, 403, 404, 405, 414], "summary": {"covered_lines": 12, "num_statements": 12, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 371}, "": {"executed_lines": [19, 21, 22, 23, 25, 27, 29, 30, 31, 32, 34, 38, 40, 41, 42, 43, 44, 45, 46, 47, 49, 60, 63, 66, 71, 78, 81, 89, 92, 93, 95, 100, 101, 106, 109, 110, 112, 117, 118, 123, 126, 138, 156, 178, 196, 218, 228, 307, 310, 314, 319, 328, 350, 353, 354, 355, 362, 368, 369, 371, 424], "summary": {"covered_lines": 61, "num_statements": 65, "percent_covered": 93.84615384615384, 
"percent_covered_display": "94", "missing_lines": 4, "excluded_lines": 3, "percent_statements_covered": 93.84615384615384, "percent_statements_covered_display": "94"}, "missing_lines": [35, 36, 425, 426], "excluded_lines": [313, 317, 318], "start_line": 1}}, "classes": {"CompletedItersColumn": {"executed_lines": [68, 69, 73, 75], "summary": {"covered_lines": 4, "num_statements": 5, "percent_covered": 80.0, "percent_covered_display": "80", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 80.0, "percent_statements_covered_display": "80"}, "missing_lines": [74], "excluded_lines": [], "start_line": 63}, "ItersPerSecondColumn": {"executed_lines": [83, 84, 85, 86], "summary": {"covered_lines": 4, "num_statements": 4, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 78}, "AcorTime": {"executed_lines": [97, 98, 103], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 89}, "NumAccepted": {"executed_lines": [114, 115, 120], "summary": {"covered_lines": 3, "num_statements": 3, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 106}, "DummyPool": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 2, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": 
[312, 316], "start_line": 307}, "SampleCLI": {"executed_lines": [388, 390, 394, 395, 396, 397, 398, 401, 403, 404, 405, 414], "summary": {"covered_lines": 12, "num_statements": 12, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 350}, "": {"executed_lines": [19, 21, 22, 23, 25, 27, 29, 30, 31, 32, 34, 38, 40, 41, 42, 43, 44, 45, 46, 47, 49, 60, 63, 66, 71, 78, 81, 89, 92, 93, 95, 100, 101, 106, 109, 110, 112, 117, 118, 123, 126, 138, 143, 144, 145, 149, 150, 151, 153, 156, 165, 166, 178, 187, 188, 190, 193, 196, 212, 218, 220, 228, 260, 261, 263, 264, 265, 267, 268, 270, 271, 272, 273, 278, 280, 281, 283, 294, 300, 301, 303, 307, 310, 314, 319, 325, 328, 330, 331, 337, 350, 353, 354, 355, 362, 368, 369, 371, 424], "summary": {"covered_lines": 99, "num_statements": 110, "percent_covered": 90.0, "percent_covered_display": "90", "missing_lines": 11, "excluded_lines": 3, "percent_statements_covered": 90.0, "percent_statements_covered_display": "90"}, "missing_lines": [35, 36, 132, 133, 134, 135, 175, 191, 304, 425, 426], "excluded_lines": [313, 317, 318], "start_line": 1}}}, "src/lyscripts/schedule.py": {"executed_lines": [13, 15, 16, 19, 22, 31], "summary": {"covered_lines": 6, "num_statements": 11, "percent_covered": 54.54545454545455, "percent_covered_display": "55", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 54.54545454545455, "percent_statements_covered_display": "55"}, "missing_lines": [24, 26, 28, 32, 33], "excluded_lines": [], "functions": {"ScheduleCLI.cli_cmd": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [24, 26, 28], 
"excluded_lines": [], "start_line": 22}, "": {"executed_lines": [13, 15, 16, 19, 22, 31], "summary": {"covered_lines": 6, "num_statements": 8, "percent_covered": 75.0, "percent_covered_display": "75", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 75.0, "percent_statements_covered_display": "75"}, "missing_lines": [32, 33], "excluded_lines": [], "start_line": 1}}, "classes": {"ScheduleCLI": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 3, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [24, 26, 28], "excluded_lines": [], "start_line": 19}, "": {"executed_lines": [13, 15, 16, 19, 22, 31], "summary": {"covered_lines": 6, "num_statements": 8, "percent_covered": 75.0, "percent_covered_display": "75", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 75.0, "percent_statements_covered_display": "75"}, "missing_lines": [32, 33], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/schema.py": {"executed_lines": [27, 29, 30, 32, 35, 38, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 59, 65], "summary": {"covered_lines": 19, "num_statements": 22, "percent_covered": 86.36363636363636, "percent_covered_display": "86", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 86.36363636363636, "percent_statements_covered_display": "86"}, "missing_lines": [61, 62, 66], "excluded_lines": [], "functions": {"main": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 2, "percent_covered": 0.0, "percent_covered_display": "0", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 0.0, "percent_statements_covered_display": "0"}, "missing_lines": [61, 62], "excluded_lines": [], "start_line": 59}, "": {"executed_lines": [27, 29, 30, 32, 35, 38, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 59, 65], "summary": 
{"covered_lines": 19, "num_statements": 20, "percent_covered": 95.0, "percent_covered_display": "95", "missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 95.0, "percent_statements_covered_display": "95"}, "missing_lines": [66], "excluded_lines": [], "start_line": 1}}, "classes": {"SchemaSettings": {"executed_lines": [], "summary": {"covered_lines": 0, "num_statements": 0, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 35}, "": {"executed_lines": [27, 29, 30, 32, 35, 38, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 59, 65], "summary": {"covered_lines": 19, "num_statements": 22, "percent_covered": 86.36363636363636, "percent_covered_display": "86", "missing_lines": 3, "excluded_lines": 0, "percent_statements_covered": 86.36363636363636, "percent_statements_covered_display": "86"}, "missing_lines": [61, 62, 66], "excluded_lines": [], "start_line": 1}}}, "src/lyscripts/utils.py": {"executed_lines": [3, 5, 6, 7, 8, 9, 10, 11, 13, 18, 21, 23, 24, 26, 27, 30, 33, 42, 43, 45, 46, 47, 48, 50, 53, 63, 65, 66, 67, 68, 70, 72, 75, 91, 93, 94, 95, 97, 98, 100, 102, 105, 115, 117, 118, 119, 120, 122, 124, 127, 137, 138, 139, 140, 143, 146, 151, 152, 154, 155, 156, 159, 160, 162, 163, 164, 165, 168, 169, 177, 178, 179, 180, 183, 184, 192, 193, 195, 199], "summary": {"covered_lines": 79, "num_statements": 84, "percent_covered": 94.04761904761905, "percent_covered_display": "94", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 94.04761904761905, "percent_statements_covered_display": "94"}, "missing_lines": [25, 141, 142, 196, 197], "excluded_lines": [], "functions": {"binom_pmf": {"executed_lines": [23, 24, 26, 27, 30], "summary": {"covered_lines": 5, "num_statements": 6, "percent_covered": 83.33333333333333, "percent_covered_display": "83", 
"missing_lines": 1, "excluded_lines": 0, "percent_statements_covered": 83.33333333333333, "percent_statements_covered_display": "83"}, "missing_lines": [25], "excluded_lines": [], "start_line": 21}, "get_dict_depth": {"executed_lines": [42, 43, 45, 46, 47, 48, 50], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 33}, "delete_private_keys": {"executed_lines": [63, 65, 66, 67, 68, 70, 72], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 53}, "flatten": {"executed_lines": [91, 93, 94, 95, 97, 98, 100, 102], "summary": {"covered_lines": 8, "num_statements": 8, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 75}, "unflatten": {"executed_lines": [115, 117, 118, 119, 120, 122, 124], "summary": {"covered_lines": 7, "num_statements": 7, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 105}, "get_modalities_subset": {"executed_lines": [137, 138, 139, 140, 143], "summary": {"covered_lines": 5, "num_statements": 7, "percent_covered": 71.42857142857143, "percent_covered_display": "71", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 71.42857142857143, "percent_statements_covered_display": "71"}, 
"missing_lines": [141, 142], "excluded_lines": [], "start_line": 127}, "load_patient_data": {"executed_lines": [151, 152, 154, 155, 156], "summary": {"covered_lines": 5, "num_statements": 5, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 146}, "load_yaml_params": {"executed_lines": [162, 163, 164, 165], "summary": {"covered_lines": 4, "num_statements": 4, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 160}, "load_model_samples": {"executed_lines": [177, 178, 179, 180], "summary": {"covered_lines": 4, "num_statements": 4, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 169}, "get_hdf5_backend": {"executed_lines": [192, 193, 195, 199], "summary": {"covered_lines": 4, "num_statements": 6, "percent_covered": 66.66666666666667, "percent_covered_display": "67", "missing_lines": 2, "excluded_lines": 0, "percent_statements_covered": 66.66666666666667, "percent_statements_covered_display": "67"}, "missing_lines": [196, 197], "excluded_lines": [], "start_line": 184}, "": {"executed_lines": [3, 5, 6, 7, 8, 9, 10, 11, 13, 18, 21, 33, 53, 75, 105, 127, 146, 159, 160, 168, 169, 183, 184], "summary": {"covered_lines": 23, "num_statements": 23, "percent_covered": 100.0, "percent_covered_display": "100", "missing_lines": 0, "excluded_lines": 0, "percent_statements_covered": 100.0, "percent_statements_covered_display": "100"}, "missing_lines": [], "excluded_lines": [], "start_line": 1}}, "classes": {"": 
{"executed_lines": [3, 5, 6, 7, 8, 9, 10, 11, 13, 18, 21, 23, 24, 26, 27, 30, 33, 42, 43, 45, 46, 47, 48, 50, 53, 63, 65, 66, 67, 68, 70, 72, 75, 91, 93, 94, 95, 97, 98, 100, 102, 105, 115, 117, 118, 119, 120, 122, 124, 127, 137, 138, 139, 140, 143, 146, 151, 152, 154, 155, 156, 159, 160, 162, 163, 164, 165, 168, 169, 177, 178, 179, 180, 183, 184, 192, 193, 195, 199], "summary": {"covered_lines": 79, "num_statements": 84, "percent_covered": 94.04761904761905, "percent_covered_display": "94", "missing_lines": 5, "excluded_lines": 0, "percent_statements_covered": 94.04761904761905, "percent_statements_covered_display": "94"}, "missing_lines": [25, 141, 142, 196, 197], "excluded_lines": [], "start_line": 1}}}}, "totals": {"covered_lines": 1239, "num_statements": 1735, "percent_covered": 71.41210374639769, "percent_covered_display": "71", "missing_lines": 496, "excluded_lines": 15, "percent_statements_covered": 71.41210374639769, "percent_statements_covered_display": "71"}}, "coverage_path": "."} \ No newline at end of file diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index d0c3cbf..0000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
-%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index 747ffb7..0000000 --- a/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.https://www.sphinx-doc.org/ - exit /b 1 -) - -if "%1" == "" goto help - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css deleted file mode 100644 index 4e87d9c..0000000 --- a/docs/source/_static/css/custom.css +++ /dev/null @@ -1,11 +0,0 @@ -/* Indent by four spaces */ -.sig-param::before { - content: "\a\20\20\20\20"; - white-space: pre; -} - -/* Don't indent closing bracket */ -dt em.sig-param:last-of-type::after { - content: "\a"; - white-space: pre; -} diff --git a/docs/source/_static/favicon.png b/docs/source/_static/favicon.png deleted file mode 100644 index 6666b97..0000000 Binary files a/docs/source/_static/favicon.png and /dev/null differ diff --git a/docs/source/_static/github-social-card.png b/docs/source/_static/github-social-card.png deleted file mode 100644 index 94a83c6..0000000 Binary files a/docs/source/_static/github-social-card.png and /dev/null differ diff --git a/docs/source/compute/init.rst b/docs/source/compute/init.rst deleted file mode 100644 index af6e244..0000000 
--- a/docs/source/compute/init.rst +++ /dev/null @@ -1,25 +0,0 @@ -.. currentmodule:: lyscripts.compute - -Model Predictions -================= - -.. automodule:: lyscripts.compute - :members: - -Command Help ------------- - -.. program-output:: lyscripts compute --help - - -Submodules ----------- - -.. toctree:: - :maxdepth: 1 - - priors - posteriors - prevalences - risks - utils diff --git a/docs/source/compute/posteriors.rst b/docs/source/compute/posteriors.rst deleted file mode 100644 index a99c0ef..0000000 --- a/docs/source/compute/posteriors.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.compute.posteriors - -Posterior State Distributions -============================= - -.. automodule:: lyscripts.compute.posteriors - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts compute posteriors --help diff --git a/docs/source/compute/prevalences.rst b/docs/source/compute/prevalences.rst deleted file mode 100644 index 35b6276..0000000 --- a/docs/source/compute/prevalences.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.compute.prevalences - -Predict Prevalence of Involvement -================================= - -.. automodule:: lyscripts.compute.prevalences - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts compute prevalences --help diff --git a/docs/source/compute/priors.rst b/docs/source/compute/priors.rst deleted file mode 100644 index 41813b3..0000000 --- a/docs/source/compute/priors.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.compute.priors - -Prior State Distributions -========================= - -.. automodule:: lyscripts.compute.priors - :members: - :show-inheritance: - -Command Help ------------- - -.. 
program-output:: lyscripts compute priors --help diff --git a/docs/source/compute/risks.rst b/docs/source/compute/risks.rst deleted file mode 100644 index e23d041..0000000 --- a/docs/source/compute/risks.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.compute.risks - -Compute Risk of Involvement -=========================== - -.. automodule:: lyscripts.compute.risks - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts compute risks --help diff --git a/docs/source/compute/utils.rst b/docs/source/compute/utils.rst deleted file mode 100644 index 1335e51..0000000 --- a/docs/source/compute/utils.rst +++ /dev/null @@ -1,8 +0,0 @@ -.. currentmodule:: lyscripts.compute.utils - -Helpers for Computing Quantities -================================ - -.. automodule:: lyscripts.compute.utils - :members: - :show-inheritance: diff --git a/docs/source/conf.py b/docs/source/conf.py deleted file mode 100644 index 1c7a8d3..0000000 --- a/docs/source/conf.py +++ /dev/null @@ -1,98 +0,0 @@ -"""Configuration file for the Sphinx documentation builder. - -For the full list of built-in configuration values, see the documentation: -https://www.sphinx-doc.org/en/master/usage/configuration.html. 
-""" - -import lyscripts - -# -- Project information ----------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information - -project = "lyscripts" -copyright = "2022, Roman Ludwig" # noqa: A001 -author = "Roman Ludwig" -gh_username = "rmnldwg" -version = lyscripts.__version__ -release = lyscripts.__version__ - -# -- General configuration --------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration - -extensions = [ - "sphinx.ext.intersphinx", - "sphinx.ext.autodoc", - "sphinx.ext.mathjax", - "sphinx.ext.viewcode", - "sphinx.ext.napoleon", - "sphinxcontrib.programoutput", - "sphinxcontrib.autodoc_pydantic", - "myst_parser", -] - -# markdown to reST -source_suffix = [".rst", ".md"] - -templates_path = ["_templates"] -exclude_patterns = [] - -# document classes and their constructors -autoclass_content = "class" - -# sort members by source -autodoc_member_order = "bysource" - -# show type hints -autodoc_typehints = "signature" - -# create links to other projects -intersphinx_mapping = { - "python": ("https://docs.python.org/3.10", None), - "lymph": ("https://lymph-model.readthedocs.io/stable/", None), - "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), - "numpy": ("https://numpy.org/doc/stable/", None), - "lydata": ("https://lydata.readthedocs.io/stable/", None), - "emcee": ("https://emcee.readthedocs.io/en/stable/", None), -} - -# autodoc_pydantic settings -autodoc_pydantic_model_show_config_summary = False -autodoc_pydantic_model_show_validator_members = False -autodoc_pydantic_model_show_field_summary = False -autodoc_pydantic_model_member_order = "bysource" -autodoc_pydantic_settings_show_config_summary = False -autodoc_pydantic_settings_show_validator_members = False -autodoc_pydantic_settings_show_field_summary = False -autodoc_pydantic_settings_member_order = "bysource" 
-autodoc_pydantic_field_show_constraints = False - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# - -html_theme = "sphinx_book_theme" -html_theme_options = { - "repository_url": f"https://github.com/{gh_username}/{project}", - "repository_branch": "main", - "use_repository_button": True, - "show_navbar_depth": 3, - "home_page_in_toc": True, -} -html_favicon = "_static/favicon.png" - -# import sphinx_modern_theme -# html_theme = "sphinx_modern_theme" -# html_theme_path = [sphinx_modern_theme.get_html_theme_path()] - -# html_theme = "bootstrap-astropy" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["./_static"] -html_css_files = [ - "css/custom.css", -] diff --git a/docs/source/configs.rst b/docs/source/configs.rst deleted file mode 100644 index 3d209de..0000000 --- a/docs/source/configs.rst +++ /dev/null @@ -1,8 +0,0 @@ -.. currentmodule:: lyscripts.configs - -Pydantic Configurations -======================= - -.. automodule:: lyscripts.configs - :members: - :show-inheritance: diff --git a/docs/source/data/collect.rst b/docs/source/data/collect.rst deleted file mode 100644 index 8c8b08b..0000000 --- a/docs/source/data/collect.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.data.collect - -Collect lyDATA Tables Interactively -=================================== - -.. automodule:: lyscripts.data.collect - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts data collect --help diff --git a/docs/source/data/enhance.rst b/docs/source/data/enhance.rst deleted file mode 100644 index d1f28e7..0000000 --- a/docs/source/data/enhance.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. 
currentmodule:: lyscripts.data.enhance - -Infer Additional Data Columns -============================= - -.. automodule:: lyscripts.data.enhance - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts data enhance --help diff --git a/docs/source/data/filter.rst b/docs/source/data/filter.rst deleted file mode 100644 index e8e6824..0000000 --- a/docs/source/data/filter.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.data.filter - -Filtering Datasets -================== - -.. automodule:: lyscripts.data.filter - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts data filter --help diff --git a/docs/source/data/generate.rst b/docs/source/data/generate.rst deleted file mode 100644 index 7ba9820..0000000 --- a/docs/source/data/generate.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.data.generate - -Generating Synthetic Data -========================= - -.. automodule:: lyscripts.data.generate - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts data generate --help diff --git a/docs/source/data/init.rst b/docs/source/data/init.rst deleted file mode 100644 index 014bb85..0000000 --- a/docs/source/data/init.rst +++ /dev/null @@ -1,29 +0,0 @@ -.. currentmodule:: lyscripts.data - -Data Commands/Helpers -===================== - -.. automodule:: lyscripts.data - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts data --help - - -Submodules ----------- - -.. toctree:: - :maxdepth: 1 - - collect - lyproxify - join - split - filter - enhance - generate - utils diff --git a/docs/source/data/join.rst b/docs/source/data/join.rst deleted file mode 100644 index a400a72..0000000 --- a/docs/source/data/join.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.data.join - -Join Data Files -=============== - -.. 
automodule:: lyscripts.data.join - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts data join --help diff --git a/docs/source/data/lyproxify.rst b/docs/source/data/lyproxify.rst deleted file mode 100644 index 653819c..0000000 --- a/docs/source/data/lyproxify.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.data.lyproxify - -Map to LyProX Format -==================== - -.. automodule:: lyscripts.data.lyproxify - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts data lyproxify --help diff --git a/docs/source/data/split.rst b/docs/source/data/split.rst deleted file mode 100644 index e8a831b..0000000 --- a/docs/source/data/split.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.data.split - -Split Data -========== - -.. automodule:: lyscripts.data.split - :members: - :show-inheritance: - -Command Help ------------- - -.. program-output:: lyscripts data split --help diff --git a/docs/source/data/utils.rst b/docs/source/data/utils.rst deleted file mode 100644 index 89df120..0000000 --- a/docs/source/data/utils.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. currentmodule:: lyscripts.data.utils - -Utilities Related to Data Processing -==================================== - -.. automodule:: lyscripts.data.utils - :members: diff --git a/docs/source/decorators.rst b/docs/source/decorators.rst deleted file mode 100644 index e2adbed..0000000 --- a/docs/source/decorators.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. currentmodule:: lyscripts.decorators - -Decorators -========== - -.. automodule:: lyscripts.decorators - :members: diff --git a/docs/source/evaluate.rst b/docs/source/evaluate.rst deleted file mode 100644 index b87d741..0000000 --- a/docs/source/evaluate.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. currentmodule:: lyscripts.evaluate - -Evaluation -========== - -.. automodule:: lyscripts.evaluate - :members: - :show-inheritance: - -Command Help ------------- - -.. - .. 
program-output:: lyscripts evaluate --help diff --git a/docs/source/index.rst b/docs/source/index.rst deleted file mode 100644 index 647e4e1..0000000 --- a/docs/source/index.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. lyscripts documentation master file, created by - sphinx-quickstart on Wed Mar 20 20:56:17 2024. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Introduction -============ - - -.. include:: ../../README.md - :end-before: - :parser: myst_parser.sphinx_ - - -Documentation -------------- - -.. toctree:: - :maxdepth: 1 - - init - configs - data/init - sample - compute/init - evaluate - plots - schedule - schema - utils - decorators - - -Indices and tables ------------------- - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/source/init.rst b/docs/source/init.rst deleted file mode 100644 index 72ac180..0000000 --- a/docs/source/init.rst +++ /dev/null @@ -1,12 +0,0 @@ -.. currentmodule:: lyscripts - -Main Lyscripts CLI -================== - -.. automodule:: lyscripts - :members: - -Command Help ------------- - -.. program-output:: lyscripts --help diff --git a/docs/source/plots.rst b/docs/source/plots.rst deleted file mode 100644 index 2c3f504..0000000 --- a/docs/source/plots.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. currentmodule:: lyscripts.plots - -Plotting Utilities -================== - -.. automodule:: lyscripts.plots - :members: diff --git a/docs/source/sample.rst b/docs/source/sample.rst deleted file mode 100644 index ff5d825..0000000 --- a/docs/source/sample.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. currentmodule:: lyscripts.sample - -MCMC Sampling -============= - -.. automodule:: lyscripts.sample - :members: - :show-inheritance: - -Command Help ------------- - -.. 
program-output:: lyscripts sample --help diff --git a/docs/source/schedule.rst b/docs/source/schedule.rst deleted file mode 100644 index 4869206..0000000 --- a/docs/source/schedule.rst +++ /dev/null @@ -1,12 +0,0 @@ -.. currentmodule:: lyscripts.schedule - -Temperature Schedule -==================== - -.. automodule:: lyscripts.schedule - :members: - -Command Help ------------- - -.. program-output:: lyscripts schedule --help diff --git a/docs/source/schema.rst b/docs/source/schema.rst deleted file mode 100644 index 9bab995..0000000 --- a/docs/source/schema.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. currentmodule:: lyscripts.schema - -JSON Schema -=========== - -.. automodule:: lyscripts.schema - :members: diff --git a/docs/source/utils.rst b/docs/source/utils.rst deleted file mode 100644 index 5f06cba..0000000 --- a/docs/source/utils.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. currentmodule:: lyscripts.utils - -Top Level Utilities -=================== - -.. automodule:: lyscripts.utils - :members: diff --git a/endpoint.json b/endpoint.json new file mode 100644 index 0000000..92f3222 --- /dev/null +++ b/endpoint.json @@ -0,0 +1 @@ +{"schemaVersion": 1, "label": "Coverage", "message": "71%", "color": "orange"} \ No newline at end of file diff --git a/favicon.png b/favicon.png deleted file mode 100644 index 6666b97..0000000 Binary files a/favicon.png and /dev/null differ diff --git a/github-social-card.png b/github-social-card.png deleted file mode 100644 index 94a83c6..0000000 Binary files a/github-social-card.png and /dev/null differ diff --git a/htmlcov/class_index.html b/htmlcov/class_index.html new file mode 100644 index 0000000..2fb0b5d --- /dev/null +++ b/htmlcov/class_index.html @@ -0,0 +1,812 @@ + + + + + Coverage report + + + + + +
+
+

Coverage report: + 71% +

+ +
+ +
+ + +
+
+

+ Files + Functions + Classes +

+

+ coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Fileclass statementsmissingexcluded coverage
src / lyscripts / __init__.pyLyscriptsCLI 770 0%
src / lyscripts / __init__.py(no class) 2300 100%
src / lyscripts / __main__.py(no class) 330 0%
src / lyscripts / _version.py(no class) 1100 100%
src / lyscripts / cli.pyInterceptHandler 14140 0%
src / lyscripts / cli.py(no class) 31120 61%
src / lyscripts / compute / __init__.pyComputeCLI 110 0%
src / lyscripts / compute / __init__.py(no class) 400 100%
src / lyscripts / compute / __main__.py(no class) 550 0%
src / lyscripts / compute / evidence.pyEvidenceCLI 24240 0%
src / lyscripts / compute / evidence.py(no class) 41170 59%
src / lyscripts / compute / posteriors.pyPosteriorsCLI 17170 0%
src / lyscripts / compute / posteriors.py(no class) 2920 93%
src / lyscripts / compute / prevalences.pyPrevalencesCLI 1900 100%
src / lyscripts / compute / prevalences.py(no class) 6370 89%
src / lyscripts / compute / priors.pyPriorsCLI 1200 100%
src / lyscripts / compute / priors.py(no class) 2320 91%
src / lyscripts / compute / risks.pyRisksCLI 22220 0%
src / lyscripts / compute / risks.py(no class) 29110 62%
src / lyscripts / compute / utils.pyHDF5FileStorage 2630 88%
src / lyscripts / compute / utils.py(no class) 9230 97%
src / lyscripts / configs.pyDataConfig 410 75%
src / lyscripts / configs.pyDiagnosisConfig 110 0%
src / lyscripts / configs.pyGraphConfig 200 100%
src / lyscripts / configs.pyModelConfig 1540 73%
src / lyscripts / configs.pyDeprecatedModelConfig 1900 100%
src / lyscripts / configs.pySamplingConfig 100 100%
src / lyscripts / configs.pyScheduleConfig 880 0%
src / lyscripts / configs.pyScenarioConfig 840 50%
src / lyscripts / configs.pyDynamicYamlConfigSettingsSource 1210 92%
src / lyscripts / configs.pyBaseCLI 300 100%
src / lyscripts / configs.py(no class) 207220 89%
src / lyscripts / data / __init__.pyDataCLI 110 0%
src / lyscripts / data / __init__.py(no class) 500 100%
src / lyscripts / data / __main__.py(no class) 18180 0%
src / lyscripts / data / collect / __init__.pyCollectorCLI 550 0%
src / lyscripts / data / collect / __init__.py(no class) 53220 58%
src / lyscripts / data / enhance.pyEnhanceCLI 440 0%
src / lyscripts / data / enhance.py(no class) 1520 87%
src / lyscripts / data / fetch.pyFetchCLI 550 0%
src / lyscripts / data / fetch.py(no class) 1620 88%
src / lyscripts / data / filter.pyFilterCLI 28280 0%
src / lyscripts / data / filter.py(no class) 2020 90%
src / lyscripts / data / generate.pyGenerateCLI 1720 88%
src / lyscripts / data / generate.py(no class) 1820 89%
src / lyscripts / data / join.pyJoinCLI 880 0%
src / lyscripts / data / join.py(no class) 1420 86%
src / lyscripts / data / lyproxify.pyLyproxifyCLI 17170 0%
src / lyscripts / data / lyproxify.py(no class) 106501 53%
src / lyscripts / data / split.pySplitCLI 12120 0%
src / lyscripts / data / split.py(no class) 1720 88%
src / lyscripts / data / utils.py(no class) 900 100%
src / lyscripts / decorators.py(no class) 4140 90%
src / lyscripts / evaluate.py(no class) 70520 26%
src / lyscripts / integrate.pyIntegrateCLI 17170 0%
src / lyscripts / integrate.py(no class) 2950 83%
src / lyscripts / plots.pyAbstractDistribution 103 100%
src / lyscripts / plots.pyHistogram 1500 100%
src / lyscripts / plots.pyBetaPosterior 2220 91%
src / lyscripts / plots.py(no class) 122166 87%
src / lyscripts / sample.pyCompletedItersColumn 510 80%
src / lyscripts / sample.pyItersPerSecondColumn 400 100%
src / lyscripts / sample.pyAcorTime 300 100%
src / lyscripts / sample.pyNumAccepted 300 100%
src / lyscripts / sample.pySampleCLI 1200 100%
src / lyscripts / sample.py(no class) 110113 90%
src / lyscripts / schedule.pyScheduleCLI 330 0%
src / lyscripts / schedule.py(no class) 820 75%
src / lyscripts / schema.py(no class) 2230 86%
src / lyscripts / utils.py(no class) 8450 94%
Total  173549615 71%
+

+ No items found using the specified filter. +

+

7 empty classes skipped.

+
+ + + diff --git a/htmlcov/coverage_html_cb_dd2e7eb5.js b/htmlcov/coverage_html_cb_dd2e7eb5.js new file mode 100644 index 0000000..6f87174 --- /dev/null +++ b/htmlcov/coverage_html_cb_dd2e7eb5.js @@ -0,0 +1,735 @@ +// Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 +// For details: https://github.com/coveragepy/coveragepy/blob/main/NOTICE.txt + +// Coverage.py HTML report browser code. +/*jslint browser: true, sloppy: true, vars: true, plusplus: true, maxerr: 50, indent: 4 */ +/*global coverage: true, document, window, $ */ + +coverage = {}; + +// General helpers +function debounce(callback, wait) { + let timeoutId = null; + return function(...args) { + clearTimeout(timeoutId); + timeoutId = setTimeout(() => { + callback.apply(this, args); + }, wait); + }; +}; + +function checkVisible(element) { + const rect = element.getBoundingClientRect(); + const viewBottom = Math.max(document.documentElement.clientHeight, window.innerHeight); + const viewTop = 30; + return !(rect.bottom < viewTop || rect.top >= viewBottom); +} + +function on_click(sel, fn) { + const elt = document.querySelector(sel); + if (elt) { + elt.addEventListener("click", fn); + } +} + +// Helpers for table sorting +function getCellValue(row, column = 0) { + const cell = row.cells[column] // nosemgrep: eslint.detect-object-injection + if (cell.childElementCount == 1) { + var child = cell.firstElementChild; + if (child.tagName === "A") { + child = child.firstElementChild; + } + if (child instanceof HTMLDataElement && child.value) { + return child.value; + } + } + return cell.innerText || cell.textContent; +} + +function rowComparator(rowA, rowB, column = 0) { + let valueA = getCellValue(rowA, column); + let valueB = getCellValue(rowB, column); + if (!isNaN(valueA) && !isNaN(valueB)) { + return valueA - valueB; + } + return valueA.localeCompare(valueB, undefined, {numeric: true}); +} + +function sortColumn(th) { + // Get the current sorting direction of the selected header, + 
// clear state on other headers and then set the new sorting direction. + const currentSortOrder = th.getAttribute("aria-sort"); + [...th.parentElement.cells].forEach(header => header.setAttribute("aria-sort", "none")); + var direction; + if (currentSortOrder === "none") { + direction = th.dataset.defaultSortOrder || "ascending"; + } + else if (currentSortOrder === "ascending") { + direction = "descending"; + } + else { + direction = "ascending"; + } + th.setAttribute("aria-sort", direction); + + const column = [...th.parentElement.cells].indexOf(th) + + // Sort all rows and afterwards append them in order to move them in the DOM. + Array.from(th.closest("table").querySelectorAll("tbody tr")) + .sort((rowA, rowB) => rowComparator(rowA, rowB, column) * (direction === "ascending" ? 1 : -1)) + .forEach(tr => tr.parentElement.appendChild(tr)); + + // Save the sort order for next time. + if (th.id !== "region") { + let th_id = "file"; // Sort by file if we don't have a column id + let current_direction = direction; + const stored_list = localStorage.getItem(coverage.INDEX_SORT_STORAGE); + if (stored_list) { + ({th_id, direction} = JSON.parse(stored_list)) + } + localStorage.setItem(coverage.INDEX_SORT_STORAGE, JSON.stringify({ + "th_id": th.id, + "direction": current_direction + })); + if (th.id !== th_id || document.getElementById("region")) { + // Sort column has changed, unset sorting by function or class. + localStorage.setItem(coverage.SORTED_BY_REGION, JSON.stringify({ + "by_region": false, + "region_direction": current_direction + })); + } + } + else { + // Sort column has changed to by function or class, remember that. + localStorage.setItem(coverage.SORTED_BY_REGION, JSON.stringify({ + "by_region": true, + "region_direction": direction + })); + } +} + +// Find all the elements with data-shortcut attribute, and use them to assign a shortcut key. 
+coverage.assign_shortkeys = function () { + document.querySelectorAll("[data-shortcut]").forEach(element => { + document.addEventListener("keypress", event => { + if (event.target.tagName.toLowerCase() === "input") { + return; // ignore keypress from search filter + } + if (event.key === element.dataset.shortcut) { + element.click(); + } + }); + }); +}; + +// Create the events for the filter box. +coverage.wire_up_filter = function () { + // Populate the filter and hide100 inputs if there are saved values for them. + const saved_filter_value = localStorage.getItem(coverage.FILTER_STORAGE); + if (saved_filter_value) { + document.getElementById("filter").value = saved_filter_value; + } + const saved_hide100_value = localStorage.getItem(coverage.HIDE100_STORAGE); + if (saved_hide100_value) { + document.getElementById("hide100").checked = JSON.parse(saved_hide100_value); + } + + // Cache elements. + const table = document.querySelector("table.index"); + const table_body_rows = table.querySelectorAll("tbody tr"); + const no_rows = document.getElementById("no_rows"); + + const footer = table.tFoot.rows[0]; + const ratio_columns = Array.from(footer.cells).map(cell => Boolean(cell.dataset.ratio)); + + // Observe filter keyevents. + const filter_handler = (event => { + // Keep running total of each metric, first index contains number of shown rows + const totals = ratio_columns.map( + is_ratio => is_ratio ? {"numer": 0, "denom": 0} : 0 + ); + + var text = document.getElementById("filter").value; + // Store filter value + localStorage.setItem(coverage.FILTER_STORAGE, text); + const casefold = (text === text.toLowerCase()); + const hide100 = document.getElementById("hide100").checked; + // Store hide value. + localStorage.setItem(coverage.HIDE100_STORAGE, JSON.stringify(hide100)); + + // Hide / show elements. + table_body_rows.forEach(row => { + var show = false; + // Check the text filter. 
+ for (let column = 0; column < totals.length; column++) { + cell = row.cells[column]; + if (cell.classList.contains("name")) { + var celltext = cell.textContent; + if (casefold) { + celltext = celltext.toLowerCase(); + } + if (celltext.includes(text)) { + show = true; + } + } + } + + // Check the "hide covered" filter. + if (show && hide100) { + const [numer, denom] = row.cells[row.cells.length - 1].dataset.ratio.split(" "); + show = (numer !== denom); + } + + if (!show) { + // hide + row.classList.add("hidden"); + return; + } + + // show + row.classList.remove("hidden"); + totals[0]++; + + for (let column = 0; column < totals.length; column++) { + // Accumulate dynamic totals + cell = row.cells[column] // nosemgrep: eslint.detect-object-injection + if (cell.matches(".name, .spacer")) { + continue; + } + if (ratio_columns[column] && cell.dataset.ratio) { + // Column stores a ratio + const [numer, denom] = cell.dataset.ratio.split(" "); + totals[column]["numer"] += parseInt(numer, 10); // nosemgrep: eslint.detect-object-injection + totals[column]["denom"] += parseInt(denom, 10); // nosemgrep: eslint.detect-object-injection + } + else { + totals[column] += parseInt(cell.textContent, 10); // nosemgrep: eslint.detect-object-injection + } + } + }); + + // Show placeholder if no rows will be displayed. + if (!totals[0]) { + // Show placeholder, hide table. + no_rows.style.display = "block"; + table.style.display = "none"; + return; + } + + // Hide placeholder, show table. + no_rows.style.display = null; + table.style.display = null; + + // Calculate new dynamic sum values based on visible rows. + for (let column = 0; column < totals.length; column++) { + // Get footer cell element. + const cell = footer.cells[column]; // nosemgrep: eslint.detect-object-injection + if (cell.matches(".name, .spacer")) { + continue; + } + + // Set value into dynamic footer cell element. 
+ if (ratio_columns[column]) { + // Percentage column uses the numerator and denominator, + // and adapts to the number of decimal places. + const match = /\.([0-9]+)/.exec(cell.textContent); + const places = match ? match[1].length : 0; + const { numer, denom } = totals[column]; // nosemgrep: eslint.detect-object-injection + cell.dataset.ratio = `${numer} ${denom}`; + // Check denom to prevent NaN if filtered files contain no statements + cell.textContent = denom + ? `${(numer * 100 / denom).toFixed(places)}%` + : `${(100).toFixed(places)}%`; + } + else { + cell.textContent = totals[column]; // nosemgrep: eslint.detect-object-injection + } + } + }); + + document.getElementById("filter").addEventListener("input", debounce(filter_handler)); + document.getElementById("hide100").addEventListener("input", debounce(filter_handler)); + + // Trigger change event on setup, to force filter on page refresh + // (filter value may still be present). + document.getElementById("filter").dispatchEvent(new Event("input")); + document.getElementById("hide100").dispatchEvent(new Event("input")); +}; +coverage.FILTER_STORAGE = "COVERAGE_FILTER_VALUE"; +coverage.HIDE100_STORAGE = "COVERAGE_HIDE100_VALUE"; + +// Set up the click-to-sort columns. 
+coverage.wire_up_sorting = function () { + document.querySelectorAll("[data-sortable] th[aria-sort]").forEach( + th => th.addEventListener("click", e => sortColumn(e.target)) + ); + + // Look for a localStorage item containing previous sort settings: + let th_id = "file", direction = "ascending"; + const stored_list = localStorage.getItem(coverage.INDEX_SORT_STORAGE); + if (stored_list) { + ({th_id, direction} = JSON.parse(stored_list)); + } + let by_region = false, region_direction = "ascending"; + const sorted_by_region = localStorage.getItem(coverage.SORTED_BY_REGION); + if (sorted_by_region) { + ({ + by_region, + region_direction + } = JSON.parse(sorted_by_region)); + } + + const region_id = "region"; + if (by_region && document.getElementById(region_id)) { + direction = region_direction; + } + // If we are in a page that has a column with id of "region", sort on + // it if the last sort was by function or class. + let th; + if (document.getElementById(region_id)) { + th = document.getElementById(by_region ? region_id : th_id); + } + else { + th = document.getElementById(th_id); + } + th.setAttribute("aria-sort", direction === "ascending" ? "descending" : "ascending"); + th.click() +}; + +coverage.INDEX_SORT_STORAGE = "COVERAGE_INDEX_SORT_2"; +coverage.SORTED_BY_REGION = "COVERAGE_SORT_REGION"; + +// Loaded on index.html +coverage.index_ready = function () { + coverage.assign_shortkeys(); + coverage.wire_up_filter(); + coverage.wire_up_sorting(); + + on_click(".button_prev_file", coverage.to_prev_file); + on_click(".button_next_file", coverage.to_next_file); + + on_click(".button_show_hide_help", coverage.show_hide_help); +}; + +// -- pyfile stuff -- + +coverage.LINE_FILTERS_STORAGE = "COVERAGE_LINE_FILTERS"; + +coverage.pyfile_ready = function () { + // If we're directed to a particular line number, highlight the line. 
+ var frag = location.hash; + if (frag.length > 2 && frag[1] === "t") { + document.querySelector(frag).closest(".n").classList.add("highlight"); + coverage.set_sel(parseInt(frag.substr(2), 10)); + } + else { + coverage.set_sel(0); + } + + on_click(".button_toggle_run", coverage.toggle_lines); + on_click(".button_toggle_mis", coverage.toggle_lines); + on_click(".button_toggle_exc", coverage.toggle_lines); + on_click(".button_toggle_par", coverage.toggle_lines); + + on_click(".button_next_chunk", coverage.to_next_chunk_nicely); + on_click(".button_prev_chunk", coverage.to_prev_chunk_nicely); + on_click(".button_top_of_page", coverage.to_top); + on_click(".button_first_chunk", coverage.to_first_chunk); + + on_click(".button_prev_file", coverage.to_prev_file); + on_click(".button_next_file", coverage.to_next_file); + on_click(".button_to_index", coverage.to_index); + + on_click(".button_show_hide_help", coverage.show_hide_help); + + coverage.filters = undefined; + try { + coverage.filters = localStorage.getItem(coverage.LINE_FILTERS_STORAGE); + } catch(err) {} + + if (coverage.filters) { + coverage.filters = JSON.parse(coverage.filters); + } + else { + coverage.filters = {run: false, exc: true, mis: true, par: true}; + } + + for (cls in coverage.filters) { + coverage.set_line_visibilty(cls, coverage.filters[cls]); // nosemgrep: eslint.detect-object-injection + } + + coverage.assign_shortkeys(); + coverage.init_scroll_markers(); + coverage.wire_up_sticky_header(); + + document.querySelectorAll("[id^=ctxs]").forEach( + cbox => cbox.addEventListener("click", coverage.expand_contexts) + ); + + // Rebuild scroll markers when the window height changes. 
+ window.addEventListener("resize", coverage.build_scroll_markers); +}; + +coverage.toggle_lines = function (event) { + const btn = event.target.closest("button"); + const category = btn.value + const show = !btn.classList.contains("show_" + category); + coverage.set_line_visibilty(category, show); + coverage.build_scroll_markers(); + coverage.filters[category] = show; + try { + localStorage.setItem(coverage.LINE_FILTERS_STORAGE, JSON.stringify(coverage.filters)); + } catch(err) {} +}; + +coverage.set_line_visibilty = function (category, should_show) { + const cls = "show_" + category; + const btn = document.querySelector(".button_toggle_" + category); + if (btn) { + if (should_show) { + document.querySelectorAll("#source ." + category).forEach(e => e.classList.add(cls)); + btn.classList.add(cls); + } + else { + document.querySelectorAll("#source ." + category).forEach(e => e.classList.remove(cls)); + btn.classList.remove(cls); + } + } +}; + +// Return the nth line div. +coverage.line_elt = function (n) { + return document.getElementById("t" + n)?.closest("p"); +}; + +// Set the selection. b and e are line numbers. +coverage.set_sel = function (b, e) { + // The first line selected. + coverage.sel_begin = b; + // The next line not selected. + coverage.sel_end = (e === undefined) ? 
b+1 : e; +}; + +coverage.to_top = function () { + coverage.set_sel(0, 1); + coverage.scroll_window(0); +}; + +coverage.to_first_chunk = function () { + coverage.set_sel(0, 1); + coverage.to_next_chunk(); +}; + +coverage.to_prev_file = function () { + window.location = document.getElementById("prevFileLink").href; +} + +coverage.to_next_file = function () { + window.location = document.getElementById("nextFileLink").href; +} + +coverage.to_index = function () { + location.href = document.getElementById("indexLink").href; +} + +coverage.show_hide_help = function () { + const helpCheck = document.getElementById("help_panel_state") + helpCheck.checked = !helpCheck.checked; +} + +// Return a string indicating what kind of chunk this line belongs to, +// or null if not a chunk. +coverage.chunk_indicator = function (line_elt) { + const classes = line_elt?.className; + if (!classes) { + return null; + } + const match = classes.match(/\bshow_\w+\b/); + if (!match) { + return null; + } + return match[0]; +}; + +coverage.to_next_chunk = function () { + const c = coverage; + + // Find the start of the next colored chunk. + var probe = c.sel_end; + var chunk_indicator, probe_line; + while (true) { + probe_line = c.line_elt(probe); + if (!probe_line) { + return; + } + chunk_indicator = c.chunk_indicator(probe_line); + if (chunk_indicator) { + break; + } + probe++; + } + + // There's a next chunk, `probe` points to it. + var begin = probe; + + // Find the end of this chunk. + var next_indicator = chunk_indicator; + while (next_indicator === chunk_indicator) { + probe++; + probe_line = c.line_elt(probe); + next_indicator = c.chunk_indicator(probe_line); + } + c.set_sel(begin, probe); + c.show_selection(); +}; + +coverage.to_prev_chunk = function () { + const c = coverage; + + // Find the end of the prev colored chunk. 
+ var probe = c.sel_begin-1; + var probe_line = c.line_elt(probe); + if (!probe_line) { + return; + } + var chunk_indicator = c.chunk_indicator(probe_line); + while (probe > 1 && !chunk_indicator) { + probe--; + probe_line = c.line_elt(probe); + if (!probe_line) { + return; + } + chunk_indicator = c.chunk_indicator(probe_line); + } + + // There's a prev chunk, `probe` points to its last line. + var end = probe+1; + + // Find the beginning of this chunk. + var prev_indicator = chunk_indicator; + while (prev_indicator === chunk_indicator) { + probe--; + if (probe <= 0) { + return; + } + probe_line = c.line_elt(probe); + prev_indicator = c.chunk_indicator(probe_line); + } + c.set_sel(probe+1, end); + c.show_selection(); +}; + +// Returns 0, 1, or 2: how many of the two ends of the selection are on +// the screen right now? +coverage.selection_ends_on_screen = function () { + if (coverage.sel_begin === 0) { + return 0; + } + + const begin = coverage.line_elt(coverage.sel_begin); + const end = coverage.line_elt(coverage.sel_end-1); + + return ( + (checkVisible(begin) ? 1 : 0) + + (checkVisible(end) ? 1 : 0) + ); +}; + +coverage.to_next_chunk_nicely = function () { + if (coverage.selection_ends_on_screen() === 0) { + // The selection is entirely off the screen: + // Set the top line on the screen as selection. 
+ + // This will select the top-left of the viewport + // As this is most likely the span with the line number we take the parent + const line = document.elementFromPoint(0, 0).parentElement; + if (line.parentElement !== document.getElementById("source")) { + // The element is not a source line but the header or similar + coverage.select_line_or_chunk(1); + } + else { + // We extract the line number from the id + coverage.select_line_or_chunk(parseInt(line.id.substring(1), 10)); + } + } + coverage.to_next_chunk(); +}; + +coverage.to_prev_chunk_nicely = function () { + if (coverage.selection_ends_on_screen() === 0) { + // The selection is entirely off the screen: + // Set the lowest line on the screen as selection. + + // This will select the bottom-left of the viewport + // As this is most likely the span with the line number we take the parent + const line = document.elementFromPoint(document.documentElement.clientHeight-1, 0).parentElement; + if (line.parentElement !== document.getElementById("source")) { + // The element is not a source line but the header or similar + coverage.select_line_or_chunk(coverage.lines_len); + } + else { + // We extract the line number from the id + coverage.select_line_or_chunk(parseInt(line.id.substring(1), 10)); + } + } + coverage.to_prev_chunk(); +}; + +// Select line number lineno, or if it is in a colored chunk, select the +// entire chunk +coverage.select_line_or_chunk = function (lineno) { + var c = coverage; + var probe_line = c.line_elt(lineno); + if (!probe_line) { + return; + } + var the_indicator = c.chunk_indicator(probe_line); + if (the_indicator) { + // The line is in a highlighted chunk. + // Search backward for the first line. 
+ var probe = lineno; + var indicator = the_indicator; + while (probe > 0 && indicator === the_indicator) { + probe--; + probe_line = c.line_elt(probe); + if (!probe_line) { + break; + } + indicator = c.chunk_indicator(probe_line); + } + var begin = probe + 1; + + // Search forward for the last line. + probe = lineno; + indicator = the_indicator; + while (indicator === the_indicator) { + probe++; + probe_line = c.line_elt(probe); + indicator = c.chunk_indicator(probe_line); + } + + coverage.set_sel(begin, probe); + } + else { + coverage.set_sel(lineno); + } +}; + +coverage.show_selection = function () { + // Highlight the lines in the chunk + document.querySelectorAll("#source .highlight").forEach(e => e.classList.remove("highlight")); + for (let probe = coverage.sel_begin; probe < coverage.sel_end; probe++) { + coverage.line_elt(probe).querySelector(".n").classList.add("highlight"); + } + + coverage.scroll_to_selection(); +}; + +coverage.scroll_to_selection = function () { + // Scroll the page if the chunk isn't fully visible. + if (coverage.selection_ends_on_screen() < 2) { + const element = coverage.line_elt(coverage.sel_begin); + coverage.scroll_window(element.offsetTop - 60); + } +}; + +coverage.scroll_window = function (to_pos) { + window.scroll({top: to_pos, behavior: "smooth"}); +}; + +coverage.init_scroll_markers = function () { + // Init some variables + coverage.lines_len = document.querySelectorAll("#source > p").length; + + // Build html + coverage.build_scroll_markers(); +}; + +coverage.build_scroll_markers = function () { + const temp_scroll_marker = document.getElementById("scroll_marker") + if (temp_scroll_marker) temp_scroll_marker.remove(); + // Don't build markers if the window has no scroll bar. 
+ if (document.body.scrollHeight <= window.innerHeight) { + return; + } + + const marker_scale = window.innerHeight / document.body.scrollHeight; + const line_height = Math.min(Math.max(3, window.innerHeight / coverage.lines_len), 10); + + let previous_line = -99, last_mark, last_top; + + const scroll_marker = document.createElement("div"); + scroll_marker.id = "scroll_marker"; + document.getElementById("source").querySelectorAll( + "p.show_run, p.show_mis, p.show_exc, p.show_exc, p.show_par" + ).forEach(element => { + const line_top = Math.floor(element.offsetTop * marker_scale); + const line_number = parseInt(element.querySelector(".n a").id.substr(1)); + + if (line_number === previous_line + 1) { + // If this solid missed block just make previous mark higher. + last_mark.style.height = `${line_top + line_height - last_top}px`; + } + else { + // Add colored line in scroll_marker block. + last_mark = document.createElement("div"); + last_mark.id = `m${line_number}`; + last_mark.classList.add("marker"); + last_mark.style.height = `${line_height}px`; + last_mark.style.top = `${line_top}px`; + scroll_marker.append(last_mark); + last_top = line_top; + } + + previous_line = line_number; + }); + + // Append last to prevent layout calculation + document.body.append(scroll_marker); +}; + +coverage.wire_up_sticky_header = function () { + const header = document.querySelector("header"); + const header_bottom = ( + header.querySelector(".content h2").getBoundingClientRect().top - + header.getBoundingClientRect().top + ); + + function updateHeader() { + if (window.scrollY > header_bottom) { + header.classList.add("sticky"); + } + else { + header.classList.remove("sticky"); + } + } + + window.addEventListener("scroll", updateHeader); + updateHeader(); +}; + +coverage.expand_contexts = function (e) { + var ctxs = e.target.parentNode.querySelector(".ctxs"); + + if (!ctxs.classList.contains("expanded")) { + var ctxs_text = ctxs.textContent; + var width = Number(ctxs_text[0]); + 
ctxs.textContent = ""; + for (var i = 1; i < ctxs_text.length; i += width) { + key = ctxs_text.substring(i, i + width).trim(); + ctxs.appendChild(document.createTextNode(contexts[key])); + ctxs.appendChild(document.createElement("br")); + } + ctxs.classList.add("expanded"); + } +}; + +document.addEventListener("DOMContentLoaded", () => { + if (document.body.classList.contains("indexfile")) { + coverage.index_ready(); + } + else { + coverage.pyfile_ready(); + } +}); diff --git a/htmlcov/favicon_32_cb_c827f16f.png b/htmlcov/favicon_32_cb_c827f16f.png new file mode 100644 index 0000000..8649f04 Binary files /dev/null and b/htmlcov/favicon_32_cb_c827f16f.png differ diff --git a/htmlcov/function_index.html b/htmlcov/function_index.html new file mode 100644 index 0000000..5c9412e --- /dev/null +++ b/htmlcov/function_index.html @@ -0,0 +1,2052 @@ + + + + + Coverage report + + + + + +
+
+

Coverage report: + 71% +

+ +
+ +
+ + +
+
+

+ Files + Functions + Classes +

+

+ coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Filefunction statementsmissingexcluded coverage
src / lyscripts / __init__.pyLyscriptsCLI.__init__ 220 0%
src / lyscripts / __init__.pyLyscriptsCLI.cli_cmd 550 0%
src / lyscripts / __init__.py(no function) 2300 100%
src / lyscripts / __main__.py(no function) 330 0%
src / lyscripts / _version.py(no function) 1100 100%
src / lyscripts / cli.pyassemble_main 200 100%
src / lyscripts / cli.pyassemble_main.main 200 100%
src / lyscripts / cli.pysomewhat_safely_get_loglevel 660 0%
src / lyscripts / cli.pyconfigure_logging 660 0%
src / lyscripts / cli.pyInterceptHandler.emit 14140 0%
src / lyscripts / cli.py(no function) 1500 100%
src / lyscripts / compute / __init__.pyComputeCLI.cli_cmd 110 0%
src / lyscripts / compute / __init__.py(no function) 400 100%
src / lyscripts / compute / __main__.py(no function) 550 0%
src / lyscripts / compute / evidence.pycomp_bic 110 0%
src / lyscripts / compute / evidence.pycompute_evidence 220 0%
src / lyscripts / compute / evidence.pycompute_ti_results 12120 0%
src / lyscripts / compute / evidence.pyEvidenceCLI.cli_cmd 24240 0%
src / lyscripts / compute / evidence.py(no function) 2620 92%
src / lyscripts / compute / posteriors.pycompute_posteriors 1000 100%
src / lyscripts / compute / posteriors.pyPosteriorsCLI.cli_cmd 17170 0%
src / lyscripts / compute / posteriors.py(no function) 1920 89%
src / lyscripts / compute / prevalences.pycompute_prevalences 2050 75%
src / lyscripts / compute / prevalences.pygenerate_query_from_diagnosis 700 100%
src / lyscripts / compute / prevalences.pyobserve_prevalence 800 100%
src / lyscripts / compute / prevalences.pyPrevalencesCLI.cli_cmd 1900 100%
src / lyscripts / compute / prevalences.py(no function) 2820 93%
src / lyscripts / compute / priors.pycompute_priors 700 100%
src / lyscripts / compute / priors.pyPriorsCLI.cli_cmd 1200 100%
src / lyscripts / compute / priors.py(no function) 1620 88%
src / lyscripts / compute / risks.pycompute_risks 990 0%
src / lyscripts / compute / risks.pyRisksCLI.cli_cmd 22220 0%
src / lyscripts / compute / risks.py(no function) 2020 90%
src / lyscripts / compute / utils.pyis_hdf5_compatible 100 100%
src / lyscripts / compute / utils.pyto_hdf5_attrs 600 100%
src / lyscripts / compute / utils.pyfrom_hdf5_attrs 700 100%
src / lyscripts / compute / utils.pyextract_modalities 710 86%
src / lyscripts / compute / utils.pyensure_parent_dir 300 100%
src / lyscripts / compute / utils.pyHDF5FileStorage._get_dataset 410 75%
src / lyscripts / compute / utils.pyHDF5FileStorage.load 500 100%
src / lyscripts / compute / utils.pyHDF5FileStorage.get_attrs 500 100%
src / lyscripts / compute / utils.pyHDF5FileStorage.save 610 83%
src / lyscripts / compute / utils.pyHDF5FileStorage.set_attrs 610 83%
src / lyscripts / compute / utils.pyreduce_pattern 900 100%
src / lyscripts / compute / utils.pycomplete_pattern 1220 83%
src / lyscripts / compute / utils.pyget_cached 700 100%
src / lyscripts / compute / utils.pyget_cached.log_cache_info_wrapper 700 100%
src / lyscripts / compute / utils.py(no function) 3300 100%
src / lyscripts / configs.pyDataConfig.load 310 67%
src / lyscripts / configs.pyDataConfig.get_load_kwargs 100 100%
src / lyscripts / configs.pycheck_pattern 100 100%
src / lyscripts / configs.pyDiagnosisConfig.to_involvement 110 0%
src / lyscripts / configs.pyretrieve_graph_representation 920 78%
src / lyscripts / configs.pyGraphConfig.from_model 200 100%
src / lyscripts / configs.pyhas_model_symbol 600 100%
src / lyscripts / configs.pyget_symmetry_kwargs 510 80%
src / lyscripts / configs.pyModelConfig.from_model 1540 73%
src / lyscripts / configs.pymodalityconfig_from_model 200 100%
src / lyscripts / configs.pyDeprecatedModelConfig.model_post_init 500 100%
src / lyscripts / configs.pyDeprecatedModelConfig.translate 1400 100%
src / lyscripts / configs.pySamplingConfig.load 100 100%
src / lyscripts / configs.pygeometric_schedule 330 0%
src / lyscripts / configs.pylinear_schedule 110 0%
src / lyscripts / configs.pypower_schedule 220 0%
src / lyscripts / configs.pyScheduleConfig.get_schedule 880 0%
src / lyscripts / configs.pymap_to_optional_bool 510 80%
src / lyscripts / configs.pyScenarioConfig.model_post_init 200 100%
src / lyscripts / configs.pyScenarioConfig.interpolate 430 25%
src / lyscripts / configs.pyScenarioConfig.normalize 210 50%
src / lyscripts / configs.py_construct_model_from_external 600 100%
src / lyscripts / configs.pyconstruct_model 700 100%
src / lyscripts / configs.pyadd_distributions 1810 94%
src / lyscripts / configs.pyadd_modalities 800 100%
src / lyscripts / configs.pyadd_data 11110 0%
src / lyscripts / configs.pyDynamicYamlConfigSettingsSource.__init__ 200 100%
src / lyscripts / configs.pyDynamicYamlConfigSettingsSource._read_file 510 80%
src / lyscripts / configs.pyDynamicYamlConfigSettingsSource.__call__ 400 100%
src / lyscripts / configs.pyDynamicYamlConfigSettingsSource.__repr__ 100 100%
src / lyscripts / configs.pyBaseCLI.settings_customise_sources 300 100%
src / lyscripts / configs.py(no function) 12300 100%
src / lyscripts / data / __init__.pyDataCLI.cli_cmd 110 0%
src / lyscripts / data / __init__.py(no function) 500 100%
src / lyscripts / data / __main__.pymain 10100 0%
src / lyscripts / data / __main__.py(no function) 880 0%
src / lyscripts / data / collect / __init__.pyserve_index_html 330 0%
src / lyscripts / data / collect / __init__.pyserve_schema 110 0%
src / lyscripts / data / collect / __init__.pyserve_collector_js 110 0%
src / lyscripts / data / collect / __init__.pyprocess 17170 0%
src / lyscripts / data / collect / __init__.pyCollectorCLI.cli_cmd 550 0%
src / lyscripts / data / collect / __init__.py(no function) 3100 100%
src / lyscripts / data / enhance.pyEnhanceCLI.cli_cmd 440 0%
src / lyscripts / data / enhance.py(no function) 1520 87%
src / lyscripts / data / fetch.pyFetchCLI.cli_cmd 550 0%
src / lyscripts / data / fetch.py(no function) 1620 88%
src / lyscripts / data / filter.pyFilterCLI.model_post_init 17170 0%
src / lyscripts / data / filter.pyFilterCLI.cli_cmd 11110 0%
src / lyscripts / data / filter.py(no function) 2020 90%
src / lyscripts / data / generate.pyGenerateCLI.model_post_init 820 75%
src / lyscripts / data / generate.pyGenerateCLI.cli_cmd 900 100%
src / lyscripts / data / generate.py(no function) 1820 89%
src / lyscripts / data / join.pyJoinCLI.cli_cmd 880 0%
src / lyscripts / data / join.py(no function) 1420 86%
src / lyscripts / data / lyproxify.pyensure_python_file 330 0%
src / lyscripts / data / lyproxify.pyensure_column_map 660 0%
src / lyscripts / data / lyproxify.pyLyproxifyCLI.cli_cmd 17170 0%
src / lyscripts / data / lyproxify.pyclean_header 660 0%
src / lyscripts / data / lyproxify.pyget_instruction_depth 711 86%
src / lyscripts / data / lyproxify.pygenerate_markdown_docs 1000 100%
src / lyscripts / data / lyproxify.pytransform_to_lyprox 20200 0%
src / lyscripts / data / lyproxify.pyleftright_to_ipsicontra 12120 0%
src / lyscripts / data / lyproxify.pyexclude_patients 800 100%
src / lyscripts / data / lyproxify.py(no function) 3420 94%
src / lyscripts / data / split.pySplitCLI.cli_cmd 12120 0%
src / lyscripts / data / split.py(no function) 1720 88%
src / lyscripts / data / utils.pysave_table_to_csv 300 100%
src / lyscripts / data / utils.py(no function) 600 100%
src / lyscripts / decorators.pyassemble_signature 300 100%
src / lyscripts / decorators.pylog_state 200 100%
src / lyscripts / decorators.pylog_state.log_decorator 300 100%
src / lyscripts / decorators.pylog_state.log_decorator.wrapper 1030 70%
src / lyscripts / decorators.pycheck_input_file_exists 300 100%
src / lyscripts / decorators.pycheck_input_file_exists.inner 410 75%
src / lyscripts / decorators.pycheck_output_dir_exists 300 100%
src / lyscripts / decorators.pycheck_output_dir_exists.inner 300 100%
src / lyscripts / decorators.py(no function) 1000 100%
src / lyscripts / evaluate.py_add_parser 220 0%
src / lyscripts / evaluate.py_add_arguments 660 0%
src / lyscripts / evaluate.pycomp_bic 110 0%
src / lyscripts / evaluate.pycompute_evidence 220 0%
src / lyscripts / evaluate.pycompute_ti_results 13130 0%
src / lyscripts / evaluate.pymain 24240 0%
src / lyscripts / evaluate.py(no function) 2240 82%
src / lyscripts / integrate.pyinit_ti_sampler 330 0%
src / lyscripts / integrate.pyIntegrateCLI.cli_cmd 17170 0%
src / lyscripts / integrate.py(no function) 2620 92%
src / lyscripts / plots.pyfloor_at_decimal 220 0%
src / lyscripts / plots.pyceil_at_decimal 110 0%
src / lyscripts / plots.pyfloor_to_step 100 100%
src / lyscripts / plots.pyceil_to_step 100 100%
src / lyscripts / plots.pyclean_and_check 500 100%
src / lyscripts / plots.pyAbstractDistribution.label 100 100%
src / lyscripts / plots.pyHistogram.values 100 100%
src / lyscripts / plots.pyHistogram.from_hdf5 600 100%
src / lyscripts / plots.pyHistogram.left_percentile 100 100%
src / lyscripts / plots.pyHistogram.right_percentile 100 100%
src / lyscripts / plots.pyHistogram.draw 600 100%
src / lyscripts / plots.pyBetaPosterior.from_hdf5 920 78%
src / lyscripts / plots.pyBetaPosterior._get_label 100 100%
src / lyscripts / plots.pyBetaPosterior.num_fail 100 100%
src / lyscripts / plots.pyBetaPosterior.pdf 100 100%
src / lyscripts / plots.pyBetaPosterior.left_percentile 100 100%
src / lyscripts / plots.pyBetaPosterior.right_percentile 100 100%
src / lyscripts / plots.pyBetaPosterior.draw 800 100%
src / lyscripts / plots.pyget_size 800 100%
src / lyscripts / plots.pyget_label 600 100%
src / lyscripts / plots.pyget_xlims 500 100%
src / lyscripts / plots.pydraw 1220 83%
src / lyscripts / plots.pysplit_legends 10100 0%
src / lyscripts / plots.pyuse_mpl_stylesheet 110 0%
src / lyscripts / plots.pysave_figure 200 100%
src / lyscripts / plots.py(no function) 6806 100%
src / lyscripts / sample.pyCompletedItersColumn.__init__ 200 100%
src / lyscripts / sample.pyCompletedItersColumn.render 310 67%
src / lyscripts / sample.pyItersPerSecondColumn.render 400 100%
src / lyscripts / sample.pyAcorTime.update 200 100%
src / lyscripts / sample.pyAcorTime.relative_diff 100 100%
src / lyscripts / sample.pyNumAccepted.update 200 100%
src / lyscripts / sample.pyNumAccepted.newly_accepted 100 100%
src / lyscripts / sample.pylog_prob_fn 440 0%
src / lyscripts / sample.pyensure_initial_state 700 100%
src / lyscripts / sample.pyensure_history_table 310 67%
src / lyscripts / sample.pyupdate_history_table 510 80%
src / lyscripts / sample.pyis_converged 100 100%
src / lyscripts / sample.py_get_columns 100 100%
src / lyscripts / sample.pyrun_sampling 2010 95%
src / lyscripts / sample.pyget_pool 100 100%
src / lyscripts / sample.pyinit_sampler 300 100%
src / lyscripts / sample.pySampleCLI.cli_cmd 1200 100%
src / lyscripts / sample.py(no function) 6543 94%
src / lyscripts / schedule.pyScheduleCLI.cli_cmd 330 0%
src / lyscripts / schedule.py(no function) 820 75%
src / lyscripts / schema.pymain 220 0%
src / lyscripts / schema.py(no function) 2010 95%
src / lyscripts / utils.pybinom_pmf 610 83%
src / lyscripts / utils.pyget_dict_depth 700 100%
src / lyscripts / utils.pydelete_private_keys 700 100%
src / lyscripts / utils.pyflatten 800 100%
src / lyscripts / utils.pyunflatten 700 100%
src / lyscripts / utils.pyget_modalities_subset 720 71%
src / lyscripts / utils.pyload_patient_data 500 100%
src / lyscripts / utils.pyload_yaml_params 400 100%
src / lyscripts / utils.pyload_model_samples 400 100%
src / lyscripts / utils.pyget_hdf5_backend 620 67%
src / lyscripts / utils.py(no function) 2300 100%
Total  173549615 71%
+

+ No items found using the specified filter. +

+

6 empty functions skipped.

+
+ + + diff --git a/htmlcov/index.html b/htmlcov/index.html new file mode 100644 index 0000000..c2b5d31 --- /dev/null +++ b/htmlcov/index.html @@ -0,0 +1,396 @@ + + + + + Coverage report + + + + + +
+
+

Coverage report: + 71% +

+ +
+ +
+ + +
+
+

+ Files + Functions + Classes +

+

+ coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
File statementsmissingexcluded coverage
src / lyscripts / __init__.py 3070 77%
src / lyscripts / __main__.py 330 0%
src / lyscripts / _version.py 1100 100%
src / lyscripts / cli.py 45260 42%
src / lyscripts / compute / __init__.py 510 80%
src / lyscripts / compute / __main__.py 550 0%
src / lyscripts / compute / evidence.py 65410 37%
src / lyscripts / compute / posteriors.py 46190 59%
src / lyscripts / compute / prevalences.py 8270 91%
src / lyscripts / compute / priors.py 3520 94%
src / lyscripts / compute / risks.py 51330 35%
src / lyscripts / compute / utils.py 11860 95%
src / lyscripts / configs.py 280410 85%
src / lyscripts / data / __init__.py 610 83%
src / lyscripts / data / __main__.py 18180 0%
src / lyscripts / data / collect / __init__.py 58270 53%
src / lyscripts / data / enhance.py 1960 68%
src / lyscripts / data / fetch.py 2170 67%
src / lyscripts / data / filter.py 48300 38%
src / lyscripts / data / generate.py 3540 89%
src / lyscripts / data / join.py 22100 55%
src / lyscripts / data / lyproxify.py 123671 46%
src / lyscripts / data / split.py 29140 52%
src / lyscripts / data / utils.py 900 100%
src / lyscripts / decorators.py 4140 90%
src / lyscripts / evaluate.py 70520 26%
src / lyscripts / integrate.py 46220 52%
src / lyscripts / plots.py 160189 89%
src / lyscripts / sample.py 137125 91%
src / lyscripts / schedule.py 1150 55%
src / lyscripts / schema.py 2230 86%
src / lyscripts / utils.py 8450 94%
Total 173549615 71%
+

+ No items found using the specified filter. +

+
+ + + diff --git a/htmlcov/keybd_closed_cb_900cfef5.png b/htmlcov/keybd_closed_cb_900cfef5.png new file mode 100644 index 0000000..ba119c4 Binary files /dev/null and b/htmlcov/keybd_closed_cb_900cfef5.png differ diff --git a/htmlcov/status.json b/htmlcov/status.json new file mode 100644 index 0000000..6495e3e --- /dev/null +++ b/htmlcov/status.json @@ -0,0 +1 @@ +{"note":"This file is an internal implementation detail to speed up HTML report generation. Its format can change at any time. You might be looking for the JSON report: https://coverage.rtfd.io/cmd.html#cmd-json","format":5,"version":"7.13.5","globals":"50bf532352c807e4f5a5d7e355804c07","files":{"z_5bf5c588c698c6cc___init___py":{"hash":"a0eae5c6356ead96e996c23a7b93503e","index":{"url":"z_5bf5c588c698c6cc___init___py.html","file":"src/lyscripts/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":30,"n_excluded":0,"n_missing":7,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc___main___py":{"hash":"f0c35da943d35886efff34f2d0b2b729","index":{"url":"z_5bf5c588c698c6cc___main___py.html","file":"src/lyscripts/__main__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":3,"n_excluded":0,"n_missing":3,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc__version_py":{"hash":"7dda3ad4c1b3f5d2a35e683e9bfb8ef5","index":{"url":"z_5bf5c588c698c6cc__version_py.html","file":"src/lyscripts/_version.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":11,"n_excluded":0,"n_missing":0,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_cli_py":{"hash":"668153c451c926231097db51313ff65e","index":{"url":"z_5bf5c588c698c6cc_cli_py.html","file":"src/lyscripts/cli.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":45,"n_excluded":0,"n_missing":26,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_055061514423972c___init___py":{"hash":"3a51d3
7a0111360f944df234b225a1f3","index":{"url":"z_055061514423972c___init___py.html","file":"src/lyscripts/compute/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":5,"n_excluded":0,"n_missing":1,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_055061514423972c___main___py":{"hash":"f6b9779e777d0dfdc91f504f1fd62dd3","index":{"url":"z_055061514423972c___main___py.html","file":"src/lyscripts/compute/__main__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":5,"n_excluded":0,"n_missing":5,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_055061514423972c_evidence_py":{"hash":"f38ea0f3323181b306eccc21072086be","index":{"url":"z_055061514423972c_evidence_py.html","file":"src/lyscripts/compute/evidence.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":65,"n_excluded":0,"n_missing":41,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_055061514423972c_posteriors_py":{"hash":"9d56db3d46c922d10b4a59078635c7d0","index":{"url":"z_055061514423972c_posteriors_py.html","file":"src/lyscripts/compute/posteriors.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":46,"n_excluded":0,"n_missing":19,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_055061514423972c_prevalences_py":{"hash":"f44fb75a19ae2bb65135ce161f3b1439","index":{"url":"z_055061514423972c_prevalences_py.html","file":"src/lyscripts/compute/prevalences.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":82,"n_excluded":0,"n_missing":7,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_055061514423972c_priors_py":{"hash":"938d7299721def68340eb93f0fa5d153","index":{"url":"z_055061514423972c_priors_py.html","file":"src/lyscripts/compute/priors.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":35,"n_excluded":0,"n_missing":2,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_055061514423972c_risks_py":{"has
h":"a6edde0dd25e478508fb393a89f139db","index":{"url":"z_055061514423972c_risks_py.html","file":"src/lyscripts/compute/risks.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":51,"n_excluded":0,"n_missing":33,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_055061514423972c_utils_py":{"hash":"6d7fc5f29d69249558ae513bcb587ccf","index":{"url":"z_055061514423972c_utils_py.html","file":"src/lyscripts/compute/utils.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":118,"n_excluded":0,"n_missing":6,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_configs_py":{"hash":"d306b60881f39f8b4861252ce4d39307","index":{"url":"z_5bf5c588c698c6cc_configs_py.html","file":"src/lyscripts/configs.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":280,"n_excluded":0,"n_missing":41,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a___init___py":{"hash":"f4f0d10f0cbf3c6f765e47a9eb83a994","index":{"url":"z_9b7bcb970ba14d6a___init___py.html","file":"src/lyscripts/data/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":6,"n_excluded":0,"n_missing":1,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a___main___py":{"hash":"3c6f736c0cc0f5d3969598e0fd308385","index":{"url":"z_9b7bcb970ba14d6a___main___py.html","file":"src/lyscripts/data/__main__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":18,"n_excluded":0,"n_missing":18,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_f60392fe1c3f3e73___init___py":{"hash":"7bbd08c6a98bbc9b59ee7180b2d8f8d7","index":{"url":"z_f60392fe1c3f3e73___init___py.html","file":"src/lyscripts/data/collect/__init__.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":58,"n_excluded":0,"n_missing":27,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a_enhance_py":{"hash":"7eb0de275f05bab
9e742aa7b06f7ded3","index":{"url":"z_9b7bcb970ba14d6a_enhance_py.html","file":"src/lyscripts/data/enhance.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":19,"n_excluded":0,"n_missing":6,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a_fetch_py":{"hash":"996d41c4e8650b42e888527c96338722","index":{"url":"z_9b7bcb970ba14d6a_fetch_py.html","file":"src/lyscripts/data/fetch.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":21,"n_excluded":0,"n_missing":7,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a_filter_py":{"hash":"380c9d36e43c6972fef5ce0322fbe4eb","index":{"url":"z_9b7bcb970ba14d6a_filter_py.html","file":"src/lyscripts/data/filter.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":48,"n_excluded":0,"n_missing":30,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a_generate_py":{"hash":"725be2543bd5c8bf07870ba0e3cdf681","index":{"url":"z_9b7bcb970ba14d6a_generate_py.html","file":"src/lyscripts/data/generate.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":35,"n_excluded":0,"n_missing":4,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a_join_py":{"hash":"13f6699f19198c22acf18ea2d63f6a4b","index":{"url":"z_9b7bcb970ba14d6a_join_py.html","file":"src/lyscripts/data/join.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":22,"n_excluded":0,"n_missing":10,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a_lyproxify_py":{"hash":"cd9f8c7d5ba2c119ccac811c50ac40ed","index":{"url":"z_9b7bcb970ba14d6a_lyproxify_py.html","file":"src/lyscripts/data/lyproxify.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":123,"n_excluded":1,"n_missing":67,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a_split_py":{"hash":"cf5599673d7cf038df372bbac2f34ab2","index":{"url":"z_9b
7bcb970ba14d6a_split_py.html","file":"src/lyscripts/data/split.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":29,"n_excluded":0,"n_missing":14,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_9b7bcb970ba14d6a_utils_py":{"hash":"47562fb32ecfa4d4c051afef7ae8f8de","index":{"url":"z_9b7bcb970ba14d6a_utils_py.html","file":"src/lyscripts/data/utils.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":9,"n_excluded":0,"n_missing":0,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_decorators_py":{"hash":"55c95b251b06120a1c97fed481f46634","index":{"url":"z_5bf5c588c698c6cc_decorators_py.html","file":"src/lyscripts/decorators.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":41,"n_excluded":0,"n_missing":4,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_evaluate_py":{"hash":"a2fb3646caa92a86359ea04cd90c86bb","index":{"url":"z_5bf5c588c698c6cc_evaluate_py.html","file":"src/lyscripts/evaluate.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":70,"n_excluded":0,"n_missing":52,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_integrate_py":{"hash":"5987dd3311b377b80648f7faaca4c514","index":{"url":"z_5bf5c588c698c6cc_integrate_py.html","file":"src/lyscripts/integrate.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":46,"n_excluded":0,"n_missing":22,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_plots_py":{"hash":"c06e31173b21aa971d4192b15f685430","index":{"url":"z_5bf5c588c698c6cc_plots_py.html","file":"src/lyscripts/plots.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":160,"n_excluded":9,"n_missing":18,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_sample_py":{"hash":"d104057f40a0ea861e4ab620079418fe","index":{"url":"z_5bf5c588c698c6cc_sample_py.html","file":"src/lysc
ripts/sample.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":137,"n_excluded":5,"n_missing":12,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_schedule_py":{"hash":"90b0e9ebdd566b0ef791c88191d1b955","index":{"url":"z_5bf5c588c698c6cc_schedule_py.html","file":"src/lyscripts/schedule.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":11,"n_excluded":0,"n_missing":5,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_schema_py":{"hash":"233930f0d9cf4fecec0df2270557b97d","index":{"url":"z_5bf5c588c698c6cc_schema_py.html","file":"src/lyscripts/schema.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":22,"n_excluded":0,"n_missing":3,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}},"z_5bf5c588c698c6cc_utils_py":{"hash":"85633c7219eedea5024025cbba2ee1a8","index":{"url":"z_5bf5c588c698c6cc_utils_py.html","file":"src/lyscripts/utils.py","description":"","nums":{"precision":0,"n_files":1,"n_statements":84,"n_excluded":0,"n_missing":5,"n_branches":0,"n_partial_branches":0,"n_missing_branches":0}}}}} \ No newline at end of file diff --git a/htmlcov/style_cb_9ff733b0.css b/htmlcov/style_cb_9ff733b0.css new file mode 100644 index 0000000..5e304ce --- /dev/null +++ b/htmlcov/style_cb_9ff733b0.css @@ -0,0 +1,389 @@ +@charset "UTF-8"; +/* Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 */ +/* For details: https://github.com/coveragepy/coveragepy/blob/main/NOTICE.txt */ +/* Don't edit this .css file. Edit the .scss file instead! 
*/ +html, body, h1, h2, h3, p, table, td, th { margin: 0; padding: 0; border: 0; font-weight: inherit; font-style: inherit; font-size: 100%; font-family: inherit; vertical-align: baseline; } + +body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; font-size: 1em; background: #fff; color: #000; } + +@media (prefers-color-scheme: dark) { body { background: #1e1e1e; } } + +@media (prefers-color-scheme: dark) { body { color: #eee; } } + +html > body { font-size: 16px; } + +a:active, a:focus { outline: 2px dashed #007acc; } + +p { font-size: .875em; line-height: 1.4em; } + +table { border-collapse: collapse; } + +td { vertical-align: top; } + +table tr.hidden { display: none !important; } + +p#no_rows { display: none; font-size: 1.15em; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; } + +a.nav { text-decoration: none; color: inherit; } + +a.nav:hover { text-decoration: underline; color: inherit; } + +.hidden { display: none; } + +header { background: #f8f8f8; width: 100%; z-index: 2; border-bottom: 1px solid #ccc; } + +@media (prefers-color-scheme: dark) { header { background: black; } } + +@media (prefers-color-scheme: dark) { header { border-color: #333; } } + +header .content { padding: 1rem 3.5rem; } + +header h2 { margin-top: .5em; font-size: 1em; } + +header h2 a.button { font-family: inherit; font-size: inherit; border: 1px solid; border-radius: .2em; background: #eee; color: inherit; text-decoration: none; padding: .1em .5em; margin: 1px calc(.1em + 1px); cursor: pointer; border-color: #ccc; } + +@media (prefers-color-scheme: dark) { header h2 a.button { background: #333; } } + +@media (prefers-color-scheme: dark) { header h2 a.button { border-color: #444; } } + +header h2 a.button.current { border: 2px solid; background: #fff; border-color: #999; cursor: default; } + +@media (prefers-color-scheme: dark) { header h2 
a.button.current { background: #1e1e1e; } } + +@media (prefers-color-scheme: dark) { header h2 a.button.current { border-color: #777; } } + +header p.text { margin: .5em 0 -.5em; color: #666; font-style: italic; } + +@media (prefers-color-scheme: dark) { header p.text { color: #aaa; } } + +header.sticky { position: fixed; left: 0; right: 0; height: 2.5em; } + +header.sticky .text { display: none; } + +header.sticky h1, header.sticky h2 { font-size: 1em; margin-top: 0; display: inline-block; } + +header.sticky .content { padding: 0.5rem 3.5rem; } + +header.sticky .content p { font-size: 1em; } + +header.sticky ~ #source { padding-top: 6.5em; } + +main { position: relative; z-index: 1; } + +footer { margin: 1rem 3.5rem; } + +footer .content { padding: 0; color: #666; font-style: italic; } + +@media (prefers-color-scheme: dark) { footer .content { color: #aaa; } } + +#index { margin: 1rem 0 0 3.5rem; } + +h1 { font-size: 1.25em; display: inline-block; } + +#filter_container { float: right; margin: 0 2em 0 0; line-height: 1.66em; } + +#filter_container #filter { width: 10em; padding: 0.2em 0.5em; border: 2px solid #ccc; background: #fff; color: #000; } + +@media (prefers-color-scheme: dark) { #filter_container #filter { border-color: #444; } } + +@media (prefers-color-scheme: dark) { #filter_container #filter { background: #1e1e1e; } } + +@media (prefers-color-scheme: dark) { #filter_container #filter { color: #eee; } } + +#filter_container #filter:focus { border-color: #007acc; } + +#filter_container :disabled ~ label { color: #ccc; } + +@media (prefers-color-scheme: dark) { #filter_container :disabled ~ label { color: #444; } } + +#filter_container label { font-size: .875em; color: #666; } + +@media (prefers-color-scheme: dark) { #filter_container label { color: #aaa; } } + +header button { font-family: inherit; font-size: inherit; border: 1px solid; border-radius: .2em; background: #eee; color: inherit; text-decoration: none; padding: .1em .5em; margin: 1px 
calc(.1em + 1px); cursor: pointer; border-color: #ccc; } + +@media (prefers-color-scheme: dark) { header button { background: #333; } } + +@media (prefers-color-scheme: dark) { header button { border-color: #444; } } + +header button:active, header button:focus { outline: 2px dashed #007acc; } + +header button.run { background: #eeffee; } + +@media (prefers-color-scheme: dark) { header button.run { background: #373d29; } } + +header button.run.show_run { background: #dfd; border: 2px solid #00dd00; margin: 0 .1em; } + +@media (prefers-color-scheme: dark) { header button.run.show_run { background: #373d29; } } + +header button.mis { background: #ffeeee; } + +@media (prefers-color-scheme: dark) { header button.mis { background: #4b1818; } } + +header button.mis.show_mis { background: #fdd; border: 2px solid #ff0000; margin: 0 .1em; } + +@media (prefers-color-scheme: dark) { header button.mis.show_mis { background: #4b1818; } } + +header button.exc { background: #f7f7f7; } + +@media (prefers-color-scheme: dark) { header button.exc { background: #333; } } + +header button.exc.show_exc { background: #eee; border: 2px solid #808080; margin: 0 .1em; } + +@media (prefers-color-scheme: dark) { header button.exc.show_exc { background: #333; } } + +header button.par { background: #ffffd5; } + +@media (prefers-color-scheme: dark) { header button.par { background: #650; } } + +header button.par.show_par { background: #ffa; border: 2px solid #bbbb00; margin: 0 .1em; } + +@media (prefers-color-scheme: dark) { header button.par.show_par { background: #650; } } + +#help_panel, #source p .annotate.long { display: none; position: absolute; z-index: 999; background: #ffffcc; border: 1px solid #888; border-radius: .2em; color: #333; padding: .25em .5em; } + +#source p .annotate.long { white-space: normal; float: right; top: 1.75em; right: 1em; height: auto; } + +#help_panel_wrapper { float: right; position: relative; } + +#keyboard_icon { margin: 5px; } + +#help_panel_state { display: 
none; } + +#help_panel { top: 25px; right: 0; padding: .75em; border: 1px solid #883; color: #333; } + +#help_panel .keyhelp p { margin-top: .75em; } + +#help_panel .legend { font-style: italic; margin-bottom: 1em; } + +.indexfile #help_panel { width: 25em; } + +.pyfile #help_panel { width: 18em; } + +#help_panel_state:checked ~ #help_panel { display: block; } + +kbd { border: 1px solid black; border-color: #888 #333 #333 #888; padding: .1em .35em; font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace; font-weight: bold; background: #eee; border-radius: 3px; } + +#source { padding: 1em 0 1em 3.5rem; font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace; } + +#source p { position: relative; white-space: pre; } + +#source p * { box-sizing: border-box; } + +#source p .n { float: left; text-align: right; width: 3.5rem; box-sizing: border-box; margin-left: -3.5rem; padding-right: 1em; color: #999; user-select: none; } + +@media (prefers-color-scheme: dark) { #source p .n { color: #777; } } + +#source p .n.highlight { background: #ffdd00; } + +#source p .n a { scroll-margin-top: 6em; text-decoration: none; color: #999; } + +@media (prefers-color-scheme: dark) { #source p .n a { color: #777; } } + +#source p .n a:hover { text-decoration: underline; color: #999; } + +@media (prefers-color-scheme: dark) { #source p .n a:hover { color: #777; } } + +#source p .t { display: inline-block; width: 100%; box-sizing: border-box; margin-left: -.5em; padding-left: 0.3em; border-left: 0.2em solid #fff; } + +@media (prefers-color-scheme: dark) { #source p .t { border-color: #1e1e1e; } } + +#source p .t:hover { background: #f2f2f2; } + +@media (prefers-color-scheme: dark) { #source p .t:hover { background: #282828; } } + +#source p .t:hover ~ .r .annotate.long { display: block; } + +#source p .t .com { color: #008000; font-style: italic; line-height: 1px; } + +@media (prefers-color-scheme: dark) { #source p .t .com { color: #6a9955; } } + +#source p .t .key { 
font-weight: bold; line-height: 1px; } + +#source p .t .str, #source p .t .fst { color: #0451a5; } + +@media (prefers-color-scheme: dark) { #source p .t .str, #source p .t .fst { color: #9cdcfe; } } + +#source p.mis .t { border-left: 0.2em solid #ff0000; } + +#source p.mis.show_mis .t { background: #fdd; } + +@media (prefers-color-scheme: dark) { #source p.mis.show_mis .t { background: #4b1818; } } + +#source p.mis.show_mis .t:hover { background: #f2d2d2; } + +@media (prefers-color-scheme: dark) { #source p.mis.show_mis .t:hover { background: #532323; } } + +#source p.mis.mis2 .t { border-left: 0.2em dotted #ff0000; } + +#source p.mis.mis2.show_mis .t { background: #ffeeee; } + +@media (prefers-color-scheme: dark) { #source p.mis.mis2.show_mis .t { background: #351b1b; } } + +#source p.mis.mis2.show_mis .t:hover { background: #f2d2d2; } + +@media (prefers-color-scheme: dark) { #source p.mis.mis2.show_mis .t:hover { background: #532323; } } + +#source p.run .t { border-left: 0.2em solid #00dd00; } + +#source p.run.show_run .t { background: #dfd; } + +@media (prefers-color-scheme: dark) { #source p.run.show_run .t { background: #373d29; } } + +#source p.run.show_run .t:hover { background: #d2f2d2; } + +@media (prefers-color-scheme: dark) { #source p.run.show_run .t:hover { background: #404633; } } + +#source p.run.run2 .t { border-left: 0.2em dotted #00dd00; } + +#source p.run.run2.show_run .t { background: #eeffee; } + +@media (prefers-color-scheme: dark) { #source p.run.run2.show_run .t { background: #2b2e24; } } + +#source p.run.run2.show_run .t:hover { background: #d2f2d2; } + +@media (prefers-color-scheme: dark) { #source p.run.run2.show_run .t:hover { background: #404633; } } + +#source p.exc .t { border-left: 0.2em solid #808080; } + +#source p.exc.show_exc .t { background: #eee; } + +@media (prefers-color-scheme: dark) { #source p.exc.show_exc .t { background: #333; } } + +#source p.exc.show_exc .t:hover { background: #e2e2e2; } + +@media 
(prefers-color-scheme: dark) { #source p.exc.show_exc .t:hover { background: #3c3c3c; } } + +#source p.exc.exc2 .t { border-left: 0.2em dotted #808080; } + +#source p.exc.exc2.show_exc .t { background: #f7f7f7; } + +@media (prefers-color-scheme: dark) { #source p.exc.exc2.show_exc .t { background: #292929; } } + +#source p.exc.exc2.show_exc .t:hover { background: #e2e2e2; } + +@media (prefers-color-scheme: dark) { #source p.exc.exc2.show_exc .t:hover { background: #3c3c3c; } } + +#source p.par .t { border-left: 0.2em solid #bbbb00; } + +#source p.par.show_par .t { background: #ffa; } + +@media (prefers-color-scheme: dark) { #source p.par.show_par .t { background: #650; } } + +#source p.par.show_par .t:hover { background: #f2f2a2; } + +@media (prefers-color-scheme: dark) { #source p.par.show_par .t:hover { background: #6d5d0c; } } + +#source p.par.par2 .t { border-left: 0.2em dotted #bbbb00; } + +#source p.par.par2.show_par .t { background: #ffffd5; } + +@media (prefers-color-scheme: dark) { #source p.par.par2.show_par .t { background: #423a0f; } } + +#source p.par.par2.show_par .t:hover { background: #f2f2a2; } + +@media (prefers-color-scheme: dark) { #source p.par.par2.show_par .t:hover { background: #6d5d0c; } } + +#source p .r { position: absolute; top: 0; right: 2.5em; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; } + +#source p .annotate { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; color: #666; padding-right: .5em; } + +@media (prefers-color-scheme: dark) { #source p .annotate { color: #ddd; } } + +#source p .annotate.short:hover ~ .long { display: block; } + +#source p .annotate.long { width: 30em; right: 2.5em; } + +#source p input { display: none; } + +#source p input ~ .r label.ctx { cursor: pointer; border-radius: .25em; } + +#source p input ~ .r label.ctx::before { content: "โ–ถ "; } + +#source p input ~ .r 
label.ctx:hover { background: #e8f4ff; color: #666; } + +@media (prefers-color-scheme: dark) { #source p input ~ .r label.ctx:hover { background: #0f3a42; } } + +@media (prefers-color-scheme: dark) { #source p input ~ .r label.ctx:hover { color: #aaa; } } + +#source p input:checked ~ .r label.ctx { background: #d0e8ff; color: #666; border-radius: .75em .75em 0 0; padding: 0 .5em; margin: -.25em 0; } + +@media (prefers-color-scheme: dark) { #source p input:checked ~ .r label.ctx { background: #056; } } + +@media (prefers-color-scheme: dark) { #source p input:checked ~ .r label.ctx { color: #aaa; } } + +#source p input:checked ~ .r label.ctx::before { content: "โ–ผ "; } + +#source p input:checked ~ .ctxs { padding: .25em .5em; overflow-y: scroll; max-height: 10.5em; } + +#source p label.ctx { color: #999; display: inline-block; padding: 0 .5em; font-size: .8333em; } + +@media (prefers-color-scheme: dark) { #source p label.ctx { color: #777; } } + +#source p .ctxs { display: block; max-height: 0; overflow-y: hidden; transition: all .2s; padding: 0 .5em; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; white-space: nowrap; background: #d0e8ff; border-radius: .25em; margin-right: 1.75em; text-align: right; } + +@media (prefers-color-scheme: dark) { #source p .ctxs { background: #056; } } + +#index { font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace; font-size: 0.875em; } + +#index table.index { margin-left: -.5em; } + +#index td, #index th { text-align: right; vertical-align: baseline; padding: .25em .5em; border-bottom: 1px solid #eee; } + +@media (prefers-color-scheme: dark) { #index td, #index th { border-color: #333; } } + +#index td.name, #index th.name { text-align: left; width: auto; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; min-width: 15em; } + +#index td.left, #index th.left { text-align: left; } + +#index 
td.spacer, #index th.spacer { border: none; padding: 0; } + +#index td.spacer:hover, #index th.spacer:hover { background: inherit; } + +#index th { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; font-style: italic; color: #333; border-color: #ccc; cursor: pointer; } + +@media (prefers-color-scheme: dark) { #index th { color: #ddd; } } + +@media (prefers-color-scheme: dark) { #index th { border-color: #444; } } + +#index th:hover { background: #eee; } + +@media (prefers-color-scheme: dark) { #index th:hover { background: #333; } } + +#index th .arrows { color: #666; font-size: 85%; font-family: sans-serif; font-style: normal; pointer-events: none; } + +#index th[aria-sort="ascending"], #index th[aria-sort="descending"] { white-space: nowrap; background: #eee; padding-left: .5em; } + +@media (prefers-color-scheme: dark) { #index th[aria-sort="ascending"], #index th[aria-sort="descending"] { background: #333; } } + +#index th[aria-sort="ascending"] .arrows::after { content: " โ–ฒ"; } + +#index th[aria-sort="descending"] .arrows::after { content: " โ–ผ"; } + +#index tr.grouphead th { cursor: default; font-style: normal; border-color: #999; } + +@media (prefers-color-scheme: dark) { #index tr.grouphead th { border-color: #777; } } + +#index td.name { font-size: 1.15em; } + +#index td.name a { text-decoration: none; color: inherit; } + +#index td.name .no-noun { font-style: italic; } + +#index tr.total td, #index tr.total_dynamic td { font-weight: bold; border-bottom: none; } + +#index tr.region:hover { background: #eee; } + +@media (prefers-color-scheme: dark) { #index tr.region:hover { background: #333; } } + +#index tr.region:hover td.name { text-decoration: underline; color: inherit; } + +#scroll_marker { position: fixed; z-index: 3; right: 0; top: 0; width: 16px; height: 100%; background: #fff; border-left: 1px solid #eee; will-change: transform; } + +@media (prefers-color-scheme: dark) { 
#scroll_marker { background: #1e1e1e; } } + +@media (prefers-color-scheme: dark) { #scroll_marker { border-color: #333; } } + +#scroll_marker .marker { background: #ccc; position: absolute; min-height: 3px; width: 100%; } + +@media (prefers-color-scheme: dark) { #scroll_marker .marker { background: #444; } } diff --git a/htmlcov/z_055061514423972c___init___py.html b/htmlcov/z_055061514423972c___init___py.html new file mode 100644 index 0000000..c199605 --- /dev/null +++ b/htmlcov/z_055061514423972c___init___py.html @@ -0,0 +1,119 @@ + + + + + Coverage for src/lyscripts/compute/__init__.py: 80% + + + + + +
+
+

+ Coverage for src / lyscripts / compute / __init__.py: + 80% +

+ +

+ 5 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Commands to compute prior and posterior state distributions from model samples. 

+

2 

+

3This can in turn speed up the computation of risks and prevalences. 

+

4""" 

+

5 

+

6from pydantic_settings import BaseSettings, CliApp, CliSubCommand 

+

7 

+

8from lyscripts.compute import posteriors, prevalences, priors, risks, evidence 

+

9 

+

10 

+

11class ComputeCLI(BaseSettings): 

+

12 """Compute priors, posteriors, risks, prevalences and model evidence from model samples.""" 

+

13 

+

14 priors: CliSubCommand[priors.PriorsCLI] 

+

15 posteriors: CliSubCommand[posteriors.PosteriorsCLI] 

+

16 risks: CliSubCommand[risks.RisksCLI] 

+

17 prevalences: CliSubCommand[prevalences.PrevalencesCLI] 

+

18 evidence: CliSubCommand[evidence.EvidenceCLI] 

+

19 

+

20 def cli_cmd(self) -> None: 

+

21 """Start the ``compute`` subcommand.""" 

+

22 CliApp.run_subcommand(self) 

+
+ + + diff --git a/htmlcov/z_055061514423972c___main___py.html b/htmlcov/z_055061514423972c___main___py.html new file mode 100644 index 0000000..95e3718 --- /dev/null +++ b/htmlcov/z_055061514423972c___main___py.html @@ -0,0 +1,105 @@ + + + + + Coverage for src/lyscripts/compute/__main__.py: 0% + + + + + +
+
+

+ Coverage for src / lyscripts / compute / __main__.py: + 0% +

+ +

+ 5 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Run the compute module as a script.""" 

+

2 

+

3from lyscripts.cli import assemble_main 

+

4from lyscripts.compute import ComputeCLI 

+

5 

+

6if __name__ == "__main__": 

+

7 main = assemble_main(settings_cls=ComputeCLI, prog_name="compute") 

+

8 main() 

+
+ + + diff --git a/htmlcov/z_055061514423972c_evidence_py.html b/htmlcov/z_055061514423972c_evidence_py.html new file mode 100644 index 0000000..77e8409 --- /dev/null +++ b/htmlcov/z_055061514423972c_evidence_py.html @@ -0,0 +1,294 @@ + + + + + Coverage for src/lyscripts/compute/evidence.py: 37% + + + + + +
+
+

+ Coverage for src / lyscripts / compute / evidence.py: + 37% +

+ +

+ 65 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Compute the model evidence from MCMC samples. 

+

2 

+

3Given the samples drawn during thermodynamic integration and their respective log 

+

4likelihoods, compute the model log evidence and the Bayesian Information Criterion. 

+

5""" 

+

6 

+

7from __future__ import annotations 

+

8 

+

9import json 

+

10from pathlib import Path 

+

11 

+

12import emcee 

+

13import h5py 

+

14import numpy as np 

+

15import pandas as pd 

+

16from loguru import logger 

+

17from pydantic import Field 

+

18from scipy.integrate import trapezoid 

+

19 

+

20from lyscripts.cli import assemble_main 

+

21from lyscripts.configs import ( 

+

22 BaseCLI, 

+

23 DataConfig, 

+

24 SamplingConfig, 

+

25 ScheduleConfig, 

+

26) 

+

27 

+

28RNG = np.random.default_rng() 

+

29 

+

30 

+

31def comp_bic(log_probs: np.ndarray, num_params: int, num_data: int) -> float: 

+

32 r"""Compute the negative one half of the Bayesian Information Criterion (BIC). 

+

33 

+

34 The BIC is defined as [^1] 

+

35 $$ BIC = k \\ln{n} - 2 \\ln{\\hat{L}} $$ 

+

36 where $k$ is the number of parameters ``num_params``, $n$ the number of datapoints 

+

37 ``num_data`` and $\\hat{L}$ the maximum likelihood estimate of the ``log_prob``. 

+

38 It is constructed such that the following is an 

+

39 approximation of the model evidence: 

+

40 $$ p(D \\mid m) \\approx \\exp{\\left( - BIC / 2 \\right)} $$ 

+

41 which is why this function returns the negative one half of it. 

+

42 

+

43 [^1]: https://en.wikipedia.org/wiki/Bayesian_information_criterion 

+

44 """ 

+

45 return np.max(log_probs) - num_params * np.log(num_data) / 2.0 

+

46 

+

47 

+

48def compute_evidence( 

+

49 temp_schedule: np.ndarray, 

+

50 log_probs: np.ndarray, 

+

51) -> float: 

+

52 """Compute the evidence. 

+

53 

+

54 Given a ``temp_schedule`` of inverse temperatures and corresponding sets of 

+

55 ``log_probs``, we calculate the mean ``log_prob`` over all samples to approximate 

+

56 the expectation value under the corresponding power posterior for each step in the 

+

57 ``temp_schedule``. The evidence is evaluated using trapezoidal integration of the 

+

58 expectation values over the ``temp_schedule``. 

+

59 """ 

+

60 a_mc = np.mean(log_probs, axis=1) 

+

61 return trapezoid(y=a_mc, x=temp_schedule) 

+

62 

+

63 

+

64def compute_ti_results( 

+

65 settings: EvidenceCLI, 

+

66 temp_schedule: np.ndarray, 

+

67 metrics: dict, 

+

68 ndim: int, 

+

69 h5_file: Path, 

+

70) -> tuple[np.ndarray, np.ndarray]: 

+

71 """Compute the results in case of a thermodynamic integration run.""" 

+

72 num_temps = len(temp_schedule) 

+

73 

+

74 if num_temps != len(h5_file["ti"]): 

+

75 raise RuntimeError( 

+

76 f"Parameters suggest temp schedule of length {num_temps}, " 

+

77 f"but stored are {len(h5_file['ti'])}", 

+

78 ) 

+

79 

+

80 nwalker = ndim * settings.sampling.walkers_per_dim 

+

81 nsteps = settings.sampling.num_steps 

+

82 ti_log_probs = np.zeros(shape=(num_temps, nsteps * nwalker)) 

+

83 

+

84 for i, run in enumerate(h5_file["ti"]): 

+

85 reader = emcee.backends.HDFBackend( 

+

86 settings.sampling.storage_file, 

+

87 name=f"ti/{run}", 

+

88 read_only=True, 

+

89 ) 

+

90 ti_log_probs[i] = reader.get_blobs(flat=True)["log_prob"] 

+

91 

+

92 evidence = compute_evidence(temp_schedule, ti_log_probs) 

+

93 metrics["evidence"] = evidence 

+

94 

+

95 return temp_schedule, ti_log_probs 

+

96 

+

97 

+

98class EvidenceCLI(BaseCLI): 

+

99 """Compute model evidence from thermodynamic integration samples.""" 

+

100 

+

101 data: DataConfig 

+

102 sampling: SamplingConfig 

+

103 schedule: ScheduleConfig = Field( 

+

104 description="Configuration for generating inverse temperature schedule.", 

+

105 ) 

+

106 plots: Path = Field( 

+

107 default="./plots", 

+

108 description="Directory for storing plots.", 

+

109 ) 

+

110 metrics: Path = Field( 

+

111 default="./metrics.json", 

+

112 description="Path to metrics file.", 

+

113 ) 

+

114 

+

115 def cli_cmd(self) -> None: 

+

116 """Start the ``evidence`` subcommand. 

+

117 

+

118 Given the MCMC samples from thermodynamic integration provided by the 

+

119 ``sampling`` argument and the corresponding inverse temperature schedule, 

+

120 specified in the ``schedule`` argument, the model evidence is computed using 

+

121 the functions :py:func:`compute_ti_results` and :py:func`compute_evidence`. 

+

122 Further the BIC is evaluated. 

+

123 """ 

+

124 data = self.data.load() 

+

125 

+

126 metrics = {} 

+

127 

+

128 temp_schedule = self.schedule.get_schedule() 

+

129 

+

130 with h5py.File(self.sampling.storage_file, mode="r") as h5_file: 

+

131 # Get ndim from the HDF5 backend 

+

132 backend = emcee.backends.HDFBackend( 

+

133 self.sampling.storage_file, 

+

134 read_only=True, 

+

135 name=self.sampling.dataset, 

+

136 ) 

+

137 ndim = backend.shape[1] 

+

138 logger.info(f"Inferred {ndim} parameters from stored samples") 

+

139 

+

140 # if TI has been performed, compute the evidence 

+

141 if "ti" in h5_file: 

+

142 temp_schedule, ti_log_probs = compute_ti_results( 

+

143 settings=self, 

+

144 temp_schedule=temp_schedule, 

+

145 metrics=metrics, 

+

146 ndim=ndim, 

+

147 h5_file=h5_file, 

+

148 ) 

+

149 

+

150 logger.info( 

+

151 "Computed results of thermodynamic integration with " 

+

152 f"{len(temp_schedule)} steps", 

+

153 ) 

+

154 

+

155 # store inverse temperatures and log-probs in CSV file 

+

156 self.plots.parent.mkdir(parents=True, exist_ok=True) 

+

157 

+

158 beta_vs_accuracy = pd.DataFrame( 

+

159 np.array( 

+

160 [ 

+

161 temp_schedule, 

+

162 np.mean(ti_log_probs, axis=1), 

+

163 np.std(ti_log_probs, axis=1), 

+

164 ], 

+

165 ).T, 

+

166 columns=["β", "accuracy", "std"], 

+

167 ) 

+

168 beta_vs_accuracy.to_csv(self.plots, index=False) 

+

169 logger.info(f"Plotted β vs accuracy at {self.plots}") 

+

170 

+

171 # use blobs, because also for TI, this is the unscaled log-prob 

+

172 final_log_probs = backend.get_blobs()["log_prob"] 

+

173 logger.info( 

+

174 f"Opened samples from emcee backend from {self.sampling.storage_file}", 

+

175 ) 

+

176 

+

177 # store metrics in JSON file 

+

178 self.metrics.parent.mkdir(parents=True, exist_ok=True) 

+

179 self.metrics.touch(exist_ok=True) 

+

180 

+

181 metrics["BIC"] = comp_bic( 

+

182 log_probs=final_log_probs, 

+

183 num_params=ndim, 

+

184 num_data=len(data), 

+

185 ) 

+

186 metrics["max_llh"] = np.max(final_log_probs) 

+

187 metrics["mean_llh"] = np.mean(final_log_probs) 

+

188 

+

189 with open(self.metrics, mode="w", encoding="utf-8") as metrics_file: 

+

190 json.dump(metrics, metrics_file) 

+

191 

+

192 logger.info(f"Wrote out metrics to {self.metrics}") 

+

193 

+

194 

+

195if __name__ == "__main__": 

+

196 main = assemble_main(settings_cls=EvidenceCLI, prog_name="compute evidence") 

+

197 main() 

+
+ + + diff --git a/htmlcov/z_055061514423972c_posteriors_py.html b/htmlcov/z_055061514423972c_posteriors_py.html new file mode 100644 index 0000000..8deb162 --- /dev/null +++ b/htmlcov/z_055061514423972c_posteriors_py.html @@ -0,0 +1,239 @@ + + + + + Coverage for src/lyscripts/compute/posteriors.py: 59% + + + + + +
+
+

+ Coverage for src / lyscripts / compute / posteriors.py: + 59% +

+ +

+ 46 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Compute posterior state distributions. 

+

2 

+

3The posteriors are computed from drawn samples for a list of defined scenarios. If 

+

4priors have already been computed from the samples and the ``--cache_dir`` argument 

+

5is the same as during that computation, the priors will automatically be loaded from 

+

6the cache. 

+

7""" 

+

8 

+

9from typing import Literal 

+

10 

+

11import numpy as np 

+

12from loguru import logger 

+

13from lymph import models 

+

14from pydantic import Field 

+

15from rich import progress 

+

16 

+

17from lyscripts.cli import assemble_main 

+

18from lyscripts.compute.priors import compute_priors 

+

19from lyscripts.compute.utils import BaseComputeCLI, HDF5FileStorage, get_cached 

+

20from lyscripts.configs import ( 

+

21 DistributionConfig, 

+

22 GraphConfig, 

+

23 ModalityConfig, 

+

24 ModelConfig, 

+

25 add_distributions, 

+

26 add_modalities, 

+

27 construct_model, 

+

28) 

+

29from lyscripts.utils import console 

+

30 

+

31 

+

32def compute_posteriors( 

+

33 model_config: ModelConfig, 

+

34 graph_config: GraphConfig, 

+

35 dist_configs: dict[str, DistributionConfig], 

+

36 modality_configs: dict[str, ModalityConfig], 

+

37 priors: np.ndarray, 

+

38 diagnosis: dict[Literal["ipsi", "contra"], dict], 

+

39 midext: bool | None = None, 

+

40 mode: Literal["HMM", "BN"] = "HMM", 

+

41 progress_desc: str = "Computing posteriors from priors", 

+

42) -> np.ndarray: 

+

43 """Compute posterior state distributions from ``priors``. 

+

44 

+

45 This calls the ``model`` method :py:meth:`~lymph.types.Model.posterior_state_dist` 

+

46 for each of the pre-computed ``priors``, given the specified ``diagnosis`` pattern. 

+

47 

+

48 For the :py:class:`~lymph.models.Midline` model, the ``midext`` argument can be 

+

49 used to specify whether the midline extension is present or not. 

+

50 """ 

+

51 model = construct_model(model_config, graph_config) 

+

52 model = add_distributions(model, dist_configs) 

+

53 model = add_modalities(model, modality_configs) 

+

54 posteriors = [] 

+

55 kwargs = {"midext": midext} if isinstance(model, models.Midline) else {} 

+

56 

+

57 if isinstance(model, models.Unilateral | models.HPVUnilateral): 

+

58 diagnosis = diagnosis.get("ipsi") 

+

59 

+

60 for prior in progress.track( 

+

61 sequence=priors, 

+

62 description=progress_desc, 

+

63 total=len(priors), 

+

64 console=console, 

+

65 ): 

+

66 posteriors.append( 

+

67 model.posterior_state_dist( 

+

68 given_state_dist=prior, 

+

69 given_diagnosis=diagnosis, 

+

70 mode=mode, 

+

71 **kwargs, 

+

72 ), 

+

73 ) 

+

74 

+

75 return np.stack(posteriors) 

+

76 

+

77 

+

78class PosteriorsCLI(BaseComputeCLI): 

+

79 """Compute posterior state distributions for different diagnosis scenarios.""" 

+

80 

+

81 modalities: dict[str, ModalityConfig] = Field( 

+

82 default={}, 

+

83 description=( 

+

84 "Maps names of diagnostic modalities to their specificity/sensitivity." 

+

85 ), 

+

86 ) 

+

87 posteriors: HDF5FileStorage = Field( 

+

88 description="Storage for the computed posteriors.", 

+

89 ) 

+

90 

+

91 def cli_cmd(self) -> None: 

+

92 """Start the ``posteriors`` subcommand. 

+

93 

+

94 This will compute the posterior state distributions, given a personalized 

+

95 diagnosis pattern, for each of the scenarios provided to the command. 

+

96 """ 

+

97 logger.debug(self.model_dump_json(indent=2)) 

+

98 

+

99 global_attrs = self.model_dump( 

+

100 include={"model", "graph", "distributions", "modalities"}, 

+

101 ) 

+

102 self.posteriors.set_attrs(attrs=global_attrs, dataset="/") 

+

103 

+

104 samples = self.sampling.load() 

+

105 cached_compute_priors = get_cached(compute_priors, self.cache_dir) 

+

106 cached_compute_posteriors = get_cached(compute_posteriors, self.cache_dir) 

+

107 num_scens = len(self.scenarios) 

+

108 

+

109 for i, scenario in enumerate(self.scenarios): 

+

110 _fields = {"t_stages", "t_stages_dist", "mode"} 

+

111 prior_kwargs = scenario.model_dump(include=_fields) 

+

112 

+

113 _priors = cached_compute_priors( 

+

114 model_config=self.model, 

+

115 graph_config=self.graph, 

+

116 dist_configs=self.distributions, 

+

117 samples=samples, 

+

118 progress_desc=f"Computing priors for scenario {i + 1}/{num_scens}", 

+

119 **prior_kwargs, 

+

120 ) 

+

121 

+

122 _fields = {"diagnosis", "midext", "mode"} 

+

123 posterior_kwargs = scenario.model_dump(include=_fields) 

+

124 

+

125 posteriors = cached_compute_posteriors( 

+

126 model_config=self.model, 

+

127 graph_config=self.graph, 

+

128 dist_configs=self.distributions, 

+

129 modality_configs=self.modalities, 

+

130 priors=_priors, 

+

131 progress_desc=f"Computing posteriors for scenario {i + 1}/{num_scens}", 

+

132 **posterior_kwargs, 

+

133 ) 

+

134 

+

135 self.posteriors.save(values=posteriors, dataset=f"{i:03d}") 

+

136 self.posteriors.set_attrs(attrs=prior_kwargs, dataset=f"{i:03d}") 

+

137 self.posteriors.set_attrs(attrs=posterior_kwargs, dataset=f"{i:03d}") 

+

138 

+

139 

+

140if __name__ == "__main__": 

+

141 main = assemble_main(settings_cls=PosteriorsCLI, prog_name="compute posteriors") 

+

142 main() 

+
+ + + diff --git a/htmlcov/z_055061514423972c_prevalences_py.html b/htmlcov/z_055061514423972c_prevalences_py.html new file mode 100644 index 0000000..fd439b4 --- /dev/null +++ b/htmlcov/z_055061514423972c_prevalences_py.html @@ -0,0 +1,333 @@ + + + + + Coverage for src/lyscripts/compute/prevalences.py: 91% + + + + + +
+
+

+ Coverage for src / lyscripts / compute / prevalences.py: + 91% +

+ +

+ 82 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Prevalence prediction module. 

+

2 

+

3This computes the prevalence of an observed involvement pattern, given a trained model. 

+

4It can also compare this prediction to the observed prevalence in the data. As for the 

+

5risk prediction, this uses caching and computes the priors first. 

+

6""" 

+

7 

+

8from collections.abc import Callable 

+

9from typing import Literal 

+

10 

+

11import lydata # noqa: F401 

+

12import numpy as np 

+

13import pandas as pd 

+

14from loguru import logger 

+

15from lydata import C, Q 

+

16from lydata.accessor import QueryPortion 

+

17from lydata.querier import NoneQ 

+

18from lydata.utils import is_old 

+

19from lymph import models 

+

20from pydantic import Field 

+

21from rich import progress 

+

22 

+

23from lyscripts.cli import assemble_main 

+

24from lyscripts.compute.priors import compute_priors 

+

25from lyscripts.compute.utils import ( 

+

26 BaseComputeCLI, 

+

27 HDF5FileStorage, 

+

28 get_cached, 

+

29) 

+

30from lyscripts.configs import ( 

+

31 DataConfig, 

+

32 DiagnosisConfig, 

+

33 DistributionConfig, 

+

34 GraphConfig, 

+

35 ModalityConfig, 

+

36 ModelConfig, 

+

37 ScenarioConfig, 

+

38 add_distributions, 

+

39 add_modalities, 

+

40 construct_model, 

+

41) 

+

42from lyscripts.utils import console 

+

43 

+

44 

+

45def compute_prevalences( 

+

46 model_config: ModelConfig, 

+

47 graph_config: GraphConfig, 

+

48 dist_configs: dict[str, DistributionConfig], 

+

49 modality_configs: dict[str, ModalityConfig], 

+

50 priors: np.ndarray, 

+

51 diagnosis: dict[Literal["ipsi", "contra"], dict], 

+

52 midext: bool | None = None, 

+

53 progress_desc: str = "Computing prevalences from priors", 

+

54) -> np.ndarray: 

+

55 """Compute the prevalence of a diagnosis given the priors and the model.""" 

+

56 model = construct_model(model_config, graph_config) 

+

57 model = add_distributions(model, dist_configs) 

+

58 

+

59 if len(modality_configs) != 1: 

+

60 msg = "Only one modality is supported for prevalence prediction." 

+

61 logger.error(msg) 

+

62 raise ValueError(msg) 

+

63 

+

64 model = add_modalities(model, modality_configs) 

+

65 prevalences = [] 

+

66 kwargs = {"midext": midext} if isinstance(model, models.Midline) else {} 

+

67 

+

68 for prior in progress.track( 

+

69 sequence=priors, 

+

70 description=progress_desc, 

+

71 total=len(priors), 

+

72 console=console, 

+

73 ): 

+

74 obs_dist = model.obs_dist(given_state_dist=prior) 

+

75 involvement = { 

+

76 side: diagnosis.get(side).get(next(iter(modality_configs))) 

+

77 for side in ["ipsi", "contra"] 

+

78 } 

+

79 

+

80 if isinstance(model, models.Unilateral | models.HPVUnilateral): 

+

81 involvement = involvement.get("ipsi") 

+

82 

+

83 prevalence = model.marginalize( 

+

84 given_state_dist=obs_dist, 

+

85 involvement=involvement, 

+

86 **kwargs, 

+

87 ) 

+

88 

+

89 if isinstance(model, models.Midline): 

+

90 # In this case, we need to renormalize the prevalence by the marginalized 

+

91 # probability of all states with midline extension. We must do this, because 

+

92 # we compute the analogous quantity for the data. In principle, we could 

+

93 # also compute the prevalence of the diagnosis *and* midline extension, but 

+

94 # we have decided to compute the diagnosis *given* midline extension. 

+

95 # https://github.com/lycosystem/lyscripts/blob/ea49ec/lyscripts/compute/prevalences.py#L217-L225 

+

96 midext_prob = model.marginalize( 

+

97 involvement=None, 

+

98 given_state_dist=obs_dist, 

+

99 **kwargs, 

+

100 ) 

+

101 prevalence /= midext_prob 

+

102 

+

103 prevalences.append(prevalence) 

+

104 

+

105 return np.stack(prevalences) 

+

106 

+

107 

+

108def generate_query_from_diagnosis(diagnosis: DiagnosisConfig) -> Q: 

+

109 """Transform a diagnosis into a query for the data.""" 

+

110 result = NoneQ() 

+

111 for side in ["ipsi", "contra"]: 

+

112 for modality, pattern in getattr(diagnosis, side, {}).items(): 

+

113 for lnl, value in pattern.items(): 

+

114 column = (modality, side, lnl) 

+

115 result &= C(column) == value 

+

116 return result 

+

117 

+

118 

+

119def observe_prevalence( 

+

120 data: pd.DataFrame, 

+

121 scenario_config: ScenarioConfig, 

+

122 mapping: dict[int, str] | Callable[[int], str] | None = None, 

+

123) -> QueryPortion: 

+

124 """Extract prevalence defined in a ``scenario`` from the ``data``. 

+

125 

+

126 ``mapping`` defines how the T-stages in the data are supposed to be mapped to the 

+

127 T-stages defined in the ``scenario``. 

+

128 

+

129 It returns the number of patients that match the given scenario and the total 

+

130 number of patients that are considered. E.g., in the example below we 79 patients 

+

131 are of late T-stage and have a tumor extending over the midline. Of those, 30 were 

+

132 diagnosed with contralateral involvement in LNL II based on a CT scan. 

+

133 

+

134 >>> data = next(lydata.load_datasets(year=2021, institution="usz")) 

+

135 >>> scenario_config = ScenarioConfig( 

+

136 ... t_stages=["late"], 

+

137 ... midext=True, 

+

138 ... diagnosis=DiagnosisConfig(contra={"CT": {"II": True}}), 

+

139 ... ) 

+

140 >>> observe_prevalence(data, scenario_config) 

+

141 QueryPortion(match=np.int64(7), total=np.int64(79)) 

+

142 """ 

+

143 mapping = mapping or DataConfig.model_fields["mapping"].default_factory() 

+

144 key = ("tumor", "1", "t_stage") if is_old(data) else ("tumor", "core", "t_stage") 

+

145 data[key] = data.ly.t_stage.map(mapping) 

+

146 

+

147 has_t_stage = C("t_stage").isin(scenario_config.t_stages) 

+

148 if scenario_config.midext is None: 

+

149 has_midext = NoneQ() 

+

150 else: 

+

151 has_midext = C("midext") == scenario_config.midext 

+

152 

+

153 # Note that below we compute the prevalence of the diagnosis *given* midline 

+

154 # extension. This means, that when computing the prevalence of the diagnosis in 

+

155 # the model, we need to renormalize by diving by the probability of midline 

+

156 # extension. For an older - but pretty surely correct - implementation see 

+

157 # https://github.com/lycosystem/lyscripts/blob/ea49ec/lyscripts/compute/prevalences.py#L217-L225 

+

158 return data.ly.portion( 

+

159 query=generate_query_from_diagnosis(scenario_config.diagnosis), 

+

160 given=has_t_stage & has_midext, 

+

161 ) 

+

162 

+

163 

+

164class PrevalencesCLI(BaseComputeCLI): 

+

165 """Predict the prevalence of an involvement pattern from model samples.""" 

+

166 

+

167 modalities: dict[str, ModalityConfig] = Field( 

+

168 default={}, 

+

169 description=( 

+

170 "Maps names of diagnostic modalities to their specificity/sensitivity." 

+

171 ), 

+

172 ) 

+

173 prevalences: HDF5FileStorage = Field( 

+

174 description="Storage for the computed prevalences.", 

+

175 ) 

+

176 data: DataConfig 

+

177 

+

178 def cli_cmd(self) -> None: 

+

179 """Start the ``prevalences`` subcommand.""" 

+

180 logger.debug(self.model_dump_json(indent=2)) 

+

181 global_attrs = self.model_dump( 

+

182 include={"model", "graph", "distributions", "modalities"}, 

+

183 ) 

+

184 self.prevalences.set_attrs(attrs=global_attrs, dataset="/") 

+

185 

+

186 samples = self.sampling.load() 

+

187 cached_compute_priors = get_cached(compute_priors, self.cache_dir) 

+

188 cached_compute_prevalences = get_cached(compute_prevalences, self.cache_dir) 

+

189 num_scens = len(self.scenarios) 

+

190 

+

191 for i, scenario in enumerate(self.scenarios): 

+

192 _fields = {"t_stages", "t_stages_dist", "mode"} 

+

193 prior_kwargs = scenario.model_dump(include=_fields) 

+

194 

+

195 _priors = cached_compute_priors( 

+

196 model_config=self.model, 

+

197 graph_config=self.graph, 

+

198 dist_configs=self.distributions, 

+

199 samples=samples, 

+

200 progress_desc=f"Computing priors for scenario {i + 1}/{num_scens}", 

+

201 **prior_kwargs, 

+

202 ) 

+

203 

+

204 _fields = {"diagnosis", "midext"} 

+

205 prevalence_kwargs = scenario.model_dump(include=_fields) 

+

206 

+

207 prevalences = cached_compute_prevalences( 

+

208 model_config=self.model, 

+

209 graph_config=self.graph, 

+

210 dist_configs=self.distributions, 

+

211 modality_configs=self.modalities, 

+

212 priors=_priors, 

+

213 progress_desc=f"Computing prevalences for scenario {i + 1}/{num_scens}", 

+

214 **prevalence_kwargs, 

+

215 ) 

+

216 

+

217 portion = observe_prevalence( 

+

218 data=self.data.load(), 

+

219 scenario_config=scenario, 

+

220 mapping=self.data.mapping, 

+

221 ) 

+

222 self.prevalences.save(values=prevalences, dataset=f"{i:03d}") 

+

223 self.prevalences.set_attrs(attrs=prior_kwargs, dataset=f"{i:03d}") 

+

224 self.prevalences.set_attrs(attrs=prevalence_kwargs, dataset=f"{i:03d}") 

+

225 self.prevalences.set_attrs( 

+

226 attrs={ 

+

227 "num_match": portion.match, 

+

228 "num_total": portion.total, 

+

229 }, 

+

230 dataset=f"{i:03d}", 

+

231 ) 

+

232 

+

233 

+

234if __name__ == "__main__": 

+

235 main = assemble_main(settings_cls=PrevalencesCLI, prog_name="compute prevalences") 

+

236 main() 

+
+ + + diff --git a/htmlcov/z_055061514423972c_priors_py.html b/htmlcov/z_055061514423972c_priors_py.html new file mode 100644 index 0000000..0ed3cff --- /dev/null +++ b/htmlcov/z_055061514423972c_priors_py.html @@ -0,0 +1,208 @@ + + + + + Coverage for src/lyscripts/compute/priors.py: 94% + + + + + +
+
+

+ Coverage for src / lyscripts / compute / priors.py: + 94% +

+ +

+ 35 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Given samples drawn during an MCMC round, compute the (prior) state distributions. 

+

2 

+

3This is done for each sample and for a list of specified scenarios. The computation is 

+

4cached at a location specified by the ``--cache_dir`` argument using ``joblib``. 

+

5""" 

+

6 

+

7from typing import Literal 

+

8 

+

9import numpy as np 

+

10from loguru import logger 

+

11from pydantic import Field 

+

12from rich import progress 

+

13 

+

14from lyscripts.cli import assemble_main 

+

15from lyscripts.compute.utils import BaseComputeCLI, HDF5FileStorage, get_cached 

+

16from lyscripts.configs import ( 

+

17 DistributionConfig, 

+

18 GraphConfig, 

+

19 ModelConfig, 

+

20 add_distributions, 

+

21 construct_model, 

+

22) 

+

23from lyscripts.utils import console 

+

24 

+

25 

+

26def compute_priors( 

+

27 model_config: ModelConfig, 

+

28 graph_config: GraphConfig, 

+

29 dist_configs: dict[str, DistributionConfig], 

+

30 samples: np.ndarray, 

+

31 t_stages: list[int | str], 

+

32 t_stages_dist: list[float], 

+

33 mode: Literal["HMM", "BN"] = "HMM", 

+

34 progress_desc: str = "Computing priors from samples", 

+

35) -> np.ndarray: 

+

36 """Compute prior state distributions from the ``samples`` for the ``model``. 

+

37 

+

38 This will call the ``model`` method :py:meth:`~lymph.types.Model.state_dist` 

+

39 for each of the ``samples``. The prior state distributions are computed for 

+

40 each of the ``t_stages`` and marginalized over using the ``t_stages_dist``. 

+

41 """ 

+

42 model = construct_model(model_config, graph_config) 

+

43 model = add_distributions(model, dist_configs) 

+

44 priors = [] 

+

45 

+

46 for sample in progress.track( 

+

47 sequence=samples, 

+

48 description=progress_desc, 

+

49 total=len(samples), 

+

50 console=console, 

+

51 ): 

+

52 model.set_params(*sample) 

+

53 priors.append( 

+

54 sum( 

+

55 model.state_dist(t_stage=t, mode=mode) * p 

+

56 for t, p in zip(t_stages, t_stages_dist, strict=False) 

+

57 ), 

+

58 ) 

+

59 

+

60 return np.stack(priors) 

+

61 

+

62 

+

63class PriorsCLI(BaseComputeCLI): 

+

64 """Compute the prior state distributions from MCMC samples.""" 

+

65 

+

66 priors: HDF5FileStorage = Field(description="Storage for the computed priors.") 

+

67 

+

68 def cli_cmd(self) -> None: 

+

69 """Start the ``priors`` subcommand. 

+

70 

+

71 Given a ``graph``, ``model``, ``distributions`` over diagnosis times, and 

+

72 MCMC samples loaded from the ``sampling`` argument, this command computes the 

+

73 prior state distributions for each of the specified ``scenarios``. 

+

74 

+

75 Precomputing these state distributions is useful, because they largely only 

+

76 depend on T-stage and not on the diagnosis or involvement of interest. Hence, 

+

77 computing the :py:mod:`~lyscripts.compute.posteriors` and 

+

78 :py:mod:`~lyscripts.compute.risks` can be sped up. 

+

79 

+

80 Note that this command will use `joblib`_ to cache its computations. 

+

81 

+

82 .. _joblib: https://joblib.readthedocs.io/ 

+

83 """ 

+

84 logger.debug(self.model_dump_json(indent=2)) 

+

85 global_attrs = self.model_dump(include={"model", "graph", "distributions"}) 

+

86 self.priors.set_attrs(attrs=global_attrs, dataset="/") 

+

87 

+

88 samples = self.sampling.load() 

+

89 cached_compute_priors = get_cached(compute_priors, self.cache_dir) 

+

90 num_scenarios = len(self.scenarios) 

+

91 

+

92 for i, scenario in enumerate(self.scenarios): 

+

93 _fields = {"t_stages", "t_stages_dist", "mode"} 

+

94 prior_kwargs = scenario.model_dump(include=_fields) 

+

95 

+

96 priors = cached_compute_priors( 

+

97 model_config=self.model, 

+

98 graph_config=self.graph, 

+

99 dist_configs=self.distributions, 

+

100 samples=samples, 

+

101 progress_desc=f"Computing priors for scenario {i + 1}/{num_scenarios}", 

+

102 **prior_kwargs, 

+

103 ) 

+

104 

+

105 self.priors.save(values=priors, dataset=f"{i:03d}") 

+

106 self.priors.set_attrs(attrs=prior_kwargs, dataset=f"{i:03d}") 

+

107 

+

108 

+

109if __name__ == "__main__": 

+

110 main = assemble_main(settings_cls=PriorsCLI, prog_name="compute priors") 

+

111 main() 

+
+ + + diff --git a/htmlcov/z_055061514423972c_risks_py.html b/htmlcov/z_055061514423972c_risks_py.html new file mode 100644 index 0000000..8386873 --- /dev/null +++ b/htmlcov/z_055061514423972c_risks_py.html @@ -0,0 +1,237 @@ + + + + + Coverage for src/lyscripts/compute/risks.py: 35% + + + + + +
+
+

+ Coverage for src / lyscripts / compute / risks.py: + 35% +

+ +

+ 51 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Predict risks of involvements for scenarios using drawn MCMC samples. 

+

2 

+

3As the priors and posteriors, this computation, too, uses caching and may skip the 

+

4computation of these two initial steps if the cache directory is the same as during 

+

5their computation. 

+

6""" 

+

7 

+

8from typing import Literal 

+

9 

+

10import numpy as np 

+

11from loguru import logger 

+

12from lymph import models 

+

13from pydantic import Field 

+

14from rich import progress 

+

15 

+

16from lyscripts.cli import assemble_main 

+

17from lyscripts.compute.posteriors import compute_posteriors 

+

18from lyscripts.compute.priors import compute_priors 

+

19from lyscripts.compute.utils import BaseComputeCLI, HDF5FileStorage, get_cached 

+

20from lyscripts.configs import ( 

+

21 DistributionConfig, 

+

22 GraphConfig, 

+

23 ModalityConfig, 

+

24 ModelConfig, 

+

25 add_distributions, 

+

26 add_modalities, 

+

27 construct_model, 

+

28) 

+

29from lyscripts.utils import console 

+

30 

+

31 

+

32def compute_risks( 

+

33 model_config: ModelConfig, 

+

34 graph_config: GraphConfig, 

+

35 dist_configs: dict[str, DistributionConfig], 

+

36 modality_configs: dict[str, ModalityConfig], 

+

37 posteriors: np.ndarray, 

+

38 involvement: dict[Literal["ipsi", "contra"], dict], 

+

39 progress_desc: str = "Computing risks from posteriors", 

+

40) -> np.ndarray: 

+

41 """Compute the risk of ``involvement`` from each of the ``posteriors``. 

+

42 

+

43 Essentially, this only calls the model's :py:meth:`lymph.models.Model.marginalize` 

+

44 method, as nothing more is necessary than to marginalize the full posterior state 

+

45 distribution over the states that correspond to the involvement of interest. 

+

46 """ 

+

47 model = construct_model(model_config, graph_config) 

+

48 model = add_distributions(model, dist_configs) 

+

49 model = add_modalities(model, modality_configs) 

+

50 risks = [] 

+

51 

+

52 if isinstance(model, models.Unilateral | models.HPVUnilateral): 

+

53 involvement = involvement.get("ipsi") 

+

54 

+

55 for posterior in progress.track( 

+

56 sequence=posteriors, 

+

57 description=progress_desc, 

+

58 total=len(posteriors), 

+

59 console=console, 

+

60 ): 

+

61 risks.append( 

+

62 model.marginalize(involvement=involvement, given_state_dist=posterior), 

+

63 ) 

+

64 

+

65 return np.stack(risks) 

+

66 

+

67 

+

68class RisksCLI(BaseComputeCLI): 

+

69 """Predict the risk of involvement scenarios from model samples given diagnoses.""" 

+

70 

+

71 modalities: dict[str, ModalityConfig] = Field( 

+

72 default={}, 

+

73 description=( 

+

74 "Maps names of diagnostic modalities to their specificity/sensitivity." 

+

75 ), 

+

76 ) 

+

77 risks: HDF5FileStorage = Field(description="Storage for the computed risks.") 

+

78 

+

79 def cli_cmd(self) -> None: 

+

80 """Start the ``risks`` subcommand.""" 

+

81 logger.debug(self.model_dump_json(indent=2)) 

+

82 global_attrs = self.model_dump( 

+

83 include={"model", "graph", "distributions", "modalities"}, 

+

84 ) 

+

85 self.risks.set_attrs(attrs=global_attrs, dataset="/") 

+

86 

+

87 samples = self.sampling.load() 

+

88 cached_compute_priors = get_cached(compute_priors, self.cache_dir) 

+

89 cached_compute_posteriors = get_cached(compute_posteriors, self.cache_dir) 

+

90 cached_compute_risks = get_cached(compute_risks, self.cache_dir) 

+

91 num_scens = len(self.scenarios) 

+

92 

+

93 for i, scenario in enumerate(self.scenarios): 

+

94 _fields = {"t_stages", "t_stages_dist", "mode"} 

+

95 prior_kwargs = scenario.model_dump(include=_fields) 

+

96 

+

97 _priors = cached_compute_priors( 

+

98 model_config=self.model, 

+

99 graph_config=self.graph, 

+

100 dist_configs=self.distributions, 

+

101 samples=samples, 

+

102 progress_desc=f"Computing priors for scenario {i + 1}/{num_scens}", 

+

103 **prior_kwargs, 

+

104 ) 

+

105 

+

106 _fields = {"diagnosis", "midext", "mode"} 

+

107 posterior_kwargs = scenario.model_dump(include=_fields) 

+

108 

+

109 _posteriors = cached_compute_posteriors( 

+

110 model_config=self.model, 

+

111 graph_config=self.graph, 

+

112 dist_configs=self.distributions, 

+

113 modality_configs=self.modalities, 

+

114 priors=_priors, 

+

115 progress_desc=f"Computing posteriors for scenario {i + 1}/{num_scens}", 

+

116 **posterior_kwargs, 

+

117 ) 

+

118 

+

119 _fields = {"involvement"} 

+

120 risk_kwargs = scenario.model_dump(include=_fields) 

+

121 

+

122 risks = cached_compute_risks( 

+

123 model_config=self.model, 

+

124 graph_config=self.graph, 

+

125 dist_configs=self.distributions, 

+

126 modality_configs=self.modalities, 

+

127 posteriors=_posteriors, 

+

128 progress_desc=f"Computing risks for scenario {i + 1}/{num_scens}", 

+

129 **risk_kwargs, 

+

130 ) 

+

131 

+

132 self.risks.save(values=risks, dataset=f"{i:03d}") 

+

133 self.risks.set_attrs(attrs=prior_kwargs, dataset=f"{i:03d}") 

+

134 self.risks.set_attrs(attrs=posterior_kwargs, dataset=f"{i:03d}") 

+

135 self.risks.set_attrs(attrs=risk_kwargs, dataset=f"{i:03d}") 

+

136 

+

137 

+

138if __name__ == "__main__": 

+

139 main = assemble_main(settings_cls=RisksCLI, prog_name="compute risks") 

+

140 main() 

+
+ + + diff --git a/htmlcov/z_055061514423972c_utils_py.html b/htmlcov/z_055061514423972c_utils_py.html new file mode 100644 index 0000000..e842741 --- /dev/null +++ b/htmlcov/z_055061514423972c_utils_py.html @@ -0,0 +1,373 @@ + + + + + Coverage for src/lyscripts/compute/utils.py: 95% + + + + + +
+
+

+ Coverage for src / lyscripts / compute / utils.py: + 95% +

+ +

+ 118 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Utilities for precomputing the priors and posteriors.""" 

+

2 

+

3import ast 

+

4import functools 

+

5from pathlib import Path 

+

6from typing import Annotated, Any 

+

7 

+

8import h5py 

+

9import numpy as np 

+

10from joblib import Memory 

+

11from loguru import logger 

+

12from pydantic import AfterValidator, BaseModel, Field 

+

13 

+

14from lyscripts.configs import ( 

+

15 BaseCLI, 

+

16 DistributionConfig, 

+

17 GraphConfig, 

+

18 ModelConfig, 

+

19 SamplingConfig, 

+

20 ScenarioConfig, 

+

21) 

+

22 

+

23 

+

24class BaseComputeCLI(BaseCLI): 

+

25 """Common command line settings for the submodule ``compute``.""" 

+

26 

+

27 graph: GraphConfig 

+

28 model: ModelConfig = ModelConfig() 

+

29 distributions: dict[str, DistributionConfig] = Field( 

+

30 default={}, 

+

31 description=( 

+

32 "Mapping of model T-categories to predefined distributions over " 

+

33 "diagnose times." 

+

34 ), 

+

35 ) 

+

36 cache_dir: Path = Field( 

+

37 default=Path.cwd() / ".cache", 

+

38 description="Cache directory for storing function calls.", 

+

39 ) 

+

40 scenarios: list[ScenarioConfig] = Field( 

+

41 default=[], 

+

42 description="List of scenarios to compute risks for.", 

+

43 ) 

+

44 sampling: SamplingConfig 

+

45 

+

46 

+

47def is_hdf5_compatible(value: Any) -> bool: 

+

48 """Check if the given ``value`` can be stored in an HDF5 file.""" 

+

49 return isinstance( 

+

50 value, 

+

51 bool | str | bytes | int | float | np.ndarray | list | tuple, 

+

52 ) 

+

53 

+

54 

+

55def to_hdf5_attrs(mapping: dict[str, Any]) -> dict[str, str]: 

+

56 """Convert ``attrs`` to a dictionary of HDF5 compatible attributes or strings.""" 

+

57 res = {} 

+

58 for key, val in mapping.items(): 

+

59 if is_hdf5_compatible(val): 

+

60 res[key] = val 

+

61 else: 

+

62 res[key] = str(val) 

+

63 return res 

+

64 

+

65 

+

66def from_hdf5_attrs(mapping: h5py.AttributeManager) -> dict[str, Any]: 

+

67 """Convert the HDF5 attributes to a dictionary of Python objects.""" 

+

68 attrs = {} 

+

69 for key, value in mapping.items(): 

+

70 try: 

+

71 attrs[key] = ast.literal_eval(value) 

+

72 except ValueError: 

+

73 attrs[key] = value 

+

74 return attrs 

+

75 

+

76 

+

77def extract_modalities(diagnosis: dict[str, Any]) -> set[str]: 

+

78 """Get the set of modalities used in the ``diagnosis``. 

+

79 

+

80 This is not used in the main apps anymore, but since it may be useful, I keep it. 

+

81 

+

82 >>> diagnosis = { 

+

83 ... "ipsi": { 

+

84 ... "MRI": {"II": True, "III": False}, 

+

85 ... "PET": {"II": False, "III": True}, 

+

86 ... }, 

+

87 ... "contra": {"MRI": {"II": False, "III": None}}, 

+

88 ... } 

+

89 >>> sorted(extract_modalities(diagnosis)) 

+

90 ['MRI', 'PET'] 

+

91 """ 

+

92 modality_set = set() 

+

93 

+

94 if "ipsi" not in diagnosis and "contra" not in diagnosis: 

+

95 return modality_set | set(diagnosis.keys()) 

+

96 

+

97 for side in ["ipsi", "contra"]: 

+

98 if side in diagnosis: 

+

99 modality_set |= set(diagnosis[side].keys()) 

+

100 

+

101 return modality_set 

+

102 

+

103 

+

104def ensure_parent_dir(path: Path) -> Path: 

+

105 """Create the parent directory of the given ``path``.""" 

+

106 path.parent.mkdir(parents=True, exist_ok=True) 

+

107 logger.debug(f"Ensured parent directory of {path}") 

+

108 return path 

+

109 

+

110 

+

111HasParentPath = Annotated[Path, AfterValidator(ensure_parent_dir)] 

+

112"""Type hint for path whose parent dir is created if it doesn't exist.""" 

+

113 

+

114 

+

115class HDF5FileStorage(BaseModel): 

+

116 """HDF5 file storage for in- and outputs of computations.""" 

+

117 

+

118 file: HasParentPath = Field( 

+

119 description="Path to the HDF5 file. Parent directories are created if needed.", 

+

120 ) 

+

121 dataset: str | None = Field( 

+

122 default=None, 

+

123 description=( 

+

124 "Name of the dataset in the HDF5 file. Save/load methods can override this." 

+

125 ), 

+

126 ) 

+

127 

+

128 def _get_dataset(self) -> str: 

+

129 """Get attribute ``dataset`` or the first dataset in the file. 

+

130 

+

131 >>> from tempfile import TemporaryDirectory 

+

132 >>> tmp_path = Path(TemporaryDirectory().name) / "test.hdf5" 

+

133 >>> storage = HDF5FileStorage(file=tmp_path) 

+

134 >>> rand_data = np.random.rand(100, 100) 

+

135 >>> storage.save(values=rand_data, dataset="test") 

+

136 >>> np.all(storage.load(dataset="test") == rand_data) 

+

137 np.True_ 

+

138 >>> np.all(storage.load() == rand_data) # loads first dataset 

+

139 np.True_ 

+

140 >>> some_attrs = {"key": "value"} 

+

141 >>> storage.set_attrs(attrs=some_attrs, dataset="test") 

+

142 >>> storage.get_attrs(dataset="test") 

+

143 {'key': 'value'} 

+

144 """ 

+

145 if self.dataset is not None: 

+

146 return self.dataset 

+

147 

+

148 with h5py.File(self.file, "r") as file: 

+

149 return next(iter(file.keys())) 

+

150 

+

151 def load(self, dataset: str | None = None) -> np.ndarray: 

+

152 """Load the dataset with the name ``dataset``.""" 

+

153 dataset = dataset or self._get_dataset() 

+

154 

+

155 with h5py.File(self.file, "r") as file: 

+

156 array = file[dataset][()] 

+

157 

+

158 logger.debug(f"Loaded dataset {dataset} from {self.file}") 

+

159 return array 

+

160 

+

161 def get_attrs(self, dataset: str | None = None) -> dict[str, Any]: 

+

162 """Get the attributes of the dataset ``dataset``.""" 

+

163 dataset = dataset or self._get_dataset() 

+

164 

+

165 with h5py.File(self.file, "r") as file: 

+

166 attrs = from_hdf5_attrs(file[dataset].attrs) 

+

167 

+

168 logger.debug(f"Loaded attrs for dataset '{dataset}' from {self.file}") 

+

169 return attrs 

+

170 

+

171 def save(self, values: np.ndarray, dataset: str | None = None) -> None: 

+

172 """Set the ``values`` for the ``dataset`` dataset.""" 

+

173 dataset = dataset or self._get_dataset() 

+

174 

+

175 with h5py.File(self.file, "a") as file: 

+

176 if dataset in file: 

+

177 del file[dataset] 

+

178 file[dataset] = values 

+

179 

+

180 logger.debug(f"Stored dataset {dataset} in {self.file}") 

+

181 

+

182 def set_attrs(self, attrs: dict[str, Any], dataset: str | None = None) -> None: 

+

183 """Update the ``attrs`` for the ``dataset`` dataset.""" 

+

184 dataset = dataset or self._get_dataset() 

+

185 

+

186 with h5py.File(self.file, "a") as file: 

+

187 if dataset not in file: 

+

188 raise ValueError(f"Dataset '{dataset}' not found in {self.file}") 

+

189 file[dataset].attrs.update(to_hdf5_attrs(attrs)) 

+

190 

+

191 logger.debug(f"Stored attrs {attrs} for dataset '{dataset}' in {self.file}") 

+

192 

+

193 

+

194def reduce_pattern(pattern: dict[str, dict[str, bool]]) -> dict[str, dict[str, bool]]: 

+

195 """Reduce a ``pattern`` by removing all entries that are ``None``. 

+

196 

+

197 This way, it should be completely recoverable by the ``complete_pattern`` function 

+

198 but be shorter to store. 

+

199 

+

200 Unused but maybe useful for some cases. Keeping it in here for now. 

+

201 

+

202 >>> full = { 

+

203 ... "ipsi": {"I": None, "II": True, "III": None}, 

+

204 ... "contra": {"I": None, "II": None, "III": None}, 

+

205 ... } 

+

206 >>> reduce_pattern(full) 

+

207 {'ipsi': {'II': True}} 

+

208 

+

209 """ 

+

210 tmp_pattern = pattern.copy() 

+

211 reduced_pattern = {} 

+

212 for side in ["ipsi", "contra"]: 

+

213 if not all(v is None for v in tmp_pattern[side].values()): 

+

214 reduced_pattern[side] = {} 

+

215 for lnl, val in tmp_pattern[side].items(): 

+

216 if val is not None: 

+

217 reduced_pattern[side][lnl] = val 

+

218 

+

219 return reduced_pattern 

+

220 

+

221 

+

222def complete_pattern( 

+

223 pattern: dict[str, dict[str, bool]] | None, 

+

224 lnls: list[str], 

+

225) -> dict[str, dict[str, bool]]: 

+

226 """Make sure the provided involvement ``pattern`` is correct. 

+

227 

+

228 For each side of the neck, and for each of the ``lnls`` this should in the end 

+

229 contain ``True``, ``False`` or ``None``. 

+

230 

+

231 Unused but maybe useful for some cases. Keeping it in here for now. 

+

232 

+

233 >>> pattern = {"ipsi": {"II": True}} 

+

234 >>> lnls = ["II", "III"] 

+

235 >>> complete_pattern(pattern, lnls) 

+

236 {'ipsi': {'II': True, 'III': None}, 'contra': {'II': None, 'III': None}} 

+

237 

+

238 """ 

+

239 if pattern is None: 

+

240 pattern = {} 

+

241 

+

242 for side in ["ipsi", "contra"]: 

+

243 if side not in pattern: 

+

244 pattern[side] = {} 

+

245 

+

246 for lnl in lnls: 

+

247 if lnl not in pattern[side]: 

+

248 pattern[side][lnl] = None 

+

249 elif pattern[side][lnl] is None: 

+

250 continue 

+

251 else: 

+

252 pattern[side][lnl] = bool(pattern[side][lnl]) 

+

253 

+

254 return pattern 

+

255 

+

256 

+

257def get_cached(func: callable, cache_dir: Path) -> callable: 

+

258 """Return cached ``func`` with a cache at ``cache_dir``.""" 

+

259 memory = Memory(location=cache_dir, verbose=0) 

+

260 cached_func = memory.cache(func, ignore=["progress_desc"]) 

+

261 logger.info(f"Initialized cache for {func.__name__} at {cache_dir}") 

+

262 

+

263 @functools.wraps(func) 

+

264 def log_cache_info_wrapper(*args, **kwargs): 

+

265 logger.debug(f"Calling {func.__name__}({args}, {kwargs})") 

+

266 if cached_func.check_call_in_cache(*args, **kwargs): 

+

267 logger.info(f"Cache hit for {func.__name__}, returning stored result") 

+

268 else: 

+

269 logger.info(f"Cache miss for {func.__name__}, computing result") 

+

270 

+

271 result = cached_func(*args, **kwargs) 

+

272 logger.debug(f"Computed {result = }") 

+

273 return result 

+

274 

+

275 log_cache_info_wrapper._cached_func = cached_func 

+

276 return log_cache_info_wrapper 

+
+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc___init___py.html b/htmlcov/z_5bf5c588c698c6cc___init___py.html new file mode 100644 index 0000000..fbb5bfa --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc___init___py.html @@ -0,0 +1,173 @@ + + + + + Coverage for src/lyscripts/__init__.py: 77% + + + + + +
+
+

+ Coverage for src / lyscripts / __init__.py: + 77% +

+ +

+ 30 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Initial entry point for the lyscripts package and CLIs. 

+

2 

+

3This top-level module configures and provides the top-level CLI through which all 

+

4subcommands can be accessed. 

+

5""" 

+

6 

+

7import sys 

+

8from typing import Literal 

+

9 

+

10import pandas as pd 

+

11from loguru import logger 

+

12from pydantic import Field 

+

13from pydantic_settings import ( 

+

14 BaseSettings, 

+

15 CliApp, 

+

16 CliImplicitFlag, 

+

17 CliSubCommand, 

+

18) 

+

19 

+

20from lyscripts import compute, data, integrate, sample, schedule # noqa: F401 

+

21from lyscripts._version import version 

+

22from lyscripts.cli import assemble_main, configure_logging 

+

23from lyscripts.utils import console 

+

24 

+

25__version__ = version 

+

26__description__ = "Package to interact with lymphatic progression data and models." 

+

27__author__ = "Roman Ludwig" 

+

28__email__ = "gygqdstu3@mozmail.com" 

+

29__uri__ = "https://github.com/lycosystem/lyscripts" 

+

30 

+

31# activate copy on write in pandas. 

+

32# See https://pandas.pydata.org/docs/user_guide/copy_on_write.html 

+

33pd.options.mode.copy_on_write = True 

+

34 

+

35logger.disable("lyscripts") 

+

36 

+

37 

+

38class LyscriptsCLI(BaseSettings): 

+

39 """A CLI to interact with lymphatic progression data and models.""" 

+

40 

+

41 version: CliImplicitFlag[bool] = Field( 

+

42 default=False, 

+

43 description="Display the version of lyscripts and exit.", 

+

44 ) 

+

45 log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field( 

+

46 default="INFO", 

+

47 description="Set the log level of the lyscripts CLI.", 

+

48 ) 

+

49 

+

50 data: CliSubCommand[data.DataCLI] 

+

51 sample: CliSubCommand[sample.SampleCLI] 

+

52 compute: CliSubCommand[compute.ComputeCLI] 

+

53 schedule: CliSubCommand[schedule.ScheduleCLI] 

+

54 integrate: CliSubCommand[integrate.IntegrateCLI] 

+

55 

+

56 def __init__(self, **kwargs): 

+

57 """Add logging configuration to the lyscripts CLI.""" 

+

58 configure_logging(argv=sys.argv, console=console) 

+

59 super().__init__(**kwargs) 

+

60 

+

61 def cli_cmd(self) -> None: 

+

62 """Start the main lyscripts CLI. 

+

63 

+

64 If the ``version`` flag is set, the version of lyscripts is displayed and the 

+

65 program exits. Otherwise, the lyscripts CLI runs one of the subcommands. 

+

66 """ 

+

67 logger.debug("Starting lyscripts CLI.") 

+

68 

+

69 if self.version: 

+

70 logger.info(f"lyscripts {__version__}") 

+

71 return 

+

72 

+

73 CliApp.run_subcommand(self) 

+

74 

+

75 

+

76main = assemble_main(settings_cls=LyscriptsCLI, prog_name="lyscripts") 

+
+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc___main___py.html b/htmlcov/z_5bf5c588c698c6cc___main___py.html new file mode 100644 index 0000000..b0ade06 --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc___main___py.html @@ -0,0 +1,103 @@ + + + + + Coverage for src/lyscripts/__main__.py: 0% + + + + + +
+
+

+ Coverage for src / lyscripts / __main__.py: + 0% +

+ +

+ 3 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Utility for common tasks w.r.t. inference & prediction using `lymph` package.""" 

+

2 

+

3from lyscripts import main 

+

4 

+

5if __name__ == "__main__": 

+

6 main() 

+
+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc__version_py.html b/htmlcov/z_5bf5c588c698c6cc__version_py.html new file mode 100644 index 0000000..d0d4a1a --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc__version_py.html @@ -0,0 +1,121 @@ + + + + + Coverage for src/lyscripts/_version.py: 100% + + + + + +
+
+

+ Coverage for src / lyscripts / _version.py: + 100% +

+ +

+ 11 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1# file generated by vcs-versioning 

+

2# don't change, don't track in version control 

+

3from __future__ import annotations 

+

4 

+

5__all__ = [ 

+

6 "__version__", 

+

7 "__version_tuple__", 

+

8 "version", 

+

9 "version_tuple", 

+

10 "__commit_id__", 

+

11 "commit_id", 

+

12] 

+

13 

+

14version: str 

+

15__version__: str 

+

16__version_tuple__: tuple[int | str, ...] 

+

17version_tuple: tuple[int | str, ...] 

+

18commit_id: str | None 

+

19__commit_id__: str | None 

+

20 

+

21__version__ = version = '0.1.dev1' 

+

22__version_tuple__ = version_tuple = (0, 1, 'dev1') 

+

23 

+

24__commit_id__ = commit_id = 'g77f1c18c7' 

+
+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_cli_py.html b/htmlcov/z_5bf5c588c698c6cc_cli_py.html new file mode 100644 index 0000000..ab2f74f --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_cli_py.html @@ -0,0 +1,220 @@ + + + + + Coverage for src/lyscripts/cli.py: 42% + + + + + +
+
+

+ Coverage for src / lyscripts / cli.py: + 42% +

+ +

+ 45 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Utilities for configuring and running CLIs app. 

+

2 

+

3In this module, we define and configure a :py:class:`RichDefaultHelpFormatter` that 

+

4nicely displays the CLI's ``--help`` text. We also provide a function to 

+

5:py:func:`assemble a main function <assemble_main>` for the different CLI apps to save 

+

6some boilerplate code. Lastly, we have two functions related to the `loguru`_ setup. 

+

7 

+

8.. _loguru: https://loguru.readthedocs.io/en/stable 

+

9""" 

+

10 

+

11import inspect 

+

12import logging 

+

13from collections.abc import Callable 

+

14from typing import Literal 

+

15 

+

16from loguru import logger 

+

17from pydantic_settings import BaseSettings, CliApp, CliSettingsSource 

+

18from rich.console import Console 

+

19from rich.logging import RichHandler 

+

20from rich_argparse import ArgumentDefaultsRichHelpFormatter 

+

21 

+

22_current_log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO" 

+

23 

+

24 

+

25def assemble_main( 

+

26 settings_cls: type[BaseSettings], 

+

27 prog_name: str, 

+

28) -> Callable[[], None]: 

+

29 """Assemble a ``main()`` function for a CLI app. 

+

30 

+

31 It creates a :py:class:`~pydantic_settings.CliSettingsSource` object with the 

+

32 provided ``settings_cls`` and ``prog_name``. Then, it fills in some default 

+

33 settings for the CLI configuration and runs the CLI app. 

+

34 

+

35 Assembling a ``main()`` function for all subcommands like this saves some 

+

36 boilerplate code. 

+

37 """ 

+

38 

+

39 def main() -> None: 

+

40 """Start the main CLI app.""" 

+

41 cli_settings_source = CliSettingsSource( 

+

42 settings_cls=settings_cls, 

+

43 cli_prog_name=prog_name, 

+

44 cli_kebab_case=True, 

+

45 cli_use_class_docs_for_groups=True, 

+

46 formatter_class=ArgumentDefaultsRichHelpFormatter, 

+

47 ) 

+

48 CliApp.run(settings_cls, cli_settings_source=cli_settings_source) 

+

49 

+

50 return main 

+

51 

+

52 

+

53def somewhat_safely_get_loglevel( 

+

54 argv: list[str], 

+

55) -> Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]: 

+

56 """Set the log level of the lyscripts CLI. 

+

57 

+

58 This is a bit of a hack, since the :py:class:`~lyscripts.LyscriptsCLI` class is not 

+

59 yet initialized when we need to set the log level. In case the provided log-level is 

+

60 not valid, :py:class:`~lyscripts.LyscriptsCLI` will raise an exception at a later 

+

61 point. 

+

62 

+

63 Return ``"INFO"`` by default. 

+

64 """ 

+

65 args_str = " ".join(argv) 

+

66 if "--log-level" in args_str: 

+

67 for log_level in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]: 

+

68 if log_level in args_str: 

+

69 return log_level 

+

70 

+

71 return "INFO" 

+

72 

+

73 

+

74def configure_logging( 

+

75 argv: list[str], 

+

76 console: Console, 

+

77) -> None: 

+

78 """Configure the `loguru`_ logging system of the lyscripts CLI. 

+

79 

+

80 This function sets the log level and format of the lyscripts CLI. Notably, for 

+

81 a log-level of `DEBUG` the output will contain more information. 

+

82 

+

83 .. _loguru: https://loguru.readthedocs.io/en/stable 

+

84 """ 

+

85 logger.enable("lyscripts") 

+

86 logger.enable("lydata") 

+

87 global _current_log_level 

+

88 _current_log_level = somewhat_safely_get_loglevel(argv=argv) 

+

89 logger.remove() 

+

90 handler = RichHandler(console=console) 

+

91 logger.add( 

+

92 sink=handler, 

+

93 level=_current_log_level, 

+

94 format="<lvl>{message}</>", 

+

95 ) 

+

96 

+

97 

+

98class InterceptHandler(logging.Handler): 

+

99 """Intercept logging messages and redirect them to Loguru.""" 

+

100 

+

101 def emit(self, record: logging.LogRecord) -> None: 

+

102 """Intercept the log record and redirect it to Loguru.""" 

+

103 # Get corresponding Loguru level if it exists. 

+

104 try: 

+

105 level: str | int = logger.level(record.levelname).name 

+

106 except ValueError: 

+

107 level = record.levelno 

+

108 

+

109 # Find caller from where originated the logged message. 

+

110 frame, depth = inspect.currentframe(), 0 

+

111 while frame: 

+

112 filename = frame.f_code.co_filename 

+

113 is_logging = filename == logging.__file__ 

+

114 is_frozen = "importlib" in filename and "_bootstrap" in filename 

+

115 if depth > 0 and not (is_logging or is_frozen): 

+

116 break 

+

117 frame = frame.f_back 

+

118 depth += 1 

+

119 

+

120 logger.opt(depth=depth, exception=record.exc_info).log( 

+

121 level, 

+

122 record.getMessage(), 

+

123 ) 

+
+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_configs_py.html b/htmlcov/z_5bf5c588c698c6cc_configs_py.html new file mode 100644 index 0000000..3dda220 --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_configs_py.html @@ -0,0 +1,922 @@ + + + + + Coverage for src/lyscripts/configs.py: 85% + + + + + +
+
+

+ Coverage for src / lyscripts / configs.py: + 85% +

+ +

+ 280 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Using `pydantic`_, we define configurations for the package. 

+

2 

+

3Most importantly, these configurations are part of the CLIs that the package provides. 

+

4but they also help with programmatically validating and constructing various objects. 

+

5Maybe most importantly, the :py:class:`GraphConfig` and :py:class:`ModelConfig` may be 

+

6used to precisely and reproducibly define how the function :py:func:`construct_model` 

+

7should create lymphatic progression :py:mod:`~lymph.models`. 

+

8 

+

9.. _pydantic: https://docs.pydantic.dev/latest/ 

+

10""" 

+

11 

+

12from __future__ import annotations 

+

13 

+

14import importlib 

+

15import importlib.util 

+

16import os 

+

17import warnings 

+

18from collections.abc import Callable, Sequence 

+

19from copy import deepcopy 

+

20from pathlib import Path 

+

21from typing import Annotated, Any, Literal 

+

22 

+

23import numpy as np 

+

24import pandas as pd 

+

25import yaml 

+

26from loguru import logger 

+

27from lydata.loader import LyDataset 

+

28from lydata.utils import ModalityConfig 

+

29from lymph import graph, models 

+

30from lymph.modalities import Pathological 

+

31from lymph.types import Model, PatternType 

+

32from pydantic import ( 

+

33 AfterValidator, 

+

34 BaseModel, 

+

35 ConfigDict, 

+

36 Field, 

+

37 FilePath, 

+

38) 

+

39from pydantic_settings import ( 

+

40 BaseSettings, 

+

41 PydanticBaseSettingsSource, 

+

42 YamlConfigSettingsSource, 

+

43) 

+

44from pydantic_settings.sources import DEFAULT_PATH 

+

45 

+

46from lyscripts.utils import binom_pmf, flatten, load_model_samples, load_patient_data 

+

47 

+

48FuncNameType = Literal["binomial"] 

+

49 

+

50 

+

51DIST_MAP: dict[FuncNameType, Callable] = { 

+

52 "binomial": binom_pmf, 

+

53} 

+

54 

+

55 

+

56class CrossValidationConfig(BaseModel): 

+

57 """Configs for splitting a dataset into cross-validation folds.""" 

+

58 

+

59 seed: int = Field( 

+

60 default=42, 

+

61 description="Seed for the random number generator.", 

+

62 ) 

+

63 folds: int = Field( 

+

64 default=5, 

+

65 description="Number of folds to split the dataset into.", 

+

66 ) 

+

67 

+

68 

+

69class DataConfig(BaseModel): 

+

70 """Where to load lymphatic progression data from and how to feed it into a model.""" 

+

71 

+

72 source: FilePath | LyDataset = Field( 

+

73 description=( 

+

74 "Either a path to a CSV file or a config that specifies how and where " 

+

75 "to fetch the data from." 

+

76 ), 

+

77 ) 

+

78 side: Literal["ipsi", "contra"] | None = Field( 

+

79 default=None, 

+

80 description="Side of the neck to load data for. Only for Unilateral models.", 

+

81 ) 

+

82 mapping: dict[Literal[0, 1, 2, 3, 4] | str, int | str] = Field( 

+

83 default_factory=lambda: {i: "early" if i <= 2 else "late" for i in range(5)}, 

+

84 description="Optional mapping of numeric T-stages to model T-stages.", 

+

85 ) 

+

86 

+

87 def load(self, **get_dataframe_kwargs) -> pd.DataFrame: 

+

88 """Load data from path or the :py:class:`~lydata.loader.LyDataset`.""" 

+

89 if isinstance(self.source, LyDataset): 

+

90 return self.source.get_dataframe(**get_dataframe_kwargs) 

+

91 

+

92 return load_patient_data(self.source, **get_dataframe_kwargs) 

+

93 

+

94 def get_load_kwargs(self, **read_csv_kwargs: dict[str, Any]) -> dict[str, Any]: 

+

95 """Get kwargs for :py:meth:`~lymph.types.Model.load_patient_data`.""" 

+

96 return { 

+

97 "patient_data": self.load(**(read_csv_kwargs or {})), 

+

98 **self.model_dump(exclude={"source"}, exclude_none=True), 

+

99 } 

+

100 

+

101 

+

102def check_pattern(value: PatternType) -> Any: 

+

103 """Check if the value can be converted to a boolean value.""" 

+

104 return {lnl: map_to_optional_bool(v) for lnl, v in value.items()} 

+

105 

+

106 

+

107class DiagnosisConfig(BaseModel): 

+

108 """Defines an ipsi- and contralateral diagnosis pattern.""" 

+

109 

+

110 ipsi: dict[str, Annotated[PatternType, AfterValidator(check_pattern)]] = Field( 

+

111 default={}, 

+

112 description="Observed diagnoses by different modalities on the ipsi neck.", 

+

113 examples=[{"CT": {"II": True, "III": False}}], 

+

114 ) 

+

115 contra: dict[str, Annotated[PatternType, AfterValidator(check_pattern)]] = Field( 

+

116 default={}, 

+

117 description="Observed diagnoses by different modalities on the contra neck.", 

+

118 ) 

+

119 

+

120 def to_involvement(self, modality: str) -> InvolvementConfig: 

+

121 """Convert the diagnosis pattern to an involvement pattern for ``modality``.""" 

+

122 return InvolvementConfig( 

+

123 ipsi=self.ipsi.get(modality, {}), 

+

124 contra=self.contra.get(modality, {}), 

+

125 ) 

+

126 

+

127 

+

128class DistributionConfig(BaseModel): 

+

129 """Configuration defining a distribution over diagnose times.""" 

+

130 

+

131 kind: Literal["frozen", "parametric"] = Field( 

+

132 default="frozen", 

+

133 description="Parametric distributions may be updated.", 

+

134 ) 

+

135 func: FuncNameType = Field( 

+

136 default="binomial", 

+

137 description="Name of predefined function to use as distribution.", 

+

138 ) 

+

139 params: dict[str, int | float] = Field( 

+

140 default={}, 

+

141 description="Parameters to pass to the predefined function.", 

+

142 ) 

+

143 

+

144 

+

145class InvolvementConfig(BaseModel): 

+

146 """Config that defines an ipsi- and contralateral involvement pattern.""" 

+

147 

+

148 ipsi: Annotated[PatternType, AfterValidator(check_pattern)] = Field( 

+

149 default={}, 

+

150 description="Involvement pattern for the ipsilateral side of the neck.", 

+

151 examples=[{"II": True, "III": False}], 

+

152 ) 

+

153 contra: Annotated[PatternType, AfterValidator(check_pattern)] = Field( 

+

154 default={}, 

+

155 description="Involvement pattern for the contralateral side of the neck.", 

+

156 ) 

+

157 

+

158 

+

159def retrieve_graph_representation(model: Model) -> graph.Representation: 

+

160 """Retrieve the graph representation from a model.""" 

+

161 if hasattr(model, "graph"): 

+

162 return model.graph 

+

163 

+

164 if hasattr(model, "hpv"): 

+

165 return retrieve_graph_representation(model.hpv) 

+

166 

+

167 if hasattr(model, "ipsi"): 

+

168 return retrieve_graph_representation(model.ipsi) 

+

169 

+

170 if hasattr(model, "ext"): 

+

171 return retrieve_graph_representation(model.ext) 

+

172 

+

173 raise ValueError("Model does not have a graph representation.") 

+

174 

+

175 

+

176class GraphConfig(BaseModel): 

+

177 """Specifies how the tumor(s) and LNLs are connected in a DAG.""" 

+

178 

+

179 tumor: dict[str, list[str]] = Field( 

+

180 description="Define the name of the tumor(s) and which LNLs it/they drain to.", 

+

181 ) 

+

182 lnl: dict[str, list[str]] = Field( 

+

183 description="Define the name of the LNL(s) and which LNLs it/they drain to.", 

+

184 ) 

+

185 

+

186 @classmethod 

+

187 def from_model(cls: type, model: Model) -> GraphConfig: 

+

188 """Create a ``GraphConfig`` from a ``Model``.""" 

+

189 graph = retrieve_graph_representation(model) 

+

190 return cls( 

+

191 tumor={ 

+

192 name: [edge.child.name for edge in tumor.out] 

+

193 for name, tumor in graph.tumors.items() 

+

194 }, 

+

195 lnl={ 

+

196 name: [edge.child.name for edge in lnl.out] # noqa 

+

197 for name, lnl in graph.lnls.items() 

+

198 }, 

+

199 ) 

+

200 

+

201 

+

202def has_model_symbol(path: Path) -> Path: 

+

203 """Check if the Python file at ``path`` defines a symbol named ``model``.""" 

+

204 spec = importlib.util.spec_from_file_location(path.stem, path) 

+

205 module = importlib.util.module_from_spec(spec) 

+

206 spec.loader.exec_module(module) 

+

207 

+

208 if not hasattr(module, "model"): 

+

209 raise ValueError(f"Python file at {path} does not define a symbol 'model'.") 

+

210 

+

211 return path 

+

212 

+

213 

+

214def get_symmetry_kwargs(model: Model) -> dict[str, Any]: 

+

215 """Get the symmetry kwargs from a model.""" 

+

216 if isinstance(model, models.Unilateral | models.HPVUnilateral): 

+

217 raise TypeError("Unilateral models do not have symmetry kwargs.") 

+

218 

+

219 if hasattr(model, "ext"): 

+

220 return get_symmetry_kwargs(model.ext) 

+

221 

+

222 return getattr(model, "is_symmetric", {}) 

+

223 

+

224 

+

225class ModelConfig(BaseModel): 

+

226 """Define which of the ``lymph`` models to use and how to set them up.""" 

+

227 

+

228 external_file: Annotated[FilePath, AfterValidator(has_model_symbol)] | None = Field( 

+

229 default=None, 

+

230 description="Path to a Python file that defines a model.", 

+

231 ) 

+

232 class_name: Literal["Unilateral", "Bilateral", "Midline"] = Field( 

+

233 default="Unilateral", 

+

234 description="Name of the model class to use.", 

+

235 ) 

+

236 constructor: Literal["binary", "trinary"] = Field( 

+

237 default="binary", 

+

238 description="Trinary models differentiate btw. micro- and macroscopic disease.", 

+

239 ) 

+

240 max_time: int = Field( 

+

241 default=10, 

+

242 description="Max. number of time-steps to evolve the model over.", 

+

243 ) 

+

244 named_params: Sequence[str] = Field( 

+

245 default=None, 

+

246 description=( 

+

247 "Subset of valid model parameters a sampler may provide in the form of a " 

+

248 "dictionary to the model instead of as an array. Or, after sampling, with " 

+

249 "this list, one may safely recover which parameter corresponds to which " 

+

250 "index in the sample." 

+

251 ), 

+

252 ) 

+

253 kwargs: dict[str, Any] = Field( 

+

254 default={}, 

+

255 description="Additional keyword arguments to pass to the model constructor.", 

+

256 ) 

+

257 

+

258 @classmethod 

+

259 def from_model(cls: type, model: Model) -> ModelConfig: 

+

260 """Create a ``ModelConfig`` from a ``Model``.""" 

+

261 warnings.warn( 

+

262 message=( 

+

263 "Not all kwargs passed at initialization can be recovered into a " 

+

264 "config. Make sure to manually double-check the config." 

+

265 ), 

+

266 category=UserWarning, 

+

267 stacklevel=2, 

+

268 ) 

+

269 

+

270 if getattr(model, "_named_params", None): 

+

271 additional_kwargs = {"named_params": list(model.named_params)} 

+

272 else: 

+

273 additional_kwargs = {} 

+

274 

+

275 try: 

+

276 additional_kwargs["is_symmetric"] = get_symmetry_kwargs(model) 

+

277 except TypeError: 

+

278 pass 

+

279 

+

280 if isinstance(model, models.Midline): 

+

281 additional_kwargs["use_midext_evo"] = model.use_midext_evo 

+

282 additional_kwargs["use_central"] = hasattr(model, "_central") 

+

283 additional_kwargs["use_mixing"] = hasattr(model, "mixing_param") 

+

284 

+

285 if not hasattr(model, "_unknown"): 

+

286 additional_kwargs["marginalize_unknown"] = False 

+

287 

+

288 return cls( 

+

289 class_name=model.__class__.__name__, 

+

290 constructor="trinary" if model.is_trinary else "binary", 

+

291 max_time=model.max_time, 

+

292 kwargs=additional_kwargs, 

+

293 ) 

+

294 

+

295 

+

296def modalityconfig_from_model(model: Model, modality_name: str) -> ModalityConfig: 

+

297 """Create a ``ModalityConfig`` from a ``Model``.""" 

+

298 modality = model.get_modality(modality_name) 

+

299 return ModalityConfig( 

+

300 spec=modality.spec, 

+

301 sens=modality.sens, 

+

302 kind="pathological" if isinstance(modality, Pathological) else "clinical", 

+

303 ) 

+

304 

+

305 

+

306class DeprecatedModelConfig(BaseModel): 

+

307 """Model configuration prior to ``lyscripts`` major version 1. 

+

308 

+

309 This is implemented for backwards compatibility. Its sole job is to translate 

+

310 the outdated settings format into the new one. Note that the only stuff that needs 

+

311 to be translated is the model configuration itself and the distributions for 

+

312 marginalization over diagnosis times. The :py:class:`~GraphConfig` is still 

+

313 compatible. 

+

314 """ 

+

315 

+

316 first_binom_prob: float = Field( 

+

317 description="Fixed parameter for first binomial dist over diagnosis times.", 

+

318 ge=0.0, 

+

319 le=1.0, 

+

320 ) 

+

321 max_t: int = Field( 

+

322 description="Max. number of time-steps to evolve the model over.", 

+

323 gt=0, 

+

324 ) 

+

325 t_stages: list[int | str] = Field( 

+

326 description=( 

+

327 "List of T-stages to marginalize over in the scenario. The old format " 

+

328 "assumed all T-stages except the first one to be parametric. Only binomial " 

+

329 "distributions are supported." 

+

330 ), 

+

331 ) 

+

332 class_: Literal["Unilateral", "Bilateral", "Midline", "MidlineBilateral"] = Field( 

+

333 description="Name of the model class. Only binary models are supported.", 

+

334 alias="class", 

+

335 ) 

+

336 kwargs: dict[str, Any] = Field( 

+

337 default={}, 

+

338 description="Additional keyword arguments to pass to the model constructor.", 

+

339 ) 

+

340 

+

341 def model_post_init(self, __context): 

+

342 """Issue a deprecation warning.""" 

+

343 warnings.warn( 

+

344 message="The 'DeprecatedModelConfig' is deprecated.", 

+

345 category=DeprecationWarning, 

+

346 stacklevel=2, 

+

347 ) 

+

348 if "Midline" in self.class_: 

+

349 self.class_ = "Midline" 

+

350 warnings.warn( 

+

351 "Model may not be recreated as expected due to extra parameter " 

+

352 "`midext_prob`. Make sure to manually handle edge cases.", 

+

353 stacklevel=2, 

+

354 ) 

+

355 return super().model_post_init(__context) 

+

356 

+

357 def translate(self) -> tuple[ModelConfig, dict[int | str, DistributionConfig]]: 

+

358 """Translate the deprecated model config to the new format.""" 

+

359 old_kwargs = self.kwargs.copy() 

+

360 new_kwargs = {"use_midext_evo": False} if "Midline" in self.class_ else {} 

+

361 

+

362 if (tumor_spread := old_kwargs.pop("base_symmetric")) is not None: 

+

363 new_kwargs["is_symmetric"] = new_kwargs.get("is_symmetric", {}) 

+

364 new_kwargs["is_symmetric"]["tumor_spread"] = tumor_spread 

+

365 

+

366 if (lnl_spread := old_kwargs.pop("trans_symmetric")) is not None: 

+

367 new_kwargs["is_symmetric"] = new_kwargs.get("is_symmetric", {}) 

+

368 new_kwargs["is_symmetric"]["lnl_spread"] = lnl_spread 

+

369 

+

370 new_kwargs.update(old_kwargs) 

+

371 

+

372 model_config = ModelConfig( 

+

373 class_name=self.class_, 

+

374 constructor="binary", 

+

375 max_time=self.max_t, 

+

376 kwargs=new_kwargs, 

+

377 ) 

+

378 

+

379 distribution_configs = {} 

+

380 for i, t_stage in enumerate(self.t_stages): 

+

381 distribution_configs[t_stage] = DistributionConfig( 

+

382 kind="frozen" if i == 0 else "parametric", 

+

383 func="binomial", 

+

384 params={"p": self.first_binom_prob}, 

+

385 ) 

+

386 

+

387 return model_config, distribution_configs 

+

388 

+

389 

+

390class SamplingConfig(BaseModel): 

+

391 """Settings to configure the MCMC sampling.""" 

+

392 

+

393 storage_file: Path = Field( 

+

394 description="Path to HDF5 file store results or load last state.", 

+

395 ) 

+

396 history_file: Path | None = Field( 

+

397 default=None, 

+

398 description="Path to store the burn-in metrics (as CSV file).", 

+

399 ) 

+

400 dataset: str = Field( 

+

401 default="mcmc", 

+

402 description="Name of the dataset in the HDF5 file.", 

+

403 ) 

+

404 cores: int | None = Field( 

+

405 gt=0, 

+

406 default=os.cpu_count(), 

+

407 description=( 

+

408 "Number of cores to use for parallel sampling. If `None`, no parallel " 

+

409 "processing is used." 

+

410 ), 

+

411 ) 

+

412 seed: int = Field( 

+

413 default=42, 

+

414 description="Seed for the random number generator.", 

+

415 ) 

+

416 walkers_per_dim: int = Field( 

+

417 default=20, 

+

418 description="Number of walkers per parameter space dimension.", 

+

419 ) 

+

420 check_interval: int = Field( 

+

421 default=50, 

+

422 description="Check for convergence each time after this many steps.", 

+

423 ) 

+

424 trust_factor: float = Field( 

+

425 default=50.0, 

+

426 description=( 

+

427 "Trust the autocorrelation time only when it's smaller than this factor " 

+

428 "times the length of the chain." 

+

429 ), 

+

430 ) 

+

431 relative_thresh: float = Field( 

+

432 default=0.05, 

+

433 description="Relative threshold for convergence.", 

+

434 ) 

+

435 burnin_steps: int | None = Field( 

+

436 default=None, 

+

437 description=( 

+

438 "Number of burn-in steps to take. If None, burn-in runs until convergence." 

+

439 ), 

+

440 ) 

+

441 num_steps: int | None = Field( 

+

442 default=100, 

+

443 description=("Number of steps to take in the MCMC sampling."), 

+

444 ) 

+

445 thin_by: int = Field( 

+

446 default=10, 

+

447 description="How many samples to draw before for saving one.", 

+

448 ) 

+

449 inverse_temp: float = Field( 

+

450 default=1.0, 

+

451 description=( 

+

452 "Inverse temperature for thermodynamic integration. Note that this is not " 

+

453 "yet fully implemented." 

+

454 ), 

+

455 ) 

+

456 

+

457 def load(self, thin: int = 1) -> np.ndarray: 

+

458 """Load the samples from the HDF5 file. 

+

459 

+

460 Note that the ``thin`` represents another round of thinning and is usually 

+

461 not necessary if the samples were already thinned during the sampling process. 

+

462 """ 

+

463 return load_model_samples( 

+

464 file_path=self.storage_file, 

+

465 name=self.dataset, 

+

466 thin=thin, 

+

467 ) 

+

468 

+

469 

+

470def geometric_schedule(num: int, *_a) -> np.ndarray: 

+

471 """Create a geometric sequence of ``num`` numbers from 0 to 1.""" 

+

472 log_seq = np.logspace(0.0, 1.0, num) 

+

473 shifted_seq = log_seq - 1.0 

+

474 return shifted_seq / 9.0 

+

475 

+

476 

+

477def linear_schedule(num: int, *_a) -> np.ndarray: 

+

478 """Create a linear sequence of ``num`` numbers from 0 to 1. 

+

479 

+

480 Equivalent to the :py:func:`power_schedule` with ``power=1``. 

+

481 """ 

+

482 return np.linspace(0.0, 1.0, num) 

+

483 

+

484 

+

485def power_schedule(num: int, power: float, *_a) -> np.ndarray: 

+

486 """Create a power sequence of ``num`` numbers from 0 to 1. 

+

487 

+

488 This is essentially a :py:func:`linear_schedule` of ``num`` numbers from 0 to 1, 

+

489 but each number is raised to the power of ``power``. 

+

490 """ 

+

491 lin_seq = np.linspace(0.0, 1.0, num) 

+

492 return lin_seq**power 

+

493 

+

494 

+

495SCHEDULES = { 

+

496 "geometric": geometric_schedule, 

+

497 "linear": linear_schedule, 

+

498 "power": power_schedule, 

+

499} 

+

500 

+

501 

+

502class ScheduleConfig(BaseModel): 

+

503 """Configuration for generating a schedule of inverse temperatures.""" 

+

504 

+

505 method: Literal["geometric", "linear", "power"] = Field( 

+

506 default="power", 

+

507 description="Method to generate the inverse temperature schedule.", 

+

508 ) 

+

509 num: int = Field( 

+

510 default=32, 

+

511 description="Number of inverse temperatures in the schedule.", 

+

512 ) 

+

513 power: float = Field( 

+

514 default=4.0, 

+

515 description="If a power schedule is chosen, use this as power.", 

+

516 ) 

+

517 values: list[float] | None = Field( 

+

518 default=None, 

+

519 description=( 

+

520 "List of inverse temperatures to use instead of generating a schedule. " 

+

521 "If a list is provided, the other parameters are ignored." 

+

522 ), 

+

523 ) 

+

524 

+

525 def get_schedule(self) -> np.ndarray: 

+

526 """Get the inverse temperature schedule as a numpy array.""" 

+

527 if self.values is not None: 

+

528 logger.debug("Using provided inverse temperature values.") 

+

529 schedule = np.array(self.values) 

+

530 else: 

+

531 logger.debug(f"Generating inverse temperature schedule with {self.method}.") 

+

532 func = SCHEDULES[self.method] 

+

533 schedule = func(self.num, self.power) 

+

534 

+

535 logger.info(f"Generated inverse temperature schedule: {schedule}") 

+

536 return schedule 

+

537 

+

538 

+

539def map_to_optional_bool(value: Any) -> Any: 

+

540 """Try to convert the options in the `PatternType` to a boolean value.""" 

+

541 if value in [True, "involved", 1]: 

+

542 return True 

+

543 

+

544 if value in [False, "healthy", 0]: 

+

545 return False 

+

546 

+

547 return value 

+

548 

+

549 

+

550class ScenarioConfig(BaseModel): 

+

551 """Define a scenario for which e.g. prevalences and risks may be computed.""" 

+

552 

+

553 t_stages: list[int | str] = Field( 

+

554 description="List of T-stages to marginalize over in the scenario.", 

+

555 examples=[["early"], [3, 4]], 

+

556 ) 

+

557 t_stages_dist: list[float] = Field( 

+

558 default=[1.0], 

+

559 description="Distribution over T-stages to use for marginalization.", 

+

560 examples=[[1.0], [0.6, 0.4]], 

+

561 ) 

+

562 midext: bool | None = Field( 

+

563 default=None, 

+

564 description="Whether the patient's tumor extends over the midline.", 

+

565 ) 

+

566 mode: Literal["HMM", "BN"] = Field( 

+

567 default="HMM", 

+

568 description="Which underlying model architecture to use.", 

+

569 ) 

+

570 involvement: InvolvementConfig = InvolvementConfig() 

+

571 diagnosis: DiagnosisConfig = DiagnosisConfig() 

+

572 

+

573 def model_post_init(self, __context: Any) -> None: 

+

574 """Interpolate and normalize the distribution.""" 

+

575 self.interpolate() 

+

576 self.normalize() 

+

577 

+

578 def interpolate(self): 

+

579 """Interpolate the distribution to the number of ``t_stages``.""" 

+

580 if len(self.t_stages) != len(self.t_stages_dist): 

+

581 new_x = np.linspace(0.0, 1.0, len(self.t_stages)) 

+

582 old_x = np.linspace(0.0, 1.0, len(self.t_stages_dist)) 

+

583 # cast to list to make ``__eq__`` work 

+

584 self.t_stages_dist = np.interp(new_x, old_x, self.t_stages_dist).tolist() 

+

585 

+

586 def normalize(self): 

+

587 """Normalize the distribution to sum to 1.""" 

+

588 if not np.isclose(np.sum(self.t_stages_dist), 1.0): 

+

589 self.t_stages_dist = ( 

+

590 np.array(self.t_stages_dist) / np.sum(self.t_stages_dist) 

+

591 ).tolist() # cast to list to make ``__eq__`` work 

+

592 

+

593 

+

594def _construct_model_from_external(path: Path) -> Model: 

+

595 """Construct a model from a Python file.""" 

+

596 module_name = path.stem 

+

597 spec = importlib.util.spec_from_file_location(module_name, path) 

+

598 module = importlib.util.module_from_spec(spec) 

+

599 spec.loader.exec_module(module) 

+

600 logger.info(f"Loaded model from {path}. This ignores model and graph configs.") 

+

601 return module.model 

+

602 

+

603 

+

604def construct_model( 

+

605 model_config: ModelConfig, 

+

606 graph_config: GraphConfig, 

+

607) -> Model: 

+

608 """Construct a model from a ``model_config``. 

+

609 

+

610 The default/expected use of this is to specify a model class from the 

+

611 `lymph`_ package and pass the necessary arguments to its constructor. 

+

612 However, it is also possible to load a model from an external Python file via the 

+

613 ``external`` attribute of the ``model_config`` argument. In this case, a symbol 

+

614 with name ``model`` must be defined in the file that is to be loaded. 

+

615 

+

616 .. note:: 

+

617 

+

618 No check is performed on the model's compatibility with the command/pipeline 

+

619 it is used in. It is assumed the model complies with the 

+

620 :py:class:`model type <lymph.types.Model>` specifications of the `lymph`_ 

+

621 package. 

+

622 

+

623 .. _lymph: https://lymph-model.readthedocs.io/stable/ 

+

624 """ 

+

625 if model_config.external_file is not None: 

+

626 return _construct_model_from_external(model_config.external_file) 

+

627 

+

628 cls = getattr(models, model_config.class_name) 

+

629 constructor = getattr(cls, model_config.constructor) 

+

630 model = constructor( 

+

631 graph_dict=flatten(graph_config.model_dump()), 

+

632 max_time=model_config.max_time, 

+

633 named_params=model_config.named_params, 

+

634 **model_config.kwargs, 

+

635 ) 

+

636 logger.info(f"Constructed model: {model}") 

+

637 return model 

+

638 

+

639 

+

640def add_distributions( 

+

641 model: Model, 

+

642 configs: dict[str | int, DistributionConfig], 

+

643 mapping: dict[FuncNameType, Callable] | None = None, 

+

644 inplace: bool = False, 

+

645) -> Model: 

+

646 """Construct and add distributions over diagnose times to a ``model``.""" 

+

647 if not inplace: 

+

648 model = deepcopy(model) 

+

649 logger.debug("Created deepcopy of model.") 

+

650 

+

651 mapping = mapping or DIST_MAP 

+

652 

+

653 for t_stage, dist_config in configs.items(): 

+

654 if dist_config.kind == "frozen": 

+

655 support = np.arange(model.max_time + 1) 

+

656 dist = mapping[dist_config.func](support, **dist_config.params) 

+

657 elif dist_config.kind == "parametric": 

+

658 dist = mapping[dist_config.func] 

+

659 else: 

+

660 raise ValueError(f"Unknown distribution kind: {dist_config.kind}") 

+

661 

+

662 model.set_distribution(t_stage, dist) 

+

663 if dist_config.kind == "parametric" and dist_config.params: 

+

664 params = {f"{t_stage}_{k}": v for k, v in dist_config.params.items()} 

+

665 model.set_params(**params) 

+

666 

+

667 logger.debug(f"Set {dist_config.kind} distribution for '{t_stage}': {dist}") 

+

668 

+

669 logger.info(f"Added {len(configs)} distributions to model: {model}") 

+

670 return model 

+

671 

+

672 

+

673def add_modalities( 

+

674 model: Model, 

+

675 modalities: dict[str, ModalityConfig], 

+

676 inplace: bool = False, 

+

677) -> Model: 

+

678 """Add ``modalities`` to a ``model``.""" 

+

679 if not inplace: 

+

680 model = deepcopy(model) 

+

681 logger.debug("Created deepcopy of model.") 

+

682 

+

683 for modality, modality_config in modalities.items(): 

+

684 model.set_modality(modality, **modality_config.model_dump()) 

+

685 logger.debug(f"Added modality {modality} to model: {modality_config}") 

+

686 

+

687 logger.info(f"Added {len(modalities)} modalities to model: {model}") 

+

688 return model 

+

689 

+

690 

+

691def add_data( 

+

692 model: Model, 

+

693 path: Path, 

+

694 side: Literal["ipsi", "contra"], 

+

695 mapping: dict[Literal[0, 1, 2, 3, 4], int | str] | None = None, 

+

696 inplace: bool = False, 

+

697) -> Model: 

+

698 """Add data to a ``model``.""" 

+

699 data = pd.read_csv(path, header=[0, 1, 2]) 

+

700 logger.debug(f"Loaded data from {path}: Shape: {data.shape}") 

+

701 

+

702 kwargs = {"patient_data": data, "mapping": mapping} 

+

703 if isinstance(model, models.Unilateral): 

+

704 kwargs["side"] = side 

+

705 

+

706 if not inplace: 

+

707 model = deepcopy(model) 

+

708 logger.debug("Created deepcopy of model.") 

+

709 

+

710 model.load_patient_data(**kwargs) 

+

711 logger.info(f"Added data to model: {model}") 

+

712 return model 

+

713 

+

714 

+

715PathType = Path | str | Sequence[Path | str] 

+

716 

+

717 

+

718class DynamicYamlConfigSettingsSource(YamlConfigSettingsSource): 

+

719 """YAML config source that allows dynamic file path specification. 

+

720 

+

721 This is heavily inspired by `this comment`_ in the discussion on a related issue 

+

722 of the `pydantic-settings`_ GitHub repository. 

+

723 

+

724 Essentially, this little hack allows a user to specify a one or multiple YAML files 

+

725 from which the CLI should read configurations. Normally, `pydantic-settings` only 

+

726 allows hard-coding the location of these config files. 

+

727 

+

728 .. _this comment: https://github.com/pydantic/pydantic-settings/issues/259#issuecomment-2549444286 

+

729 .. _pydantic-settings: https://github.com/pydantic/pydantic-settings 

+

730 """ 

+

731 

+

732 def __init__( 

+

733 self, 

+

734 settings_cls, 

+

735 yaml_file: PathType | None = DEFAULT_PATH, 

+

736 yaml_file_encoding: str | None = None, 

+

737 yaml_file_path_field: str = "configs", 

+

738 ) -> None: 

+

739 """Allow getting the YAML file path from any key in the current state. 

+

740 

+

741 The argument ``yaml_file_path_field`` should be the :py:class:`BaseSettings` 

+

742 field that contains the path(s) to the YAML file(s). 

+

743 

+

744 Note that all config files must have a ``version: 1`` key in them to be 

+

745 recognized as valid config files. 

+

746 """ 

+

747 self.yaml_file_path_field = yaml_file_path_field 

+

748 super().__init__(settings_cls, yaml_file, yaml_file_encoding) 

+

749 

+

750 def _read_file(self, file_path: Path) -> dict[str, Any]: 

+

751 """Read the YAML and raise exception when ``version: 1`` not found.""" 

+

752 with open(file_path, encoding=self.yaml_file_encoding) as yaml_file: 

+

753 data = yaml.safe_load(yaml_file) or {} 

+

754 if data.get("version") != 1: 

+

755 raise ValueError( 

+

756 f"Config file {file_path} does not have a 'version: 1' key. " 

+

757 "For compatibility reasons, all config files must have this key.", 

+

758 ) 

+

759 return data 

+

760 

+

761 def __call__(self) -> dict[str, Any]: 

+

762 """Reload the config files from the paths in the current state.""" 

+

763 yaml_file_to_reload = self.current_state.get( 

+

764 self.yaml_file_path_field, 

+

765 self.yaml_file_path, 

+

766 ) 

+

767 logger.debug(f"Reloading YAML files from {yaml_file_to_reload} (if it exists).") 

+

768 self.__init__( 

+

769 settings_cls=self.settings_cls, 

+

770 yaml_file=yaml_file_to_reload, 

+

771 yaml_file_encoding=self.yaml_file_encoding, 

+

772 yaml_file_path_field=self.yaml_file_path_field, 

+

773 ) 

+

774 return super().__call__() 

+

775 

+

776 def __repr__(self) -> str: 

+

777 """Return a string representation of the source.""" 

+

778 return ( 

+

779 self.__class__.__name__ 

+

780 + "(" 

+

781 + f"yaml_file={self.yaml_file_path!r}, " 

+

782 + f"yaml_file_encoding={self.yaml_file_encoding!r}, " 

+

783 + f"yaml_file_path_field={self.yaml_file_path_field!r}" 

+

784 + ")" 

+

785 ) 

+

786 

+

787 

+

788class BaseCLI(BaseSettings): 

+

789 """Base settings class for all CLI scripts to inherit from.""" 

+

790 

+

791 model_config = ConfigDict(yaml_file="config.yaml", extra="ignore") 

+

792 

+

793 configs: list[Path] = Field( 

+

794 default=["config.yaml"], 

+

795 description=( 

+

796 "Path to the YAML file(s) that contain the configuration(s). Configs from " 

+

797 "YAML files may be overwritten by command line arguments. When multiple " 

+

798 "files are specified, the configs are merged in the order they are given. " 

+

799 "Note that every config file must have a `version: 1` key in it." 

+

800 ), 

+

801 ) 

+

802 

+

803 @classmethod 

+

804 def settings_customise_sources( 

+

805 cls, 

+

806 settings_cls: type[BaseSettings], 

+

807 init_settings: PydanticBaseSettingsSource, 

+

808 env_settings: PydanticBaseSettingsSource, 

+

809 dotenv_settings: PydanticBaseSettingsSource, 

+

810 file_secret_settings: PydanticBaseSettingsSource, 

+

811 ) -> tuple[PydanticBaseSettingsSource, ...]: 

+

812 """Add the dynamic YAML config source to the CLI settings.""" 

+

813 dynamic_yaml_config_source = DynamicYamlConfigSettingsSource( 

+

814 settings_cls=settings_cls, 

+

815 yaml_file_path_field="configs", 

+

816 yaml_file_encoding="utf-8", 

+

817 ) 

+

818 logger.debug(f"Created {dynamic_yaml_config_source = }") 

+

819 return ( 

+

820 init_settings, 

+

821 env_settings, 

+

822 dotenv_settings, 

+

823 file_secret_settings, 

+

824 dynamic_yaml_config_source, 

+

825 ) 

+
+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_decorators_py.html b/htmlcov/z_5bf5c588c698c6cc_decorators_py.html new file mode 100644 index 0000000..126c178 --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_decorators_py.html @@ -0,0 +1,185 @@ + + + + + Coverage for src/lyscripts/decorators.py: 90% + + + + + +
+
+

+ Coverage for src / lyscripts / decorators.py: + 90% +

+ +

+ 41 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Decorators to avoid repetitive snippets of code. 

+

2 

+

3E.g. safely opening files or logging the state of a function call. 

+

4 

+

5This is *not* a command line tool. 

+

6""" 

+

7 

+

8import functools 

+

9import logging 

+

10from collections.abc import Callable 

+

11from functools import wraps 

+

12from pathlib import Path 

+

13from typing import Any 

+

14 

+

15 

+

16def assemble_signature(*args, **kwargs) -> str: 

+

17 """Assemble the signature of the function call.""" 

+

18 args_str = ", ".join(str(arg) for arg in args) 

+

19 kwargs_str = ", ".join(f"{key}={value}" for key, value in kwargs.items()) 

+

20 return ", ".join([args_str, kwargs_str]) 

+

21 

+

22 

+

23def log_state(log_level: int = logging.INFO) -> Callable: 

+

24 """Provide a decorator that logs the state of the function execution. 

+

25 

+

26 The log message will simply be the function name where underscores are replaced 

+

27 with spaces. The `log_level` can be set in the decorator call. 

+

28 """ 

+

29 

+

30 def log_decorator(func: Callable): 

+

31 """Decorate function for which to add logs.""" 

+

32 

+

33 @functools.wraps(func) 

+

34 def wrapper(*args, **kwargs): 

+

35 """Execute decorated function.""" 

+

36 logger = logging.getLogger(func.__module__) 

+

37 signature = assemble_signature(*args, **kwargs) 

+

38 logger.debug(f"Executing {func.__name__}({signature}).") 

+

39 log_msg_from_func = func.__name__.replace("_", " ").capitalize() + "." 

+

40 

+

41 try: 

+

42 logger.log( 

+

43 log_level, 

+

44 log_msg_from_func, 

+

45 extra={ 

+

46 "func_filepath": f"{func.__module__.replace('.', '/')}.py", 

+

47 "func_name": func.__name__, 

+

48 "module_name": func.__module__, 

+

49 }, 

+

50 ) 

+

51 return func(*args, **kwargs) 

+

52 

+

53 except Exception as exc: 

+

54 logger.error(f"Error calling {func.__name__}().", exc_info=exc) 

+

55 raise exc 

+

56 

+

57 return wrapper 

+

58 

+

59 return log_decorator 

+

60 

+

61 

+

62def check_input_file_exists(loading_func: Callable) -> Callable: 

+

63 """Check if the file path provided to the `loading_func` exists.""" 

+

64 

+

65 @wraps(loading_func) 

+

66 def inner(file_path: str, *args, **kwargs) -> Any: 

+

67 """Execute wrapped loading function.""" 

+

68 file_path = Path(file_path) 

+

69 if not file_path.is_file(): 

+

70 raise FileNotFoundError(f"File {file_path} does not exist.") 

+

71 

+

72 return loading_func(file_path, *args, **kwargs) 

+

73 

+

74 return inner 

+

75 

+

76 

+

77def check_output_dir_exists(saving_func: Callable) -> Callable: 

+

78 """Make sure the parent directory of the saved file exists.""" 

+

79 

+

80 @wraps(saving_func) 

+

81 def inner(file_path: str, *args, **kwargs) -> Any: 

+

82 """Execute wrapped saving function.""" 

+

83 file_path = Path(file_path) 

+

84 file_path.parent.mkdir(parents=True, exist_ok=True) 

+

85 

+

86 return saving_func(file_path, *args, **kwargs) 

+

87 

+

88 return inner 

+
+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_evaluate_py.html b/htmlcov/z_5bf5c588c698c6cc_evaluate_py.html new file mode 100644 index 0000000..731bf0b --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_evaluate_py.html @@ -0,0 +1,302 @@ + + + + + Coverage for src/lyscripts/evaluate.py: 26% + + + + + +
+
+

+ Coverage for src / lyscripts / evaluate.py: + 26% +

+ +

+ 70 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Evaluate the performance of the trained model. 

+

2 

+

3This is done by computing quantities like the Bayesian information criterion (BIC) or 

+

4(if thermodynamic integration was performed) the actual evidence (with error) of the 

+

5model. 

+

6""" 

+

7 

+

8import argparse 

+

9import json 

+

10from pathlib import Path 

+

11 

+

12import emcee 

+

13import h5py 

+

14import numpy as np 

+

15import pandas as pd 

+

16from loguru import logger 

+

17from scipy.integrate import trapezoid 

+

18 

+

19from lyscripts.utils import load_patient_data, load_yaml_params 

+

20 

+

21RNG = np.random.default_rng() 

+

22 

+

23 

+

24def _add_parser( 

+

25 subparsers: argparse._SubParsersAction, 

+

26 help_formatter, 

+

27): 

+

28 """Add an ``ArgumentParser`` to the subparsers action.""" 

+

29 parser = subparsers.add_parser( 

+

30 Path(__file__).name.replace(".py", ""), 

+

31 description=__doc__, 

+

32 help=__doc__, 

+

33 formatter_class=help_formatter, 

+

34 ) 

+

35 _add_arguments(parser) 

+

36 

+

37 

+

38def _add_arguments(parser: argparse.ArgumentParser): 

+

39 """Add arguments to a ``subparsers`` instance and run its main function when chosen. 

+

40 

+

41 This is called by the parent module that is called via the command line. 

+

42 """ 

+

43 parser.add_argument( 

+

44 "data", 

+

45 type=Path, 

+

46 help="Path to the tables of patient data (CSV).", 

+

47 ) 

+

48 parser.add_argument("model", type=Path, help="Path to model output files (HDF5).") 

+

49 

+

50 parser.add_argument( 

+

51 "-p", 

+

52 "--params", 

+

53 default="./params.yaml", 

+

54 type=Path, 

+

55 help="Path to parameter file", 

+

56 ) 

+

57 parser.add_argument( 

+

58 "--plots", 

+

59 default="./plots", 

+

60 type=Path, 

+

61 help="Directory for storing plots", 

+

62 ) 

+

63 parser.add_argument( 

+

64 "--metrics", 

+

65 default="./metrics.json", 

+

66 type=Path, 

+

67 help="Path to metrics file", 

+

68 ) 

+

69 

+

70 parser.set_defaults(run_main=main) 

+

71 

+

72 

+

def comp_bic(log_probs: np.ndarray, num_params: int, num_data: int) -> float:
    r"""Return minus one half of the Bayesian Information Criterion (BIC).

    The BIC is defined as [^1]
    $$ BIC = k \ln{n} - 2 \ln{\hat{L}} $$
    with $k$ the number of parameters (``num_params``), $n$ the number of data
    points (``num_data``) and $\ln{\hat{L}}$ the largest value in ``log_probs``.
    Because
    $$ p(D \mid m) \approx \exp{\left( - BIC / 2 \right)} $$
    approximates the model evidence, this function returns $-BIC / 2$ directly.

    [^1]: https://en.wikipedia.org/wiki/Bayesian_information_criterion
    """
    max_log_prob = np.max(log_probs)
    complexity_penalty = num_params * np.log(num_data) / 2.0
    return max_log_prob - complexity_penalty

+

88 

+

89 

+

def compute_evidence(
    temp_schedule: np.ndarray,
    log_probs: np.ndarray,
) -> float:
    """Integrate the mean log-probabilities over the inverse temperatures.

    Each row of ``log_probs`` holds the samples belonging to one entry of
    ``temp_schedule``. The row means are taken and then integrated over
    ``temp_schedule`` with the trapezoidal rule; that integral is returned.
    """
    mean_per_temp = np.mean(log_probs, axis=1)
    integral = trapezoid(y=mean_per_temp, x=temp_schedule)
    return integral

+

104 

+

105 

+

def compute_ti_results(
    metrics: dict,
    params: dict,
    ndim: int,
    h5_file: Path,
    model: Path,
) -> tuple[np.ndarray, np.ndarray]:
    """Collect the log-probs of all TI runs and store the resulting evidence.

    The temperature schedule is read from ``params["sampling"]`` and must have
    as many entries as there are children under ``h5_file["ti"]``; otherwise a
    ``RuntimeError`` is raised. For every stored run the flattened blobs are read
    through a read-only emcee HDF backend opened on ``model``. The evidence
    computed from them is written to ``metrics["evidence"]`` (``metrics`` is
    modified in place).

    Returns the temperature schedule and the per-temperature log-prob array.
    """
    sampling = params["sampling"]
    temp_schedule = sampling["temp_schedule"]
    num_temps = len(temp_schedule)
    num_stored = len(h5_file["ti"])

    if num_temps != num_stored:
        raise RuntimeError(
            f"Parameters suggest temp schedule of length {num_temps}, "
            f"but stored are {num_stored}",
        )

    nwalker = ndim * sampling["walkers_per_dim"]
    nsteps = sampling["nsteps"]
    ti_log_probs = np.zeros(shape=(num_temps, nsteps * nwalker))

    for row, run_name in enumerate(h5_file["ti"]):
        run_backend = emcee.backends.HDFBackend(
            model,
            name=f"ti/{run_name}",
            read_only=True,
        )
        ti_log_probs[row] = run_backend.get_blobs(flat=True)

    metrics["evidence"] = compute_evidence(temp_schedule, ti_log_probs)
    return temp_schedule, ti_log_probs

+

135 

+

136 

+

def main(args: argparse.Namespace):
    """Compute evaluation metrics from stored samples and write them to disk.

    Reads the parameter file and patient data named in ``args``. If the HDF5
    file at ``args.model`` contains a ``"ti"`` group, the thermodynamic
    integration results are evaluated and a table of inverse temperature vs.
    mean/std log-prob is written as CSV to ``args.plots``. Afterwards the blobs
    of the ``"mcmc"`` group are used to compute ``BIC``, ``max_llh`` and
    ``mean_llh``, which are dumped as JSON to ``args.metrics``.
    """
    metrics = {}

    params = load_yaml_params(args.params)
    # FIXME(review): model construction is commented out, so ``model`` is ``None``
    # and the ``get_params`` call below cannot succeed until it is restored.
    model = None  # create_model(params)
    ndim = len(model.get_params())
    data = load_patient_data(args.data)

    # The context manager guarantees the HDF5 handle is released again, also
    # when one of the steps below raises.
    with h5py.File(args.model, mode="r") as h5_file:
        # if TI has been performed, compute the accuracy for every step
        if "ti" in h5_file:
            temp_schedule, ti_log_probs = compute_ti_results(
                metrics=metrics,
                params=params,
                ndim=ndim,
                h5_file=h5_file,
                model=args.model,
            )
            logger.info(
                "Computed results of thermodynamic integration with "
                f"{len(temp_schedule)} steps",
            )

            # store inverse temperatures and log-probs in CSV file; create
            # missing parent directories just like for the metrics file below
            args.plots.parent.mkdir(parents=True, exist_ok=True)

            beta_vs_accuracy = pd.DataFrame(
                np.array(
                    [
                        temp_schedule,
                        np.mean(ti_log_probs, axis=1),
                        np.std(ti_log_probs, axis=1),
                    ],
                ).T,
                columns=["β", "accuracy", "std"],
            )
            beta_vs_accuracy.to_csv(args.plots, index=False)
            logger.info(f"Plotted β vs accuracy at {args.plots}")

    # use blobs, because also for TI, this is the unscaled log-prob
    backend = emcee.backends.HDFBackend(args.model, read_only=True, name="mcmc")
    final_log_probs = backend.get_blobs()
    logger.info(f"Opened samples from emcee backend from {args.model}")

    # store metrics in JSON file
    args.metrics.parent.mkdir(parents=True, exist_ok=True)
    args.metrics.touch(exist_ok=True)

    metrics["BIC"] = comp_bic(
        final_log_probs,
        ndim,
        len(data),
    )
    metrics["max_llh"] = np.max(final_log_probs)
    metrics["mean_llh"] = np.mean(final_log_probs)

    with open(args.metrics, mode="w", encoding="utf-8") as metrics_file:
        json.dump(metrics, metrics_file)

    logger.info(f"Wrote out metrics to {args.metrics}")

+

198 

+

199 

+

if __name__ == "__main__":
    # Stand-alone entry point: build the parser, then dispatch to the function
    # that ``_add_arguments`` registered under ``run_main``.
    cli_parser = argparse.ArgumentParser(description=__doc__)
    _add_arguments(cli_parser)

    cli_args = cli_parser.parse_args()
    cli_args.run_main(cli_args)

+
+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_integrate_py.html b/htmlcov/z_5bf5c588c698c6cc_integrate_py.html new file mode 100644 index 0000000..4430407 --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_integrate_py.html @@ -0,0 +1,260 @@ + + + + + Coverage for src/lyscripts/integrate.py: 52% + + + + + +
+
+

+ Coverage for src / lyscripts / integrate.py: + 52% +

+ +

+ 46 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Perform thermodynamic integration to evaluate the model evidence. 

+

2 

+

3Using the functions provided by the `sample` module, this script implements 

+

4thermodynamic integration (TI) in order to compute the model evidence. 

+

5This is done by sampling the model parameters at different inverse temperatures 

+

6following a specified schedule. 

+

7""" 

+

8 

+

9from __future__ import annotations 

+

10 

+

11import os 

+

12from typing import Any 

+

13 

+

14import emcee 

+

15import h5py 

+

16import numpy as np 

+

17from loguru import logger 

+

18from lydata.utils import ModalityConfig 

+

19from pydantic import Field 

+

20 

+

21import lyscripts.sample as sample_module # Import the module to set its global MODEL 

+

22from lyscripts.cli import assemble_main 

+

23from lyscripts.configs import ( 

+

24 BaseCLI, 

+

25 DataConfig, 

+

26 DistributionConfig, 

+

27 GraphConfig, 

+

28 ModelConfig, 

+

29 SamplingConfig, 

+

30 ScheduleConfig, 

+

31 add_distributions, 

+

32 add_modalities, 

+

33 construct_model, 

+

34) 

+

35from lyscripts.utils import get_hdf5_backend 

+

36 

+

37 

+

def init_ti_sampler(
    settings: IntegrateCLI,
    temp_idx: int,
    ndim: int,
    inv_temp: float,
    pool: Any,
) -> emcee.EnsembleSampler:
    """Create the ``emcee.EnsembleSampler`` for one TI step.

    The sampler stores its chain in the dataset ``ti/NN`` of the configured
    storage file, where ``NN`` is the zero-padded, one-based ``temp_idx``. The
    inverse temperature ``inv_temp`` is handed to the log-prob function as the
    keyword argument ``inverse_temp``.
    """
    nwalkers = ndim * settings.sampling.walkers_per_dim
    group_name = f"ti/{temp_idx + 1:0>2d}"
    backend = get_hdf5_backend(
        file_path=settings.sampling.storage_file,
        dataset=group_name,
        nwalkers=nwalkers,
        ndim=ndim,
    )
    # weighted mixture of the two differential evolution moves
    move_mix = [(emcee.moves.DEMove(), 0.8), (emcee.moves.DESnookerMove(), 0.2)]
    param_names = list(MODEL.get_named_params().keys())

    return emcee.EnsembleSampler(
        nwalkers=nwalkers,
        ndim=ndim,
        log_prob_fn=sample_module.log_prob_fn,
        kwargs={"inverse_temp": inv_temp},
        moves=move_mix,
        backend=backend,
        pool=pool,
        blobs_dtype=[("log_prob", np.float64)],
        parameter_names=param_names,
    )

+

63 ) 

+

64 

+

65 

+

class IntegrateCLI(BaseCLI):
    """Perform thermodynamic integration to compute the model evidence."""

    graph: GraphConfig
    model: ModelConfig = ModelConfig()
    distributions: dict[str, DistributionConfig] = Field(
        default={},
        description=(
            "Mapping of model T-categories to predefined distributions over "
            "diagnose times."
        ),
    )
    modalities: dict[str, ModalityConfig] = Field(
        default={},
        description=(
            "Maps names of diagnostic modalities to their specificity/sensitivity."
        ),
    )
    data: DataConfig
    sampling: SamplingConfig
    schedule: ScheduleConfig = Field(
        description="Configuration for generating inverse temperature schedule.",
    )

    def cli_cmd(self) -> None:
        """Start the ``integrate`` subcommand.

        The model is constructed and set up in the same way as for the ``sample``
        command. Then, for every inverse temperature of the schedule, an
        :py:class:`emcee.EnsembleSampler` is created (see
        :py:func:`init_ti_sampler`) and ``run_sampling`` from the ``sample``
        module is executed twice: once for the burn-in phase and once for the
        actual sampling phase. The sampler receives the inverse temperature of
        the respective step as ``inverse_temp``. All sampling settings come from
        the ``sampling`` argument, except for the inverse temperatures, which are
        provided by the ``schedule`` argument. At the end, the samples of the
        last TI step are copied into the dataset named by ``sampling.dataset``.
        """
        # as recommended in https://emcee.readthedocs.io/en/stable/tutorials/parallel/#
        os.environ["OMP_NUM_THREADS"] = "1"

        logger.debug(self.model_dump_json(indent=2))

        # ugly, but necessary for pickling
        global MODEL
        MODEL = construct_model(self.model, self.graph)
        MODEL = add_distributions(MODEL, self.distributions)
        MODEL = add_modalities(MODEL, self.modalities)
        MODEL.load_patient_data(**self.data.get_load_kwargs())
        ndim = MODEL.get_num_dims()

        # set MODEL in the sample module's namespace so log_prob_fn can access it
        sample_module.MODEL = MODEL

        schedule = self.schedule.get_schedule()
        num_temps = len(schedule)
        sampling = self.sampling

        # emcee does not support numpy's new random number generator yet.
        np.random.seed(sampling.seed)  # noqa: NPY002

        with sample_module.get_pool(sampling.cores) as pool:
            for step, inv_temp in enumerate(schedule, start=1):
                sampler = init_ti_sampler(
                    settings=self,
                    temp_idx=step - 1,
                    ndim=ndim,
                    inv_temp=inv_temp,
                    pool=pool,
                )

                sample_module.run_sampling(
                    description=f"Burn-in phase: TI step {step}/{num_temps}",
                    sampler=sampler,
                    num_steps=sampling.burnin_steps,
                    check_interval=sampling.check_interval,
                    trust_factor=sampling.trust_factor,
                    relative_thresh=sampling.relative_thresh,
                    history_file=sampling.history_file,
                )

                sample_module.run_sampling(
                    description=f"Sampling phase: TI step {step}/{num_temps}",
                    sampler=sampler,
                    num_steps=sampling.num_steps,
                    reset_backend=True,
                    check_interval=sampling.num_steps,
                    thin_by=sampling.thin_by,
                )

        # copy last sampling round over to the dataset named by ``sampling.dataset``
        last_group = f"ti/{num_temps:0>2d}"
        with h5py.File(sampling.storage_file, mode="r+") as h5_file:
            h5_file.copy(
                last_group,
                h5_file,
                name=sampling.dataset,
            )

+

159 

+

160 

+

if __name__ == "__main__":
    # Build the CLI entry point for the settings class and invoke it right away.
    assemble_main(settings_cls=IntegrateCLI, prog_name="integrate")()

+
+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_plots_py.html b/htmlcov/z_5bf5c588c698c6cc_plots_py.html new file mode 100644 index 0000000..957c370 --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_plots_py.html @@ -0,0 +1,508 @@ + + + + + Coverage for src/lyscripts/plots.py: 89% + + + + + +
+
+

+ Coverage for src / lyscripts / plots.py: + 89% +

+ +

+ 160 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Utility functions for the plotting commands.""" 

+

2 

+

3from __future__ import annotations 

+

4 

+

5from abc import abstractmethod 

+

6from collections.abc import Mapping 

+

7from dataclasses import field 

+

8from itertools import cycle 

+

9from pathlib import Path 

+

10from typing import TYPE_CHECKING, Any, TypeVar 

+

11 

+

12import h5py 

+

13import matplotlib.pyplot as plt 

+

14import numpy as np 

+

15import scipy as sp 

+

16from numpydantic import NDArray, Shape 

+

17from pydantic import BaseModel 

+

18 

+

19from lyscripts.decorators import ( 

+

20 check_input_file_exists, 

+

21 check_output_dir_exists, 

+

22 log_state, 

+

23) 

+

24 

+

25if TYPE_CHECKING: 

+

26 from matplotlib.axes._axes import Axes as MPLAxes 

+

27 from matplotlib.figure import Figure 

+

28 

+

# USZ color palette, keyed by a plain color name
COLORS = dict(
    blue="#005ea8",
    orange="#f17900",
    green="#00afa5",
    red="#ae0060",
    gray="#c5d5db",
)
# endless iterator over the palette, in the order defined above
COLOR_CYCLE = cycle(COLORS.values())
# conversion factor used to turn centimeters into inches
CM_PER_INCH = 2.54

+

39 

+

40 

+

def floor_at_decimal(value: float, decimal: int) -> float:
    """Round ``value`` down at the given ``decimal`` position.

    ``decimal`` counts positions to the right of the decimal point and may be
    negative to round down at tens, hundreds, and so on.
    """
    shift = 10**decimal
    shifted_floor = np.floor(shift * value)
    return shifted_floor / shift

+

48 

+

49 

+

def ceil_at_decimal(value: float, decimal: int) -> float:
    """Round ``value`` up at the given ``decimal`` position.

    Counterpart of :py:func:`.floor_at_decimal`: ``decimal`` counts positions to
    the right of the decimal point and may be negative. Implemented as the
    negated floor of the negated ``value``.
    """
    mirrored_floor = floor_at_decimal(-value, decimal)
    return -mirrored_floor

+

57 

+

58 

+

def floor_to_step(value: float, step: float) -> float:
    """Return the largest multiple of ``step`` that does not exceed ``value``."""
    whole_steps = value // step
    return whole_steps * step

+

62 

+

63 

+

def ceil_to_step(value: float, step: float) -> float:
    """Return the multiple of ``step`` one rung above ``floor_to_step(value, step)``.

    Note that a ``value`` lying exactly on the ladder is therefore mapped to the
    next rung, not to itself.
    """
    rung_below = floor_to_step(value, step)
    return rung_below + step

+

67 

+

68 

+

def clean_and_check(filename: str | Path) -> Path:
    """Convert ``filename`` to a :py:class:`~pathlib.Path` and ensure it exists.

    Raises a ``FileNotFoundError`` naming the resolved location when nothing
    exists at that path.
    """
    filepath = Path(filename)
    if filepath.exists():
        return filepath

    msg = f"File with the name {filename} does not exist at {filepath.resolve()}"
    raise FileNotFoundError(msg)

+

79 

+

80 

+

AbstractDistributionT = TypeVar("AbstractDistributionT", bound="AbstractDistribution")


class AbstractDistribution(BaseModel):
    """Common interface of everything the plotting helpers can draw."""

    # factor and shift applied to the raw quantity before plotting
    scale: float = 100.0
    offset: float = 0.0
    # extra keyword arguments forwarded to the matplotlib call in ``draw``
    kwargs: dict[str, Any] = field(default_factory=dict)

    @abstractmethod
    def draw(self, axes: MPLAxes) -> MPLAxes:
        """Draw the distribution into the provided ``axes``."""
        ...

    @abstractmethod
    def left_percentile(self, percent: float) -> float:
        """Compute the point where ``percent`` of the values are to the left."""
        ...

    @abstractmethod
    def right_percentile(self, percent: float) -> float:
        """Compute the point where ``percent`` of the values are to the right."""
        ...

    def _get_label(self) -> str:
        """Compute the fallback label used when ``kwargs`` has no ``"label"``."""

    @property
    def label(self) -> str:
        """Return ``kwargs["label"]`` if present, else the computed fallback."""
        fallback = self._get_label()
        return self.kwargs.get("label", fallback)

+

113 

+

114 

+

class Histogram(AbstractDistribution):
    """Samples that are drawn as a histogram."""

    raw_values: NDArray[Shape["*"], float]  # noqa: F722

    @property
    def values(self) -> np.ndarray:
        """Return ``raw_values`` multiplied by ``scale`` and shifted by ``offset``."""
        scaled = self.raw_values * self.scale
        return scaled + self.offset

    @classmethod
    def from_hdf5(
        cls: type[Histogram],
        filename: str | Path,
        dataname: str,
        scale: float = 100.0,
        offset: float = 0.0,
        **kwargs,
    ) -> Histogram:
        """Load the dataset ``dataname`` of an HDF5 file as a histogram.

        Unless ``kwargs`` already contains a ``"label"``, one is derived from the
        dataset's attributes via ``get_label``.
        """
        checked_path = clean_and_check(filename)
        with h5py.File(checked_path, mode="r") as h5file:
            dataset = h5file[dataname]
            if "label" not in kwargs:
                kwargs["label"] = get_label(dataset.attrs)
            raw = dataset[:]
            return cls(raw_values=raw, scale=scale, offset=offset, kwargs=kwargs)

    def left_percentile(self, percent: float) -> float:
        """Compute the point where `percent` of the values are to the left."""
        return np.percentile(self.values, percent)

    def right_percentile(self, percent: float) -> float:
        """Compute the point where `percent` of the values are to the right."""
        complement = 100.0 - percent
        return np.percentile(self.values, complement)

    def draw(self, axes: MPLAxes, **defaults) -> Any:
        """Draw the histogram into ``axes``, binned over its current x-limits.

        Settings under ``defaults["hist"]`` are used as a base and overridden by
        this instance's own ``kwargs``. The result of ``axes.hist`` is returned.
        """
        bin_range = axes.get_xlim()

        # instance kwargs take precedence over the shared defaults
        merged = {**defaults.get("hist", {}), **self.kwargs}

        label = self.label
        if label is not None:
            merged["label"] = label

        return axes.hist(self.values, range=bin_range, **merged)

+

161 

+

162 

+

class BetaPosterior(AbstractDistribution):
    """Class for storing plot configs for a Beta posterior.

    The posterior is a Beta distribution with shape parameters
    ``num_success + 1`` and ``num_fail + 1``, stretched by ``scale`` and shifted
    by ``offset``.
    """

    num_success: int
    num_total: int

    @classmethod
    def from_hdf5(
        cls: type[BetaPosterior],
        filename: str | Path,
        dataname: str,
        scale: float = 100.0,
        offset: float = 0.0,
        **kwargs,
    ) -> BetaPosterior:
        """Initialize data container for Beta posteriors from HDF5 file.

        The counts are read from the ``"num_match"`` and ``"num_total"``
        attributes of the dataset ``dataname``. A ``KeyError`` is raised when one
        of these attributes is missing.
        """
        filename = clean_and_check(filename)
        with h5py.File(filename, mode="r") as h5file:
            dataset = h5file[dataname]
            try:
                num_success = int(dataset.attrs["num_match"])
                num_total = int(dataset.attrs["num_total"])
            except KeyError as key_err:
                raise KeyError(
                    "Dataset does not contain observed prevalence data",
                ) from key_err

        return cls(
            num_success=num_success,
            num_total=num_total,
            scale=scale,
            offset=offset,
            kwargs=kwargs,
        )

    def _get_label(self) -> str:
        """Build the fallback label from the observed counts."""
        return f"data: {self.num_success} of {self.num_total}"

    @property
    def num_fail(self):
        """Return the number of failures, i.e. the totals minus the successes."""
        return self.num_total - self.num_success

    def _frozen(self):
        """Return the scaled and shifted Beta distribution as a frozen object.

        Sharing one construction guarantees that ``pdf`` and both percentile
        methods use the same ``loc`` and ``scale``.
        """
        return sp.stats.beta(
            a=self.num_success + 1,
            b=self.num_fail + 1,
            loc=self.offset,
            scale=self.scale,
        )

    def pdf(self, x: np.ndarray) -> np.ndarray:
        """Compute the probability density function."""
        return self._frozen().pdf(x)

    def left_percentile(self, percent: float) -> float:
        """Return the point where the CDF reaches ``percent``.

        The ``offset`` is applied here as well, so the result lives on the same
        axis as :py:meth:`pdf`.
        """
        return self._frozen().ppf(percent / 100.0)

    def right_percentile(self, percent: float) -> float:
        """Return the point where 100% minus the CDF equals ``percent``.

        The ``offset`` is applied here as well, so the result lives on the same
        axis as :py:meth:`pdf`.
        """
        return self._frozen().ppf(1.0 - (percent / 100.0))

    def draw(self, axes: MPLAxes, resolution: int = 300, **defaults) -> Any:
        """Draw the Beta posterior into the provided ``axes``.

        The density is evaluated at ``resolution`` evenly spaced points between
        the current x-limits of ``axes``. Settings under ``defaults["plot"]``
        are used as a base and overridden by this instance's own ``kwargs``.
        Returns whatever ``axes.plot`` returns.
        """
        left, right = axes.get_xlim()
        x = np.linspace(left, right, resolution)
        y = self.pdf(x)

        plot_kwargs = defaults.get("plot", {}).copy()
        plot_kwargs.update(self.kwargs)

        if self.label is not None:
            plot_kwargs["label"] = self.label

        return axes.plot(x, y, **plot_kwargs)

+

250 

+

251 

+

def get_size(width="single", unit="cm", ratio="golden"):
    """Return a ``(width, height)`` tuple of figure sizes in inches.

    This is the format ``matplotlib`` expects for its ``figsize`` keyword. The
    ``width`` may be a number or one of the presets ``"single"`` (10) and
    ``"full"`` (16). It is divided by ``CM_PER_INCH`` when ``unit`` is ``"cm"``
    (the default) and used as is otherwise. The height is the width divided by
    ``ratio``, where ``"golden"`` stands for 1.618.

    >>> get_size(width="single", ratio="golden")
    (3.937007874015748, 2.4332557935820445)
    >>> get_size(width="full", ratio=2.)
    (6.299212598425196, 3.149606299212598)
    >>> get_size(width=10., ratio=1.)
    (3.937007874015748, 3.937007874015748)
    >>> get_size(width=5, unit="inches", ratio=2./3.)
    (5, 7.5)
    """
    preset_widths = {"single": 10, "full": 16}
    if isinstance(width, str):
        width = preset_widths.get(width, width)

    if ratio == "golden":
        ratio = 1.618

    if unit == "cm":
        width = width / CM_PER_INCH

    return (width, width / ratio)

+

277 

+

278 

+

def get_label(attrs: Mapping) -> str:
    """Build a histogram label from the HDF5 ``attrs`` of a dataset.

    The attributes ``label``, ``modality``, ``t_stage`` and ``midline_ext`` are
    looked up in this order. Those that are present and not ``None`` are
    converted to text (``midline_ext`` becomes ``"ext"`` or ``"noext"``) and
    joined with ``" | "``.
    """
    formatters = (
        ("label", str),
        ("modality", str),
        ("t_stage", str),
        ("midline_ext", lambda flag: "ext" if flag else "noext"),
    )
    parts = [
        to_text(attrs[key])
        for key, to_text in formatters
        if key in attrs and attrs[key] is not None
    ]
    return " | ".join(parts)

+

292 

+

293 

+

def get_xlims(
    contents: list[AbstractDistributionT],
    percent_lims: tuple[float, float] = (10.0, 10.0),
) -> tuple[float, float]:
    """Get the x-axis limits for a plot containing multiple distributions.

    The left limit is the smallest ``left_percentile`` of all ``contents``,
    evaluated at the first entry of ``percent_lims``. The right limit is the
    largest ``right_percentile``, evaluated at the last entry of
    ``percent_lims``.

    ``contents`` is iterated twice, so it must be a re-iterable collection. An
    empty collection makes the underlying ``np.min`` raise a ``ValueError``.
    """
    # Index from both ends: a two-element tuple yields (left, right), while a
    # single-element tuple keeps using its only entry for both sides.
    left_percent, right_percent = percent_lims[0], percent_lims[-1]

    left_percentiles = np.array(
        [c.left_percentile(left_percent) for c in contents],
    )
    left_lim = np.min(left_percentiles)
    right_percentiles = np.array(
        [c.right_percentile(right_percent) for c in contents],
    )
    right_lim = np.max(right_percentiles)
    return left_lim, right_lim

+

312 

+

313 

+

def draw(
    axes: MPLAxes,
    contents: list[AbstractDistribution],
    percent_lims: tuple[float, float] = (10.0, 10.0),
    xlims: tuple[float] | None = None,
    hist_kwargs: dict[str, Any] | None = None,
    plot_kwargs: dict[str, Any] | None = None,
) -> MPLAxes:
    """Draw all distributions in ``contents`` into ``axes``.

    Unless ``xlims`` is given, the x-axis limits are computed with ``get_xlims``
    from ``contents`` and ``percent_lims``.

    ``hist_kwargs`` and ``plot_kwargs`` extend or override the shared defaults
    that are passed to every item's ``draw`` method under the keys ``"hist"``
    and ``"plot"``, respectively.

    Raises a ``TypeError`` if an item is not an ``AbstractDistribution`` and a
    ``ValueError`` if the limits are not two non-decreasing values.
    """
    for item in contents:
        if not isinstance(item, AbstractDistribution):
            raise TypeError("Contents must be subclasses of `AbstractDistribution`")

    if not xlims:
        xlims = get_xlims(contents, percent_lims)

    if len(xlims) != 2 or xlims[0] > xlims[-1]:
        raise ValueError("`xlims` must be tuple of two increasing values")

    axes.set_xlim(*xlims)

    shared_defaults = {
        "hist": {
            "density": True,
            "histtype": "stepfilled",
            "alpha": 0.7,
            "bins": 50,
            **(hist_kwargs or {}),
        },
        "plot": {**(plot_kwargs or {})},
    }

    for item in contents:
        item.draw(axes, **shared_defaults)

    return axes

+

361 

+

362 

+

363def split_legends( 

+

364 axes: MPLAxes, 

+

365 titles: list[str], 

+

366 locs: list[tuple[float, float]], 

+

367 **kwargs, 

+

368) -> None: 

+

369 """Separate labels in ``axes`` into separate legends with ``titles`` at ``locs``.""" 

+

370 legend_kwargs = { 

+

371 "title_fontsize": "small", 

+

372 "labelspacing": 0.1, 

+

373 "loc": "upper left", 

+

374 } 

+

375 legend_kwargs.update(kwargs) 

+

376 

+

377 handles, labels = axes.get_legend_handles_labels() 

+

378 labels_per_legend = len(labels) // len(titles) 

+

379 

+

380 for i, (title, loc) in enumerate(zip(titles, locs, strict=True)): 

+

381 start = i * labels_per_legend 

+

382 stop = (i + 1) * labels_per_legend if i < len(titles) - 1 else None 

+

383 idx = slice(start, stop) 

+

384 

+

385 legend = axes.legend( 

+

386 handles[idx], 

+

387 labels[idx], 

+

388 bbox_to_anchor=loc, 

+

389 title=title, 

+

390 **legend_kwargs, 

+

391 ) 

+

392 axes.add_artist(legend) 

+

393 

+

394 

+

@log_state()
@check_input_file_exists
def use_mpl_stylesheet(file_path: str | Path):
    """Activate the ``.mplstyle`` stylesheet stored at ``file_path``.

    The path is handed to ``matplotlib.pyplot.style.use`` unchanged.
    """
    plt.style.use(file_path)

+

400 

+

401 

+

@log_state()
@check_output_dir_exists
def save_figure(
    output_path: str | Path,
    figure: Figure,
    formats: list[str] | None,
):
    """Save a ``figure`` to ``output_path`` in every one of the provided ``formats``.

    For each format, the suffix of ``output_path`` is replaced with that format
    before saving. ``output_path`` may be a plain string; it is converted to a
    :py:class:`~pathlib.Path` first. If ``formats`` is ``None`` or empty,
    nothing is saved.
    """
    # The signature admits ``str``, but ``with_suffix`` only exists on ``Path``.
    target = Path(output_path)
    # The signature admits ``None``; treat it like an empty list of formats.
    for frmt in formats or ():
        figure.savefig(target.with_suffix(f".{frmt}"))

+
+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_sample_py.html b/htmlcov/z_5bf5c588c698c6cc_sample_py.html new file mode 100644 index 0000000..c83c955 --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_sample_py.html @@ -0,0 +1,523 @@ + + + + + Coverage for src/lyscripts/sample.py: 91% + + + + + +
+
+

+ Coverage for src / lyscripts / sample.py: + 91% +

+ +

+ 137 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Implementation of flexible MCMC sampling for lymphatic progression models. 

+

2 

+

3This module provides both helpful functions for programmatically building and running 

+

4sampling pipelines, as well as a CLI interface for the most common sampling use cases. 

+

5 

+

6The core is the :py:func:`run_sampling` function. It has a flexible interface and 

+

7built-in convergence detection, as well as bookkeeping for monitoring and resuming 

+

8interrupted sampling runs. It can be used both during the burn-in phase and the actual 

+

9sampling phase. 

+

10 

+

11.. warning:: 

+

12 

+

13 We strongly recommend to set the CLI's ``--cores`` argument to ``None`` (or ``null`` 

+

14 in the YAML config file) if you are on MacOS or Windows. This is because we haven't 

+

15 yet figured out how we can safely and efficiently use the ``multiprocess(ing)`` 

+

16 library on these two platforms. 

+

17""" 

+

18 

+

19from __future__ import annotations 

+

20 

+

21import os 

+

22import sys 

+

23from typing import Any 

+

24 

+

25from loguru import logger 

+

26 

+

27from lyscripts.cli import assemble_main 

+

28 

+

29try: 

+

30 import multiprocess as mp 

+

31except ModuleNotFoundError: 

+

32 import multiprocessing as mp 

+

33 

+

34if sys.platform == "darwin": 

+

35 logger.warning("Detected MacOS. Setting multiprocess(ing) start method to 'fork'.") 

+

36 mp.set_start_method("fork") 

+

37 

+

38from pathlib import Path 

+

39 

+

40import emcee 

+

41import numpy as np 

+

42import pandas as pd 

+

43from lydata.utils import ModalityConfig 

+

44from lymph.types import ParamsType 

+

45from pydantic import BaseModel, Field 

+

46from rich.progress import Progress, ProgressColumn, Task, TimeElapsedColumn 

+

47from rich.text import Text 

+

48 

+

49from lyscripts.configs import ( 

+

50 BaseCLI, 

+

51 DataConfig, 

+

52 DistributionConfig, 

+

53 GraphConfig, 

+

54 ModelConfig, 

+

55 SamplingConfig, 

+

56 add_distributions, 

+

57 add_modalities, 

+

58 construct_model, 

+

59) 

+

60from lyscripts.utils import console, get_hdf5_backend 

+

61 

+

62 

+

class CompletedItersColumn(ProgressColumn):
    """Progress column showing how many iterations have been completed."""

    def __init__(self, table_column=None, it: int = 0):
        """Store ``it``, the number of iterations completed before this run."""
        super().__init__(table_column)
        self.it = it

    def render(self, task: Task) -> Text:
        """Render the task's completed count plus the stored previous iterations."""
        completed = task.completed
        shown = "? it" if completed is None else f"{completed + self.it} it"
        return Text(shown, style="progress.data.steps")

+

76 

+

77 

+

class ItersPerSecondColumn(ProgressColumn):
    """Progress column showing the sampling speed in iterations per second."""

    def render(self, task: Task) -> Text:
        """Render the speed, preferring ``finished_speed`` over ``speed``."""
        rate = task.finished_speed or task.speed
        shown = "? it/s" if rate is None else f"{rate:.2f} it/s"
        return Text(shown, style="progress.data.speed")

+

87 

+

88 

+

class AcorTime(BaseModel, validate_assignment=True):
    """Pair of the previous and the current autocorrelation time."""

    old: float
    new: float

    def update(self, new: float) -> None:
        """Shift the current value to ``old`` and store ``new`` as current."""
        self.old, self.new = self.new, new

    @property
    def relative_diff(self) -> float:
        """Return the absolute change between ``old`` and ``new``, relative to ``new``."""
        change = np.abs(self.new - self.old)
        return change / self.new

+

104 

+

105 

+

class NumAccepted(BaseModel, validate_assignment=True):
    """Pair of the previous and the current number of accepted proposals."""

    old: int
    new: int

    def update(self, new: int) -> None:
        """Shift the current count to ``old`` and store ``new`` as current."""
        self.old, self.new = self.new, new

    @property
    def newly_accepted(self) -> int:
        """Return how many proposals were accepted since the last update."""
        gained = self.new - self.old
        return gained

+

121 

+

122 

+

MODEL = None


def log_prob_fn(theta: ParamsType, inverse_temp: float = 1.0) -> tuple[float, float]:
    """Evaluate the log-probability of the module-level ``MODEL`` at ``theta``.

    The model is read from a global variable because of pickling. Two values are
    returned: the log-likelihood multiplied by ``inverse_temp`` (as used for
    thermodynamic integration) and the unscaled log-likelihood.
    """
    llh = MODEL.likelihood(given_params=theta)
    if not np.isinf(llh):
        return inverse_temp * llh, llh

    # an infinite likelihood is reported as -inf for both values, which also
    # prevents the case of 0 * inf = NaN
    return -np.inf, -np.inf

+

136 

+

137 

+

def ensure_initial_state(sampler: emcee.EnsembleSampler) -> np.ndarray:
    """Return a state from which ``sampler`` can start or resume.

    The last sample stored in the sampler's backend is used when it can be
    retrieved. If that attempt raises an ``AttributeError``, a uniformly random
    state of shape ``(nwalkers, ndim)`` is drawn instead.
    """
    try:
        start = sampler.backend.get_last_sample()
        logger.info(
            f"Resuming from {sampler.backend.filename} with {sampler.iteration} "
            "stored iterations.",
        )
    except AttributeError:
        shape = (sampler.nwalkers, sampler.ndim)
        start = np.random.uniform(size=shape)  # noqa: NPY002
        logger.debug(f"No stored samples found. Starting from random state {start}.")

    return start

+

154 

+

155 

+

156def ensure_history_table(file: Path | None) -> pd.DataFrame: 

+

157 """Return the history table from a file or an empty DataFrame. 

+

158 

+

159 It will try to load a history at the given ``file`` location, but with a ``.tmp`` 

+

160 extension. This is the expected name and location of a history file that was 

+

161 stored during an interrupted sampling run. 

+

162 

+

163 If no file is found, an empty DataFrame is returned. 

+

164 """ 

+

165 if file is None or not file.with_suffix(".tmp").exists(): 

+

166 return pd.DataFrame( 

+

167 columns=[ 

+

168 "steps", 

+

169 "acor_times", 

+

170 "accept_fracs", 

+

171 "max_log_probs", 

+

172 ], 

+

173 ).set_index("steps") 

+

174 

+

175 return pd.read_csv(file.with_suffix(".tmp"), index_col="steps") 

+

176 

+

177 

+

178def update_history_table( 

+

179 history: pd.DataFrame, 

+

180 history_file: Path | None, 

+

181 iteration: int, 

+

182 acor_time: float, 

+

183 accepted_frac: float, 

+

184 max_log_prob: float, 

+

185) -> pd.DataFrame: 

+

186 """Update the history table with the current iteration's information.""" 

+

187 history.loc[iteration] = [acor_time, accepted_frac, max_log_prob] 

+

188 logger.debug(history.iloc[-1].to_dict()) 

+

189 

+

190 if history_file is not None: 

+

191 history.to_csv(history_file.with_suffix(".tmp")) 

+

192 

+

193 return history 

+

194 

+

195 

+

196def is_converged( 

+

197 iteration: int, 

+

198 acor_time: AcorTime, 

+

199 trust_factor: float, 

+

200 relative_thresh: float, 

+

201) -> bool: 

+

202 """Check if the chain has converged based on the autocorrelation time. 

+

203 

+

204 The criterion is based on the relative change of the autocorrelation time and 

+

205 whether the autocorrelation extimate can be trusted. Essentially, we only trust 

+

206 the estimate if it is smaller than ``trust_factor`` times the current ``iteration``. 

+

207 

+

208 More details can be found in the `emcee documentation`_. 

+

209 

+

210 .. _emcee documentation: https://emcee.readthedocs.io/en/stable/tutorials/autocorr/ 

+

211 """ 

+

212 return ( 

+

213 acor_time.new * trust_factor < iteration 

+

214 and acor_time.relative_diff < relative_thresh 

+

215 ) 

+

216 

+

217 

+

218def _get_columns(it: int = 0) -> list[ProgressColumn]: 

+

219 """Get the default progress columns for the MCMC sampling.""" 

+

220 return [ 

+

221 *Progress.get_default_columns(), 

+

222 ItersPerSecondColumn(), 

+

223 CompletedItersColumn(it=it), 

+

224 TimeElapsedColumn(), 

+

225 ] 

+

226 

+

227 

+

228def run_sampling( 

+

229 sampler: emcee.EnsembleSampler, 

+

230 initial_state: np.ndarray | None = None, 

+

231 num_steps: int | None = None, 

+

232 thin_by: int = 1, 

+

233 check_interval: int = 100, 

+

234 trust_factor: float = 50.0, 

+

235 relative_thresh: float = 0.05, 

+

236 history_file: Path | None = None, 

+

237 reset_backend: bool = False, 

+

238 description: str = "Burn-in phase", 

+

239) -> None: 

+

240 """Run MCMC sampling. 

+

241 

+

242 This will run the ``sampler`` either for ``num_steps`` steps or - if it set to 

+

243 ``None`` - until convergence. Convergence is determined once within a 

+

244 ``check_interval`` of steps by the :py:func:`is_converged` function. The 

+

245 convergence criterion is based on a trustworthy estimate of the autocorrelation 

+

246 time. This is elaborated in the `emcee documentation`_. 

+

247 

+

248 Some bookkeeping parameters may be stored in a ``history_file``. During sampling, 

+

249 the history is stored in a temporary file with the suffix ``.tmp``. If the sampling 

+

250 is interrupted, the history and the last state of the ``sampler`` can be recovered 

+

251 and the sampling can be continued. 

+

252 

+

253 One may choose to ``reset_backend``, e.g. in case the previous sampling was run 

+

254 until convergence and now one wants to store a length of the converged chain. This 

+

255 may also be thinned by a factor of ``thin_by`` (directly passed to the 

+

256 :py:class:`emcee.EnsembleSampler` class). 

+

257 

+

258 .. _emcee documentation: https://emcee.readthedocs.io/en/stable/tutorials/autocorr/ 

+

259 """ 

+

260 state = initial_state or ensure_initial_state(sampler) 

+

261 history = ensure_history_table(history_file) 

+

262 

+

263 if reset_backend: 

+

264 logger.debug("Resetting backend of sampler.") 

+

265 sampler.backend.reset(sampler.nwalkers, sampler.ndim) 

+

266 

+

267 acor_time = AcorTime(old=np.inf, new=np.inf) 

+

268 accepted = NumAccepted(old=0, new=sampler.backend.accepted.sum()) 

+

269 

+

270 with Progress(*_get_columns(it=sampler.iteration), console=console) as progress: 

+

271 task = progress.add_task(description=description, total=num_steps) 

+

272 while sampler.iteration < (num_steps or np.inf): 

+

273 for state in sampler.sample( # noqa: B007, B020 

+

274 initial_state=state, 

+

275 iterations=check_interval - sampler.iteration % check_interval, 

+

276 thin_by=thin_by, 

+

277 ): 

+

278 progress.update(task, advance=1) 

+

279 

+

280 acor_time.update(new=sampler.get_autocorr_time(tol=0).mean()) 

+

281 accepted.update(new=sampler.backend.accepted.sum()) 

+

282 

+

283 history = update_history_table( 

+

284 history=history, 

+

285 history_file=history_file, 

+

286 iteration=sampler.iteration, 

+

287 acor_time=acor_time.new, 

+

288 accepted_frac=( 

+

289 accepted.newly_accepted / (check_interval * sampler.nwalkers) 

+

290 ), 

+

291 max_log_prob=np.max(state.log_prob), 

+

292 ) 

+

293 

+

294 if num_steps is None and is_converged( 

+

295 iteration=sampler.iteration, 

+

296 acor_time=acor_time, 

+

297 trust_factor=trust_factor, 

+

298 relative_thresh=relative_thresh, 

+

299 ): 

+

300 logger.info(f"Sampling converged after {sampler.iteration} steps.") 

+

301 break 

+

302 

+

303 if history_file is not None: 

+

304 history_file.with_suffix(".tmp").rename(history_file) 

+

305 

+

306 

+

307class DummyPool: 

+

308 """Dummy class to allow for no multiprocessing.""" 

+

309 

+

310 def __enter__(self) -> None: 

+

311 """Enter the context manager.""" 

+

312 ... 

+

313 

+

314 def __exit__(self, *args) -> None: 

+

315 """Exit the context manager.""" 

+

316 ... 

+

317 

+

318 

+

319def get_pool(num_cores: int | None) -> Any | DummyPool: # type: ignore 

+

320 """Get a ``multiprocess(ing)`` pool or ``DummyPool``. 

+

321 

+

322 Returns a ``multiprocess(ing)`` pool with ``num_cores`` cores if ``num_cores`` is 

+

323 not ``None``. Otherwise, a ``DummyPool`` is returned. 

+

324 """ 

+

325 return mp.Pool(num_cores) if num_cores is not None else DummyPool() 

+

326 

+

327 

+

328def init_sampler(settings: SampleCLI, ndim: int, pool: Any) -> emcee.EnsembleSampler: 

+

329 """Initialize the ``emcee.EnsembleSampler`` with the given ``settings``.""" 

+

330 nwalkers = ndim * settings.sampling.walkers_per_dim 

+

331 backend = get_hdf5_backend( 

+

332 file_path=settings.sampling.storage_file, 

+

333 dataset=settings.sampling.dataset, 

+

334 nwalkers=nwalkers, 

+

335 ndim=ndim, 

+

336 ) 

+

337 return emcee.EnsembleSampler( 

+

338 nwalkers=nwalkers, 

+

339 ndim=ndim, 

+

340 log_prob_fn=log_prob_fn, 

+

341 kwargs={"inverse_temp": settings.sampling.inverse_temp}, 

+

342 moves=[(emcee.moves.DEMove(), 0.8), (emcee.moves.DESnookerMove(), 0.2)], 

+

343 backend=backend, 

+

344 pool=pool, 

+

345 blobs_dtype=[("log_prob", np.float64)], 

+

346 parameter_names=list(MODEL.get_named_params().keys()), 

+

347 ) 

+

348 

+

349 

+

350class SampleCLI(BaseCLI): 

+

351 """Use MCMC to infer distributions over model parameters from data.""" 

+

352 

+

353 graph: GraphConfig 

+

354 model: ModelConfig = ModelConfig() 

+

355 distributions: dict[str, DistributionConfig] = Field( 

+

356 default={}, 

+

357 description=( 

+

358 "Mapping of model T-categories to predefined distributions over " 

+

359 "diagnose times." 

+

360 ), 

+

361 ) 

+

362 modalities: dict[str, ModalityConfig] = Field( 

+

363 default={}, 

+

364 description=( 

+

365 "Maps names of diagnostic modalities to their specificity/sensitivity." 

+

366 ), 

+

367 ) 

+

368 data: DataConfig 

+

369 sampling: SamplingConfig 

+

370 

+

371 def cli_cmd(self) -> None: 

+

372 """Start the ``sample`` subcommand. 

+

373 

+

374 First, it will construct the model from the ``graph`` and ``model`` arguments. 

+

375 Then, it will add distributions over diagnose times via the dictionary from 

+

376 the ``distributions`` argument. It will also set sensitivity and specificity of 

+

377 diagnostic modalities via the dictionary provided through the ``modalities`` 

+

378 argument. Finally, it will load the patient data as specified via the ``data`` 

+

379 argument. 

+

380 

+

381 When the model is constructed, an :py:class:`emcee.EnsembleSampler` is 

+

382 initialized (see :py:func:`init_sampler`) and :py:func:`run_sampling` is 

+

383 executed twice: once for the burn-in phase and once for the actual sampling 

+

384 phase. The ``sampling`` argument provides all necessary settings for the 

+

385 sampling. 

+

386 """ 

+

387 # as recommended in https://emcee.readthedocs.io/en/stable/tutorials/parallel/# 

+

388 os.environ["OMP_NUM_THREADS"] = "1" 

+

389 

+

390 logger.debug(self.model_dump_json(indent=2)) 

+

391 

+

392 # ugly, but necessary for pickling 

+

393 global MODEL 

+

394 MODEL = construct_model(self.model, self.graph) 

+

395 MODEL = add_distributions(MODEL, self.distributions) 

+

396 MODEL = add_modalities(MODEL, self.modalities) 

+

397 MODEL.load_patient_data(**self.data.get_load_kwargs()) 

+

398 ndim = MODEL.get_num_dims() 

+

399 

+

400 # emcee does not support numpy's new random number generator yet. 

+

401 np.random.seed(self.sampling.seed) # noqa: NPY002 

+

402 

+

403 with get_pool(self.sampling.cores) as pool: 

+

404 sampler = init_sampler(settings=self, ndim=ndim, pool=pool) 

+

405 run_sampling( 

+

406 description="Burn-in phase", 

+

407 sampler=sampler, 

+

408 num_steps=self.sampling.burnin_steps, 

+

409 check_interval=self.sampling.check_interval, 

+

410 trust_factor=self.sampling.trust_factor, 

+

411 relative_thresh=self.sampling.relative_thresh, 

+

412 history_file=self.sampling.history_file, 

+

413 ) 

+

414 run_sampling( 

+

415 description="Sampling phase", 

+

416 sampler=sampler, 

+

417 num_steps=self.sampling.num_steps, 

+

418 check_interval=self.sampling.num_steps, 

+

419 reset_backend=True, 

+

420 thin_by=self.sampling.thin_by, 

+

421 ) 

+

422 

+

423 

+

424if __name__ == "__main__": 

+

425 main = assemble_main(settings_cls=SampleCLI, prog_name="sample") 

+

426 main() 

+
+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_schedule_py.html b/htmlcov/z_5bf5c588c698c6cc_schedule_py.html new file mode 100644 index 0000000..9e62d82 --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_schedule_py.html @@ -0,0 +1,130 @@ + + + + + Coverage for src/lyscripts/schedule.py: 55% + + + + + +
+
+

+ Coverage for src / lyscripts / schedule.py: + 55% +

+ +

+ 11 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1r"""Generate inverse temperature schedules for thermodynamic integration. 

+

2 

+

3Thermodynamic integration is quite sensitive to the specific schedule which is used. 

+

4I noticed in my models, that within the interval :math:`[0, 0.1]`, the increase in the 

+

5expected log-likelihood is very steep. Hence, the inverse temperature :math:`\beta` 

+

6must be more densely spaced in the beginning. 

+

7 

+

8This can be achieved by using a power sequence: Generate :math:`n` linearly spaced 

+

9points in the interval :math:`[0, 1]` and then transform each point by computing 

+

10:math:`\beta_i^k` where :math:`k` could e.g. be 5. 

+

11""" 

+

12 

+

13from loguru import logger 

+

14 

+

15from lyscripts.cli import assemble_main 

+

16from lyscripts.configs import BaseCLI, ScheduleConfig 

+

17 

+

18 

+

19class ScheduleCLI(ScheduleConfig, BaseCLI): 

+

20 """Generate an inverse temperature schedule for thermodynamic integration.""" 

+

21 

+

22 def cli_cmd(self) -> None: 

+

23 """Start the ``schedule`` command.""" 

+

24 logger.debug(self.model_dump_json(indent=2)) 

+

25 

+

26 for inv_temp in self.get_schedule(): 

+

27 # print is necessary to allow piping the output 

+

28 print(inv_temp) # noqa: T201 

+

29 

+

30 

+

31if __name__ == "__main__": 

+

32 main = assemble_main(settings_cls=ScheduleCLI, prog_name="schedule") 

+

33 main() 

+
+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_schema_py.html b/htmlcov/z_5bf5c588c698c6cc_schema_py.html new file mode 100644 index 0000000..045f18e --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_schema_py.html @@ -0,0 +1,163 @@ + + + + + Coverage for src/lyscripts/schema.py: 86% + + + + + +
+
+

+ Coverage for src / lyscripts / schema.py: + 86% +

+ +

+ 22 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""A fusion of all :py:mod:`configs`, allowing the creation of a JSON schema. 

+

2 

+

3This command is not intended to be used by the end user. Rather, it exists such that 

+

4the developers and maintainers can create a JSON schema from all the defined 

+

5:py:mod:`configs` an store that in the `source code repository`_. Subsequently, the 

+

6end user can point their IDE to this schema, hosted on GitHub to provide them with 

+

7auto-completion and validation of their YAML configuration files that they feed into 

+

8the lyscripts CLIs when they build pipelines or scripts with it. 

+

9 

+

10The `URL for the schema`_ can for example be used in the settings of VS Code like this: 

+

11 

+

12.. code:: json 

+

13 

+

14 { 

+

15 "yaml.schemas": { 

+

16 "https://raw.githubusercontent.com/lycosystem/lyscripts/main/schemas/ly.json": "*.ly.yaml" 

+

17 }, 

+

18 } 

+

19 

+

20Which would enable auto-completion and validation for all files with the extension 

+

21``.ly.yaml`` in the workspace. 

+

22 

+

23.. _source code repository: https://github.com/lycosystem/lyscripts 

+

24.. _URL for the schema: https://raw.githubusercontent.com/lycosystem/lyscripts/main/schemas/ly.json 

+

25""" # noqa: E501 

+

26 

+

27import json 

+

28 

+

29from lydata.utils import ModalityConfig 

+

30from pydantic import BaseModel, Field 

+

31 

+

32from lyscripts import configs 

+

33 

+

34 

+

35class SchemaSettings(BaseModel): 

+

36 """Settings for generating a JSON schema for lyscripts configuration files.""" 

+

37 

+

38 version: int = Field( 

+

39 description=( 

+

40 "For future compatibility reasons, every config file must have a " 

+

41 "`version: 1` field at the top level." 

+

42 ), 

+

43 ge=1, 

+

44 le=1, 

+

45 ) 

+

46 cross_validation: configs.CrossValidationConfig = None 

+

47 data: configs.DataConfig = None 

+

48 diagnosis: configs.DiagnosisConfig = None 

+

49 distributions: dict[str, configs.DistributionConfig] = {} 

+

50 graph: configs.GraphConfig = None 

+

51 involvement: configs.InvolvementConfig = None 

+

52 modalities: dict[str, ModalityConfig] = {} 

+

53 model: configs.ModelConfig = None 

+

54 sampling: configs.SamplingConfig = None 

+

55 scenarios: list[configs.ScenarioConfig] = [] 

+

56 schedule: configs.ScheduleConfig = None 

+

57 

+

58 

+

59def main() -> None: 

+

60 """Generate a JSON schema for lyscripts configuration files.""" 

+

61 schema = SchemaSettings.model_json_schema() 

+

62 print(json.dumps(schema, indent=2)) # noqa: T201 

+

63 

+

64 

+

65if __name__ == "__main__": 

+

66 main() 

+
+ + + diff --git a/htmlcov/z_5bf5c588c698c6cc_utils_py.html b/htmlcov/z_5bf5c588c698c6cc_utils_py.html new file mode 100644 index 0000000..5b5e176 --- /dev/null +++ b/htmlcov/z_5bf5c588c698c6cc_utils_py.html @@ -0,0 +1,296 @@ + + + + + Coverage for src/lyscripts/utils.py: 94% + + + + + +
+
+

+ Coverage for src / lyscripts / utils.py: + 94% +

+ +

+ 84 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""General utility functions for the lyscripts package.""" 

+

2 

+

3from pathlib import Path 

+

4 

+

5import numpy as np 

+

6import pandas as pd 

+

7import yaml 

+

8from emcee.backends import HDFBackend 

+

9from loguru import logger 

+

10from rich.console import Console 

+

11from scipy.special import factorial 

+

12 

+

13from lyscripts.decorators import ( 

+

14 check_input_file_exists, 

+

15 check_output_dir_exists, 

+

16) 

+

17 

+

18console = Console() 

+

19 

+

20 

+

21def binom_pmf(support: list[int] | np.ndarray, p: float = 0.5): 

+

22 """Binomial PMF that is much faster than the one from scipy.""" 

+

23 max_time = len(support) - 1 

+

24 if p > 1.0 or p < 0.0: 

+

25 raise ValueError("Binomial prob must be btw. 0 and 1") 

+

26 q = 1.0 - p 

+

27 binom_coeff = factorial(max_time) / ( 

+

28 factorial(support) * factorial(max_time - support) 

+

29 ) 

+

30 return binom_coeff * p**support * q ** (max_time - support) 

+

31 

+

32 

+

33def get_dict_depth(nested: dict) -> int: 

+

34 """Get the depth of a nested dictionary. 

+

35 

+

36 >>> get_dict_depth({"a": {"b": 1}}) 

+

37 2 

+

38 >>> varying_depth = {"a": {"b": 1}, "c": {"d": {"e": 2}}} 

+

39 >>> get_dict_depth(varying_depth) 

+

40 3 

+

41 """ 

+

42 if not isinstance(nested, dict): 

+

43 return 0 

+

44 

+

45 max_depth = None 

+

46 for _, value in nested.items(): 

+

47 value_depth = get_dict_depth(value) 

+

48 max_depth = max(max_depth or value_depth, value_depth) 

+

49 

+

50 return 1 + (max_depth or 0) 

+

51 

+

52 

+

53def delete_private_keys(nested: dict) -> dict: 

+

54 """Delete private keys from a nested dictionary. 

+

55 

+

56 A 'private' key is a key whose name starts with an underscore. For example: 

+

57 

+

58 >>> delete_private_keys({"patient": {"__doc__": "some patient info", "age": 61}}) 

+

59 {'patient': {'age': 61}} 

+

60 >>> delete_private_keys({"patient": {"age": 61}}) 

+

61 {'patient': {'age': 61}} 

+

62 """ 

+

63 cleaned = {} 

+

64 

+

65 if isinstance(nested, dict): 

+

66 for key, value in nested.items(): 

+

67 if not (isinstance(key, str) and key.startswith("_")): 

+

68 cleaned[key] = delete_private_keys(value) 

+

69 else: 

+

70 cleaned = nested 

+

71 

+

72 return cleaned 

+

73 

+

74 

+

75def flatten( 

+

76 nested: dict, 

+

77 prev_key: tuple = (), 

+

78 max_depth: int | None = None, 

+

79) -> dict: 

+

80 """Flatten ``nested`` dict by creating key tuples for each value at ``max_depth``. 

+

81 

+

82 >>> nested = {"tumor": {"1": {"t_stage": 1, "size": 12.3}}} 

+

83 >>> flatten(nested) 

+

84 {('tumor', '1', 't_stage'): 1, ('tumor', '1', 'size'): 12.3} 

+

85 >>> mapping = {"patient": {"#": {"age": {"func": int, "columns": ["age"]}}}} 

+

86 >>> flatten(mapping, max_depth=3) 

+

87 {('patient', '#', 'age'): {'func': <class 'int'>, 'columns': ['age']}} 

+

88 

+

89 Note that flattening an already flat dictionary will yield some weird results. 

+

90 """ 

+

91 result = {} 

+

92 

+

93 for key, value in nested.items(): 

+

94 is_dict = isinstance(value, dict) 

+

95 has_reached_max_depth = max_depth is not None and len(prev_key) >= max_depth - 1 

+

96 

+

97 if is_dict and not has_reached_max_depth: 

+

98 result.update(flatten(value, (*prev_key, key), max_depth)) 

+

99 else: 

+

100 result[(*prev_key, key)] = value 

+

101 

+

102 return result 

+

103 

+

104 

+

105def unflatten(flat: dict) -> dict: 

+

106 """Take a flat dictionary with tuples of keys and create nested dict from it. 

+

107 

+

108 >>> flat = {('tumor', '1', 't_stage'): 1, ('tumor', '1', 'size'): 12.3} 

+

109 >>> unflatten(flat) 

+

110 {'tumor': {'1': {'t_stage': 1, 'size': 12.3}}} 

+

111 >>> mapping = {('patient', '#', 'age'): {'func': int, 'columns': ['age']}} 

+

112 >>> unflatten(mapping) 

+

113 {'patient': {'#': {'age': {'func': <class 'int'>, 'columns': ['age']}}}} 

+

114 """ 

+

115 result = {} 

+

116 

+

117 for keys, value in flat.items(): 

+

118 current = result 

+

119 for key in keys[:-1]: 

+

120 current = current.setdefault(key, {}) 

+

121 

+

122 current[keys[-1]] = value 

+

123 

+

124 return result 

+

125 

+

126 

+

127def get_modalities_subset( 

+

128 defined_modalities: dict[str, list[float]], 

+

129 selection: list[str], 

+

130) -> dict[str, list[float]]: 

+

131 """Of the ``defined_modalities`` return only those mentioned in the ``selection``. 

+

132 

+

133 >>> modalities = {"CT": [0.76, 0.81], "MRI": [0.63, 0.86]} 

+

134 >>> get_modalities_subset(modalities, ["CT"]) 

+

135 {'CT': [0.76, 0.81]} 

+

136 """ 

+

137 selected_modalities = {} 

+

138 for mod in selection: 

+

139 try: 

+

140 selected_modalities[mod] = defined_modalities[mod] 

+

141 except KeyError as key_err: 

+

142 raise KeyError(f"Modality {mod} has not been defined yet") from key_err 

+

143 return selected_modalities 

+

144 

+

145 

+

146def load_patient_data( 

+

147 file_path: Path, 

+

148 **read_csv_kwargs: dict, 

+

149) -> pd.DataFrame: 

+

150 """Load patient data from a CSV file stored at ``file``.""" 

+

151 if "header" not in read_csv_kwargs: 

+

152 read_csv_kwargs["header"] = [0, 1, 2] 

+

153 

+

154 data = pd.read_csv(file_path, **read_csv_kwargs) 

+

155 logger.info(f"Loaded {len(data)} patient records from {file_path}") 

+

156 return data 

+

157 

+

158 

+

159@check_input_file_exists 

+

160def load_yaml_params(file_path: Path) -> dict: 

+

161 """Load parameters from a YAML ``file``.""" 

+

162 with open(file_path, encoding="utf-8") as file: 

+

163 loaded_params = yaml.safe_load(file) 

+

164 logger.info(f"Loaded YAML parameters from {file_path}") 

+

165 return loaded_params 

+

166 

+

167 

+

168@check_input_file_exists 

+

169def load_model_samples( 

+

170 file_path: Path, 

+

171 name: str = "mcmc", 

+

172 flat: bool = True, 

+

173 discard: int = 0, 

+

174 thin: int = 1, 

+

175) -> np.ndarray: 

+

176 """Load MCMC samples stored in HDF5 file at ``file_path`` under a key ``name``.""" 

+

177 backend = HDFBackend(file_path, name=name, read_only=True) 

+

178 samples = backend.get_chain(flat=flat, discard=discard, thin=thin) 

+

179 logger.info(f"Loaded samples with shape {samples.shape} from {file_path}") 

+

180 return samples 

+

181 

+

182 

+

183@check_output_dir_exists 

+

184def get_hdf5_backend( 

+

185 file_path: Path, 

+

186 dataset: str = "mcmc", 

+

187 nwalkers: int | None = None, 

+

188 ndim: int | None = None, 

+

189 reset: bool = False, 

+

190) -> HDFBackend: 

+

191 """Open an HDF5 file at ``file_path`` and return a backend.""" 

+

192 backend = HDFBackend(file_path, name=dataset) 

+

193 logger.info(f"Opened HDF5 file at {file_path}") 

+

194 

+

195 if reset: 

+

196 logger.info(f"Resetting backend at {file_path} to {nwalkers=} and {ndim=}") 

+

197 backend.reset(nwalkers, ndim) 

+

198 

+

199 return backend 

+
+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a___init___py.html b/htmlcov/z_9b7bcb970ba14d6a___init___py.html new file mode 100644 index 0000000..e035c63 --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a___init___py.html @@ -0,0 +1,150 @@ + + + + + Coverage for src/lyscripts/data/__init__.py: 83% + + + + + +
+
+

+ Coverage for src / lyscripts / data / __init__.py: + 83% +

+ +

+ 6 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Commands and functions for managing CSV data on patterns of lymphatic progression. 

+

2 

+

3This contains helpful CLI commands that allow building quick and reproducible workflows 

+

4even when using language-agnostic tools like `Make`_ or `DVC`_. 

+

5 

+

6Most of these commands can load `LyProX`_ style data from CSV files, but also from 

+

7the installed datasets provided by the `lydata`_ package and directly from the 

+

8associated `GitHub repository`_. 

+

9 

+

10Another cool feature is the built-in mini web application that allows collecting nodal 

+

11involvement data interactively and in the same standardized format as we have published 

+

12in the past, both on `LyProX`_ and in our `GitHub repository`_. It can be launched by 

+

13running `lyscripts data collect` in the terminal. See the docs for the 

+

14:py:mod:`lyscripts.data.collect` submodule on more information. 

+

15 

+

16.. _Make: https://www.gnu.org/software/make/ 

+

17.. _DVC: https://dvc.org 

+

18.. _LyProX: https://lyprox.org 

+

19.. _lydata: https://lydata.readthedocs.io 

+

20.. _GitHub repository: https://github.com/lycosystem/lydata 

+

21""" 

+

22 

+

23from pydantic_settings import BaseSettings, CliApp, CliSubCommand 

+

24 

+

25from lyscripts.data import ( # noqa: F401 

+

26 collect, 

+

27 enhance, 

+

28 fetch, 

+

29 generate, 

+

30 join, 

+

31 lyproxify, 

+

32 split, 

+

33) 

+

34 

+

35# Avoid conflict with built-in `filter` function 

+

36from lyscripts.data import filter as filter_ 

+

37 

+

38 

+

39class DataCLI(BaseSettings): 

+

40 """Work with lymphatic progression data through this CLI.""" 

+

41 

+

42 collect: CliSubCommand[collect.CollectorCLI] 

+

43 lyproxify: CliSubCommand[lyproxify.LyproxifyCLI] 

+

44 join: CliSubCommand[join.JoinCLI] 

+

45 split: CliSubCommand[split.SplitCLI] 

+

46 fetch: CliSubCommand[fetch.FetchCLI] 

+

47 filter: CliSubCommand[filter_.FilterCLI] 

+

48 enhance: CliSubCommand[enhance.EnhanceCLI] 

+

49 generate: CliSubCommand[generate.GenerateCLI] 

+

50 

+

51 def cli_cmd(self) -> None: 

+

52 """Run one of the ``data`` subcommands.""" 

+

53 CliApp.run_subcommand(self) 

+
+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a___main___py.html b/htmlcov/z_9b7bcb970ba14d6a___main___py.html new file mode 100644 index 0000000..6590714 --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a___main___py.html @@ -0,0 +1,133 @@ + + + + + Coverage for src/lyscripts/data/__main__.py: 0% + + + + + +
+
+

+ Coverage for src / lyscripts / data / __main__.py: + 0% +

+ +

+ 18 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Run the data module as a script.""" 

+

2 

+

3import argparse 

+

4 

+

5from lyscripts import exit_cli 

+

6from lyscripts.cli import RichDefaultHelpFormatter 

+

7from lyscripts.data import enhance, generate, join, split 

+

8 

+

9# Avoid conflict with built-in `filter` function 

+

10from lyscripts.data import filter as filter_ 

+

11 

+

12 

+

13def main(args: argparse.Namespace): 

+

14 """Run the main script.""" 

+

15 parser = argparse.ArgumentParser( 

+

16 prog="lyscripts data", 

+

17 description=__doc__, 

+

18 formatter_class=RichDefaultHelpFormatter, 

+

19 ) 

+

20 parser.set_defaults(run_main=exit_cli) 

+

21 subparsers = parser.add_subparsers() 

+

22 

+

23 # the individual scripts add `ArgumentParser` instances and their arguments to 

+

24 # this `subparsers` object 

+

25 enhance._add_parser(subparsers, help_formatter=parser.formatter_class) 

+

26 generate._add_parser(subparsers, help_formatter=parser.formatter_class) 

+

27 join._add_parser(subparsers, help_formatter=parser.formatter_class) 

+

28 split._add_parser(subparsers, help_formatter=parser.formatter_class) 

+

29 filter_._add_parser(subparsers, help_formatter=parser.formatter_class) 

+

30 

+

31 args = parser.parse_args() 

+

32 args.run_main(args, parser) 

+

33 

+

34 

+

35if __name__ == "__main__": 

+

36 main() 

+
+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a_enhance_py.html b/htmlcov/z_9b7bcb970ba14d6a_enhance_py.html new file mode 100644 index 0000000..ce4fdd3 --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a_enhance_py.html @@ -0,0 +1,150 @@ + + + + + Coverage for src/lyscripts/data/enhance.py: 68% + + + + + +
+
+

+ Coverage for src / lyscripts / data / enhance.py: + 68% +

+ +

+ 19 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Enhance the dataset by inferring additional columns from the data. 

+

2 

+

3This is a command-line interface to the methods 

+

4:py:meth:`~lydata.accessor.LyDataAccessor.combine` and 

+

5:py:meth:`~lydata.accessor.LyDataAccessor.augment` of the 

+

6:py:class:`~lydata.accessor.LyDataAccessor` class. 

+

7""" 

+

8 

+

9from typing import Literal 

+

10 

+

11from loguru import logger 

+

12from lydata.accessor import LyDataFrame 

+

13from lydata.utils import ModalityConfig 

+

14 

+

15from lyscripts.cli import assemble_main 

+

16from lyscripts.configs import BaseCLI, DataConfig 

+

17from lyscripts.data.utils import save_table_to_csv 

+

18 

+

19 

+

20class EnhanceCLI(BaseCLI): 

+

21 """Enhance the dataset by inferring additional columns from the data.""" 

+

22 

+

23 input: DataConfig 

+

24 modalities: dict[str, ModalityConfig] | None = None 

+

25 method: Literal["max_llh", "rank"] = "max_llh" 

+

26 lnl_subdivisions: dict[str, list[str]] = { 

+

27 "I": ["a", "b"], 

+

28 "II": ["a", "b"], 

+

29 "V": ["a", "b"], 

+

30 } 

+

31 output_file: str 

+

32 

+

33 def cli_cmd(self) -> None: 

+

34 """Infer additional columns from the data and save the enhanced dataset. 

+

35 

+

36 This basically provides a CLI to the 

+

37 :py:func:`~lydata.accessor.LyDataAccessor.augment` function. See its docs for 

+

38 more details on what exactly is happening here. 

+

39 """ 

+

40 logger.debug(self.model_dump_json(indent=2)) 

+

41 

+

42 data: LyDataFrame = self.input.load() 

+

43 data = data.ly.enhance( 

+

44 modalities=self.modalities, 

+

45 method=self.method, 

+

46 subdivisions=self.lnl_subdivisions, 

+

47 ) 

+

48 save_table_to_csv(file_path=self.output_file, table=data) 

+

49 

+

50 

+

51if __name__ == "__main__": 

+

52 main = assemble_main(settings_cls=EnhanceCLI, prog_name="enhance") 

+

53 main() 

+
+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a_fetch_py.html b/htmlcov/z_9b7bcb970ba14d6a_fetch_py.html new file mode 100644 index 0000000..97927cf --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a_fetch_py.html @@ -0,0 +1,154 @@ + + + + + Coverage for src/lyscripts/data/fetch.py: 67% + + + + + +
+
+

+ Coverage for src / lyscripts / data / fetch.py: + 67% +

+ +

+ 21 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Small command to fetch the data from a remote using the lydata package.""" 

+

2 

+

3from pathlib import Path 

+

4 

+

5import lydata # noqa: F401 

+

6from loguru import logger 

+

7from lydata.loader import LyDataset 

+

8from pydantic import Field 

+

9 

+

10from lyscripts.cli import assemble_main 

+

11from lyscripts.configs import BaseCLI 

+

12 

+

13 

+

14class FetchCLI(LyDataset, BaseCLI): 

+

15 """Fetch a specific dataset from the lyDATA repository.""" 

+

16 

+

17 github_token: str | None = Field( 

+

18 default=None, 

+

19 description=( 

+

20 "GitHub token to access private datasets. Can also be provided as " 

+

21 "`GITHUB_TOKEN` environment variable." 

+

22 ), 

+

23 ) 

+

24 github_user: str | None = Field( 

+

25 default=None, 

+

26 description=( 

+

27 "GitHub user for non-token login. Can also be provided as " 

+

28 "`GITHUB_USER` environment variable." 

+

29 ), 

+

30 ) 

+

31 github_password: str | None = Field( 

+

32 default=None, 

+

33 description=( 

+

34 "GitHub password for non-token login. Can also be provided as " 

+

35 "`GITHUB_PASSWORD` environment variable." 

+

36 ), 

+

37 ) 

+

38 output_file: Path = Field(description="The path to save the dataset to.") 

+

39 

+

40 def cli_cmd(self): 

+

41 """Execute the ``fetch`` command.""" 

+

42 logger.enable("lydata") 

+

43 logger.debug(self.model_dump_json(indent=2)) 

+

44 

+

45 dataset = self.get_dataframe( 

+

46 use_github=True, 

+

47 token=self.github_token, 

+

48 user=self.github_user, 

+

49 password=self.github_password, 

+

50 ) 

+

51 dataset.to_csv(self.output_file, index=False) 

+

52 logger.success(f"Fetched dataset and saved to {self.output_file}") 

+

53 

+

54 

+

55if __name__ == "__main__": 

+

56 main = assemble_main(settings_cls=FetchCLI, prog_name="fetch") 

+

57 main() 

+
+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a_filter_py.html b/htmlcov/z_9b7bcb970ba14d6a_filter_py.html new file mode 100644 index 0000000..e28ec32 --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a_filter_py.html @@ -0,0 +1,196 @@ + + + + + Coverage for src/lyscripts/data/filter.py: 38% + + + + + +
+
+

+ Coverage for src / lyscripts / data / filter.py: + 38% +

+ +

+ 48 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Filter a dataset according to some common criteria. 

+

2 

+

3This is essentially a command line interface to building a 

+

4:py:class:`query object <lydata.querier.Q>` and applying it to the dataset. 

+

5""" 

+

6 

+

7from pathlib import Path 

+

8from typing import Literal 

+

9 

+

10from loguru import logger 

+

11from lydata import Q 

+

12from pydantic import Field 

+

13from pydantic_settings import CliImplicitFlag 

+

14 

+

15from lyscripts.cli import assemble_main 

+

16from lyscripts.configs import BaseCLI, DataConfig 

+

17from lyscripts.data.utils import save_table_to_csv 

+

18 

+

19 

+

20class FilterCLI(BaseCLI): 

+

21 """In- or exclude patients where a certain column fulfills a certain condition.""" 

+

22 

+

23 input: DataConfig 

+

24 include: CliImplicitFlag[bool] = Field( 

+

25 False, 

+

26 description="Include patients where the condition is met (default: exclude).", 

+

27 ) 

+

28 column: list[str] | str = Field( 

+

29 description=( 

+

30 "The column to filter by. May be a tuple of three strings, since data " 

+

31 "has a three-level header. If it is only one string, the lydata package " 

+

32 "tries to map that to a three-level header." 

+

33 ), 

+

34 ) 

+

35 operator: Literal["==", "!=", ">", "<", ">=", "<=", "in", "contains"] = Field( 

+

36 description="The operator to use for comparison.", 

+

37 ) 

+

38 value: float | int | str = Field(description="The value to compare against.") 

+

39 output_file: Path = Field(description="The path to save the filtered dataset to.") 

+

40 

+

41 def model_post_init(self, __context): 

+

42 """Cast to ``float``, if not possible ``int``, if not possible ``str``.""" 

+

43 if isinstance(self.column, list): 

+

44 if len(self.column) == 1: 

+

45 self.column = self.column[0] 

+

46 elif len(self.column) == 3: 

+

47 self.column = tuple(self.column) 

+

48 else: 

+

49 raise ValueError( 

+

50 "The column attribute must be an iterable of three strings or a " 

+

51 f"single string, but it is {self.column}.", 

+

52 ) 

+

53 

+

54 try: 

+

55 self.value = float(self.value) 

+

56 return super().model_post_init(__context) 

+

57 except ValueError: 

+

58 pass 

+

59 

+

60 try: 

+

61 self.value = int(self.value) 

+

62 return super().model_post_init(__context) 

+

63 except ValueError: 

+

64 pass 

+

65 

+

66 return super().model_post_init(__context) 

+

67 

+

68 def cli_cmd(self): 

+

69 """Execute the ``filter`` command. 

+

70 

+

71 This command uses the :py:class:`~lydata.querier.Q` objects of the `lydata`_ 

+

72 library to filter the dataset according to the given criteria. 

+

73 

+

74 .. _lydata: https://lydata.readthedocs.io 

+

75 """ 

+

76 logger.debug(self.model_dump_json(indent=2)) 

+

77 

+

78 data = self.input.load() 

+

79 query = Q( 

+

80 column=self.column, 

+

81 operator=self.operator, 

+

82 value=self.value, 

+

83 ) 

+

84 logger.debug(f"Created query object: {query}") 

+

85 mask = query.execute(data) 

+

86 

+

87 if self.include: 

+

88 filtered = data[mask] 

+

89 logger.info(f"Keeping {sum(mask)} of {len(data)} patients.") 

+

90 else: 

+

91 filtered = data[~mask] 

+

92 logger.info(f"Excluding {sum(mask)} of {len(data)} patients.") 

+

93 

+

94 save_table_to_csv(file_path=self.output_file, table=filtered) 

+

95 

+

96 

+

97if __name__ == "__main__": 

+

98 main = assemble_main(settings_cls=FilterCLI, prog_name="filter") 

+

99 main() 

+
+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a_generate_py.html b/htmlcov/z_9b7bcb970ba14d6a_generate_py.html new file mode 100644 index 0000000..f2a6778 --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a_generate_py.html @@ -0,0 +1,193 @@ + + + + + Coverage for src/lyscripts/data/generate.py: 89% + + + + + +
+
+

+ Coverage for src / lyscripts / data / generate.py: + 89% +

+ +

+ 35 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Script to generate a synthetic dataset. 

+

2 

+

3The generation is done by the :py:meth:`~lymph.models.Unilateral.draw_patients` method 

+

4of 

+

5the `lymph`_ package, which is why this requires the specification of a model 

+

6via the :py:class:`~lyscripts.configs.ModelConfig` class. 

+

7 

+

8.. _lymph: https://lymph-model.readthedocs.io/ 

+

9""" 

+

10 

+

11import numpy as np 

+

12from loguru import logger 

+

13from lydata.utils import ModalityConfig 

+

14from pydantic import Field 

+

15 

+

16from lyscripts.cli import assemble_main 

+

17from lyscripts.configs import ( 

+

18 BaseCLI, 

+

19 DistributionConfig, 

+

20 GraphConfig, 

+

21 ModelConfig, 

+

22 add_distributions, 

+

23 add_modalities, 

+

24 construct_model, 

+

25) 

+

26from lyscripts.data.utils import save_table_to_csv 

+

27 

+

28 

+

29class GenerateCLI(BaseCLI): 

+

30 """Settings for the command-line interface.""" 

+

31 

+

32 graph: GraphConfig 

+

33 model: ModelConfig = ModelConfig() 

+

34 distributions: dict[str, DistributionConfig] = Field( 

+

35 default={}, 

+

36 description=( 

+

37 "Mapping of model T-categories to predefined distributions over " 

+

38 "diagnose times." 

+

39 ), 

+

40 ) 

+

41 t_stages_dist: dict[str, float] = Field( 

+

42 description=( 

+

43 "Specify what fraction of generated patients should come from the " 

+

44 "respective T-Stage." 

+

45 ), 

+

46 ) 

+

47 modalities: dict[str, ModalityConfig] 

+

48 params: dict[str, float] 

+

49 num_patients: int = 200 

+

50 output_file: str 

+

51 seed: int = 42 

+

52 

+

53 def model_post_init(self, __context) -> None: 

+

54 """Make sure distribution over T-stages is normalized.""" 

+

55 total = 0.0 

+

56 for t_stage in self.distributions: 

+

57 if t_stage not in self.t_stages_dist: 

+

58 raise ValueError(f"Missing distribution for T-stage {t_stage}.") 

+

59 

+

60 total += self.t_stages_dist[t_stage] 

+

61 

+

62 if not np.isclose(total, 1.0): 

+

63 raise ValueError("Sum of T-stage distributions must be 1.") 

+

64 

+

65 return super().model_post_init(__context) 

+

66 

+

67 def cli_cmd(self) -> None: 

+

68 """Run the ``generate`` command. 

+

69 

+

70 Here, the command constructs a model from the settings provided via the 

+

71 arguments. It then generates a synthetic dataset using the 

+

72 :py:meth:`~lymph.models.Unilateral.draw_patients` from the `lymph`_ package. 

+

73 

+

74 .. _lymph: https://lymph-model.readthedocs.io/ 

+

75 """ 

+

76 logger.debug(self.model_dump_json(indent=2)) 

+

77 

+

78 model = construct_model(self.model, self.graph) 

+

79 model = add_distributions(model, self.distributions) 

+

80 model = add_modalities(model, self.modalities) 

+

81 model.set_params(**self.params) 

+

82 logger.info(f"Set parameters: {model.get_params(as_dict=True)}") 

+

83 

+

84 synth_data = model.draw_patients( 

+

85 num=self.num_patients, 

+

86 stage_dist=list(self.t_stages_dist.values()), 

+

87 seed=self.seed, 

+

88 ) 

+

89 logger.info(f"Generated synthetic data with shape {synth_data.shape}") 

+

90 

+

91 save_table_to_csv(file_path=self.output_file, table=synth_data) 

+

92 

+

93 

+

94if __name__ == "__main__": 

+

95 main = assemble_main(settings_cls=GenerateCLI, prog_name="data generate") 

+

96 main() 

+
+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a_join_py.html b/htmlcov/z_9b7bcb970ba14d6a_join_py.html new file mode 100644 index 0000000..39e9975 --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a_join_py.html @@ -0,0 +1,179 @@ + + + + + Coverage for src/lyscripts/data/join.py: 55% + + + + + +
+
+

+ Coverage for src / lyscripts / data / join.py: + 55% +

+ +

+ 22 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Join multiple lymphatic progression datasets into a single dataset.""" 

+

2 

+

3from pathlib import Path 

+

4 

+

5import pandas as pd 

+

6from lydata.validator import cast_dtypes 

+

7from pydantic import Field 

+

8 

+

9from lyscripts.cli import assemble_main 

+

10from lyscripts.configs import BaseCLI, DataConfig 

+

11from lyscripts.data.utils import save_table_to_csv 

+

12 

+

13 

+

14class JoinCLI(BaseCLI): 

+

15 """Join multiple lymphatic progression datasets into a single dataset.""" 

+

16 

+

17 inputs: list[DataConfig] = Field(description="The datasets to join.") 

+

18 output_file: Path = Field(description="The path to the output dataset.") 

+

19 

+

20 def cli_cmd(self) -> None: 

+

21 r"""Start the ``join`` subcommand. 

+

22 

+

23 This will load all datasets specified in the ``inputs`` attribute and 

+

24 concatenate them into a single dataset. 

+

25 

+

26 Unfortunately, the use of `pydantic`_ does make this particular command a 

+

27 little bit more complicated (but also more powerful): If one simply wants to 

+

28 concatenate multiple datasets on disk, the ``inputs`` should be provided like 

+

29 this: 

+

30 

+

31 .. code-block:: bash 

+

32 

+

33 lyscripts data join \ 

+

34 --inputs '{"source": "file1.csv"}' \ 

+

35 --inputs '{"source": "file2.csv"}' \ 

+

36 --output-file "joined.csv" 

+

37 

+

38 But it also allows for concatenating datasets fetched directly from the 

+

39 `lydata Github repo`_. Due to the rather complex command signature, we 

+

40 recommend defining what to concatenate using a YAML file: 

+

41 

+

42 .. code-block:: yaml 

+

43 

+

44 inputs: 

+

45 - data.year: 2021 

+

46 data.institution: "usz" 

+

47 data.subsite: "oropharynx" 

+

48 - data.year: 2021 

+

49 data.institution: "clb" 

+

50 data.subsite: "oropharynx" 

+

51 

+

52 Then, the command will look like this: 

+

53 

+

54 .. code-block:: bash 

+

55 

+

56 lyscripts data join --configs datasets.ly.yaml --output-file joined.csv 

+

57 

+

58 .. _pydantic: https://docs.pydantic.dev/latest/ 

+

59 .. _lydata Github repo: https://github.com/lycosystem/lydata 

+

60 """ 

+

61 joined = None 

+

62 

+

63 for data_config in self.inputs: 

+

64 data = data_config.load() 

+

65 # `cast_dtypes()` ensures that e.g. boolean values are not suddenly 

+

66 # converted to strings when a dataset with missing values is concatenated. 

+

67 data = cast_dtypes(data) 

+

68 if joined is None: 

+

69 joined = data 

+

70 else: 

+

71 joined = pd.concat( 

+

72 [joined, data], 

+

73 axis="index", 

+

74 ignore_index=True, 

+

75 ) 

+

76 

+

77 save_table_to_csv(file_path=self.output_file, table=joined) 

+

78 

+

79 

+

80if __name__ == "__main__": 

+

81 main = assemble_main(settings_cls=JoinCLI, prog_name="join") 

+

82 main() 

+
+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a_lyproxify_py.html b/htmlcov/z_9b7bcb970ba14d6a_lyproxify_py.html new file mode 100644 index 0000000..50c278d --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a_lyproxify_py.html @@ -0,0 +1,438 @@ + + + + + Coverage for src/lyscripts/data/lyproxify.py: 46% + + + + + +
+
+

+ Coverage for src / lyscripts / data / lyproxify.py: + 46% +

+ +

+ 123 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Consumes raw data and transforms it into a CSV that `LyProX`_ understands. 

+

2 

+

3To do so, it needs a dictionary that defines a mapping from raw columns to the LyProX 

+

4style data format. See the documentation of the :py:func:`.transform_to_lyprox` function 

+

5for more information. 

+

6 

+

7.. _LyProX: https://lyprox.org 

+

8""" 

+

9 

+

10import importlib.util 

+

11import warnings 

+

12from pathlib import Path 

+

13from typing import Annotated, Any 

+

14 

+

15import lydata # noqa: F401 

+

16import pandas as pd 

+

17from loguru import logger 

+

18from lydata import C 

+

19from pydantic import AfterValidator, Field, FilePath 

+

20 

+

21from lyscripts.cli import assemble_main 

+

22from lyscripts.configs import BaseCLI 

+

23from lyscripts.data.utils import save_table_to_csv 

+

24from lyscripts.utils import delete_private_keys, flatten, load_patient_data 

+

25 

+

26warnings.simplefilter(action="ignore", category=FutureWarning) 

+

27 

+

28 

+

29def ensure_python_file(file: Path) -> Path: 

+

30 """Check if the file is a Python file.""" 

+

31 if file.suffix != ".py": 

+

32 raise ValueError("Mapping file must be a Python file.") 

+

33 

+

34 return file 

+

35 

+

36 

+

37def ensure_column_map(file: Path) -> Path: 

+

38 """Ensure the Python file contains a ``COLUMN_MAP`` dictionary.""" 

+

39 spec = importlib.util.spec_from_file_location("map_module", file) 

+

40 mapping = importlib.util.module_from_spec(spec) 

+

41 spec.loader.exec_module(mapping) 

+

42 

+

43 if not hasattr(mapping, "COLUMN_MAP"): 

+

44 raise ValueError("Mapping file must contain a `COLUMN_MAP` dictionary.") 

+

45 

+

46 return file 

+

47 

+

48 

+

49class LyproxifyCLI(BaseCLI): 

+

50 """Map any CSV file to the LyProX format with the help of a Python mapping dict.""" 

+

51 

+

52 input_file: FilePath = Field(description="Location of raw CSV data.") 

+

53 num_header_rows: int = Field( 

+

54 default=1, 

+

55 description="Number of rows comprising the header of the raw CSV file.", 

+

56 ) 

+

57 mapping_file: Annotated[ 

+

58 FilePath, 

+

59 AfterValidator(ensure_python_file), 

+

60 AfterValidator(ensure_column_map), 

+

61 ] = Field( 

+

62 description=( 

+

63 "Location of Python file containing a `COLUMN_MAP` dictionary. It may also " 

+

64 "contain an `EXCLUDE` list of tuples `(column, check)` to exclude patients." 

+

65 ), 

+

66 ) 

+

67 drop_rows: list[int] = Field( 

+

68 default=[], 

+

69 description=( 

+

70 "Delete rows of specified indices. Counting of rows start at 0 _after_ " 

+

71 "the `header-rows`." 

+

72 ), 

+

73 ) 

+

74 drop_cols: list[int] = Field( 

+

75 default=[], 

+

76 description="Delete columns of specified indices.", 

+

77 ) 

+

78 output_file: Path = Field(description="Location to store the lyproxified CSV file.") 

+

79 

+

80 def cli_cmd(self) -> None: 

+

81 """Start the ``lyproxify`` subcommand. 

+

82 

+

83 After reading in the specified file, it will first ``drop_rows`` and 

+

84 ``drop_cols``, as specified in the command line arguments. Then, it will 

+

85 call :py:func:`.exclude_patients` which will further remove patients based 

+

86 on the ``EXCLUDE`` object in the ``mapping_file``. Finally, it will call 

+

87 :py:func:`.transform_to_lyprox` to transform the data into the LyProX format 

+

88 given the ``COLUMN_MAP`` object in the ``mapping_file``. 

+

89 """ 

+

90 logger.debug(self.model_dump_json(indent=2)) 

+

91 

+

92 raw = load_patient_data( 

+

93 file_path=self.input_file, 

+

94 header=list(range(self.num_header_rows)), 

+

95 ) 

+

96 raw = clean_header( 

+

97 table=raw, 

+

98 num_cols=raw.shape[1], 

+

99 num_header_rows=self.num_header_rows, 

+

100 ) 

+

101 

+

102 cols_to_drop = raw.columns[self.drop_cols] 

+

103 trimmed = raw.drop(cols_to_drop, axis="columns") 

+

104 trimmed = trimmed.drop(index=self.drop_rows) 

+

105 trimmed = trimmed.dropna(axis="index", how="all") 

+

106 logger.info(f"Dropped rows {self.drop_rows} and columns {cols_to_drop}.") 

+

107 

+

108 spec = importlib.util.spec_from_file_location("map_module", self.mapping_file) 

+

109 mapping = importlib.util.module_from_spec(spec) 

+

110 spec.loader.exec_module(mapping) 

+

111 logger.info(f"Imported mapping instructions from {self.mapping_file}") 

+

112 

+

113 reduced = exclude_patients(trimmed, mapping.EXCLUDE) 

+

114 processed = transform_to_lyprox(reduced, mapping.COLUMN_MAP) 

+

115 

+

116 if "side" in processed.ly: 

+

117 processed = leftright_to_ipsicontra(processed) 

+

118 

+

119 save_table_to_csv(file_path=self.output_file, table=processed) 

+

120 

+

121 

+

122class ParsingError(Exception): 

+

123 """Error while parsing the CSV file.""" 

+

124 

+

125 

+

126def clean_header( 

+

127 table: pd.DataFrame, 

+

128 num_cols: int, 

+

129 num_header_rows: int, 

+

130) -> pd.DataFrame: 

+

131 """Rename the header cells in the ``table``.""" 

+

132 table = table.copy() 

+

133 

+

134 for col in range(num_cols): 

+

135 for row in range(num_header_rows): 

+

136 table.rename( 

+

137 columns={f"Unnamed: {col}_level_{row}": f"{col}_lvl_{row}"}, 

+

138 inplace=True, 

+

139 ) 

+

140 

+

141 logger.debug("Cleaned headers of the raw data.") 

+

142 return table 

+

143 

+

144 

+

145def get_instruction_depth(nested_column_map: dict[tuple, dict[str, Any]]) -> int: 

+

146 """Get the depth at which the column mapping instructions are nested. 

+

147 

+

148 Instructions are a dictionary that contains either a 'func' or 'default' key. 

+

149 

+

150 >>> nested_column_map = {"patient": {"age": {"func": int}}} 

+

151 >>> get_instruction_depth(nested_column_map) 

+

152 2 

+

153 >>> flat_column_map = flatten(nested_column_map, max_depth=2) 

+

154 >>> get_instruction_depth(flat_column_map) 

+

155 1 

+

156 >>> nested_column_map = {"patient": {"__doc__": "some patient info", "age": 61}} 

+

157 >>> get_instruction_depth(nested_column_map) 

+

158 Traceback (most recent call last): 

+

159 ... 

+

160 ValueError: Leaf of column map must be a dictionary with 'func' or 'default' key. 

+

161 """ 

+

162 for _, value in nested_column_map.items(): 

+

163 if isinstance(value, dict): 

+

164 if "func" in value or "default" in value: 

+

165 return 1 

+

166 

+

167 return 1 + get_instruction_depth(value) 

+

168 

+

169 raise ValueError( 

+

170 "Leaf of column map must be a dictionary with 'func' or 'default' key.", 

+

171 ) 

+

172 

+

173 raise ValueError("Empty column map.") 

+

174 

+

175 

+

176def generate_markdown_docs( 

+

177 nested_column_map: dict[tuple, dict[str, Any]], 

+

178 depth: int = 0, 

+

179 indent_len: int = 4, 

+

180) -> str: 

+

181 r"""Generate a markdown nested, ordered list as documentation for the column map. 

+

182 

+

183 A key in the doctionary is supposed to be documented, when its value is a dictionary 

+

184 containing a ``"__doc__"`` key. 

+

185 

+

186 >>> nested_column_map = { 

+

187 ... "patient": { 

+

188 ... "__doc__": "some patient info", 

+

189 ... "age": { 

+

190 ... "__doc__": "age of the patient", 

+

191 ... "func": int, 

+

192 ... "columns": ["age"], 

+

193 ... }, 

+

194 ... }, 

+

195 ... } 

+

196 >>> generate_markdown_docs(nested_column_map) 

+

197 '1. **`patient:`** some patient info\n 1. **`age:`** age of the patient\n' 

+

198 """ 

+

199 md_docs = "" 

+

200 indent = " " * indent_len * depth 

+

201 i = 1 

+

202 for key, value in nested_column_map.items(): 

+

203 if isinstance(value, dict): 

+

204 if "__doc__" in value: 

+

205 md_docs += f"{indent}{i}. **`{key}:`** {value['__doc__']}\n" 

+

206 i += 1 

+

207 

+

208 md_docs += generate_markdown_docs(value, depth + 1, indent_len) 

+

209 

+

210 return md_docs 

+

211 

+

212 

+

213def transform_to_lyprox( 

+

214 raw: pd.DataFrame, 

+

215 column_map: dict[tuple, dict[str, Any]], 

+

216) -> pd.DataFrame: 

+

217 """Transform ``raw`` data into table that can be uploaded directly to `LyProX`_. 

+

218 

+

219 To do so, it uses instructions in the `colum_map` dictionary, that needs to have 

+

220 a particular structure: 

+

221 

+

222 For each column in the final 'lyproxified' `pd.DataFrame`, one entry must exist in 

+

223 the `column_map` dictionary. E.g., for the column corresponding to a patient's age, 

+

224 the dictionary should contain a key-value pair of this shape: 

+

225 

+

226 .. code-block:: python 

+

227 

+

228 column_map = { 

+

229 ("patient", "core", "age"): { 

+

230 "func": compute_age_from_raw, 

+

231 "kwargs": {"randomize": False}, 

+

232 "columns": ["birthday", "date of diagnosis"] 

+

233 }, 

+

234 } 

+

235 

+

236 In this example, the function ``compute_age_from_raw`` is called with the 

+

237 values of the columns ``"birthday"`` and ``"date of diagnosis"`` as positional 

+

238 arguments, and the keyword argument ``"randomize"`` is set to ``False``. The 

+

239 function then returns the patient's age, which is subsequently stored in the column 

+

240 ``("patient", "core", "age")``. 

+

241 

+

242 Note that the ``column_map`` dictionary must have either a ``"default"`` key or 

+

243 ``"func"`` along with ``"columns"`` and ``"kwargs"``, depending on the function 

+

244 definition. If the function does not take any arguments, ``"columns"`` can be 

+

245 omitted. If it also does not take any keyword arguments, ``"kwargs"`` can be 

+

246 omitted, too. 

+

247 

+

248 .. _LyProX: https://lyprox.org 

+

249 """ 

+

250 column_map = delete_private_keys(column_map) 

+

251 

+

252 if (instruction_depth := get_instruction_depth(column_map)) > 1: 

+

253 column_map = flatten(column_map, max_depth=instruction_depth) 

+

254 

+

255 multi_idx = pd.MultiIndex.from_tuples(column_map.keys()) 

+

256 processed = pd.DataFrame(columns=multi_idx) 

+

257 

+

258 for multi_idx_col, instruction in column_map.items(): 

+

259 if instruction != "": 

+

260 if "default" in instruction: 

+

261 processed[multi_idx_col] = [instruction["default"]] * len(raw) 

+

262 elif "func" in instruction: 

+

263 cols = instruction.get("columns", []) 

+

264 kwargs = instruction.get("kwargs", {}) 

+

265 func = instruction["func"] 

+

266 

+

267 try: 

+

268 processed[multi_idx_col] = [ 

+

269 func(*vals, **kwargs) for vals in raw[cols].values 

+

270 ] 

+

271 except Exception as exc: 

+

272 raise ParsingError( 

+

273 f"Exception encountered while parsing column {multi_idx_col}", 

+

274 ) from exc 

+

275 else: 

+

276 raise ParsingError( 

+

277 f"Column {multi_idx_col} has neither a `default` value nor `func` " 

+

278 "describing how to fill this column.", 

+

279 ) 

+

280 

+

281 logger.info("Transformed raw data to LyProX format.") 

+

282 return processed 

+

283 

+

284 

+

285def leftright_to_ipsicontra(data: pd.DataFrame): 

+

286 """Change absolute side reporting to tumor-relative. 

+

287 

+

288 Transform reporting of LNL involvement by absolute side (right & left) to a 

+

289 reporting relative to the tumor (ipsi- & contralateral). The table ``data`` should 

+

290 already be in the format LyProX requires, except for the side-reporting of LNL 

+

291 involvement. 

+

292 """ 

+

293 len_before = len(data) 

+

294 left_data = data.ly.query(C("side") != "right") 

+

295 right_data = data.ly.query(C("side") == "right") 

+

296 

+

297 left_data = left_data.rename(columns={"left": "ipsi"}, level=1) 

+

298 left_data = left_data.rename(columns={"right": "contra"}, level=1) 

+

299 right_data = right_data.rename(columns={"left": "contra"}, level=1) 

+

300 right_data = right_data.rename(columns={"right": "ipsi"}, level=1) 

+

301 

+

302 data = pd.concat([left_data, right_data], ignore_index=True) 

+

303 if len_before != len(data): 

+

304 raise RuntimeError("Number of patients changed") 

+

305 

+

306 logger.info("Transformed side reporting to ipsi- and contralateral.") 

+

307 return data 

+

308 

+

309 

+

310def exclude_patients(raw: pd.DataFrame, exclude: list[tuple[str, Any]]): 

+

311 """Exclude patients in the ``raw`` data based on a list of what to ``exclude``. 

+

312 

+

313 The ``exclude`` list contains tuples ``(column, check)``. The ``check`` function 

+

314 will then exclude any patients from the cohort where ``check(raw[column])`` 

+

315 evaluates to ``True``. 

+

316 

+

317 >>> exclude = [("age", lambda s: s > 50)] 

+

318 >>> table = pd.DataFrame({ 

+

319 ... "age": [43, 82, 18, 67], 

+

320 ... "T-category": [ 3, 4, 2, 1], 

+

321 ... }) 

+

322 >>> exclude_patients(table, exclude) 

+

323 age T-category 

+

324 0 43 3 

+

325 2 18 2 

+

326 """ 

+

327 num_before = len(raw) 

+

328 filtered = raw.copy() 

+

329 

+

330 for column, check in exclude: 

+

331 is_excluded = check(filtered[column]) 

+

332 filtered = filtered.loc[~is_excluded] 

+

333 

+

334 num_after = len(filtered) 

+

335 logger.info(f"Excluded {num_before - num_after} patients.") 

+

336 return filtered 

+

337 

+

338 

+

339if __name__ == "__main__": 

+

340 main = assemble_main(settings_cls=LyproxifyCLI, prog_name="lyproxify") 

+

341 main() 

+
+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a_split_py.html b/htmlcov/z_9b7bcb970ba14d6a_split_py.html new file mode 100644 index 0000000..583f3b8 --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a_split_py.html @@ -0,0 +1,170 @@ + + + + + Coverage for src/lyscripts/data/split.py: 52% + + + + + +
+
+

+ Coverage for src / lyscripts / data / split.py: + 52% +

+ +

+ 29 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Split a dataset into cross-validation folds based on params.yaml file.""" 

+

2 

+

3import warnings 

+

4from pathlib import Path 

+

5 

+

6import numpy as np 

+

7import pandas as pd 

+

8from loguru import logger 

+

9from pydantic import Field 

+

10 

+

11from lyscripts.cli import assemble_main 

+

12from lyscripts.configs import BaseCLI, CrossValidationConfig, DataConfig 

+

13from lyscripts.data.utils import save_table_to_csv 

+

14 

+

15warnings.simplefilter(action="ignore", category=FutureWarning) 

+

16 

+

17 

+

18class SplitCLI(BaseCLI): 

+

19 """Split a dataset into cross-validation folds.""" 

+

20 

+

21 input: DataConfig 

+

22 cross_validation: CrossValidationConfig = CrossValidationConfig() 

+

23 output_dir: Path = Field(description="The folder to store the split CSV files in.") 

+

24 

+

25 def cli_cmd(self) -> None: 

+

26 """Run the ``split`` subcommand. 

+

27 

+

28 This will load the dataset specified in the ``input`` argument and split it 

+

29 into the number of folds specified in the ``cross_validation`` argument. The 

+

30 resulting splits will be stored in the folder specified in the ``output_dir`` 

+

31 argument. 

+

32 """ 

+

33 logger.debug(self.model_dump_json(indent=2)) 

+

34 

+

35 self.output_dir.mkdir(parents=True, exist_ok=True) 

+

36 logger.info(f"Ensure output directory {self.output_dir} exists") 

+

37 

+

38 data = self.input.load() 

+

39 

+

40 shuffled_data = data.sample( 

+

41 frac=1.0, 

+

42 replace=False, 

+

43 random_state=self.cross_validation.seed, 

+

44 ).reset_index(drop=True) 

+

45 

+

46 split_datas = np.array_split( 

+

47 ary=shuffled_data, 

+

48 indices_or_sections=self.cross_validation.folds, 

+

49 ) 

+

50 for fold in range(self.cross_validation.folds): 

+

51 _train_datas = [ 

+

52 split_datas[i] for i in range(self.cross_validation.folds) if i != fold 

+

53 ] 

+

54 train_data = pd.concat( 

+

55 objs=_train_datas, 

+

56 axis="index", 

+

57 ignore_index=True, 

+

58 ) 

+

59 eval_data = split_datas[fold] 

+

60 

+

61 save_table_to_csv( 

+

62 file_path=self.output_dir / f"{fold}_train.csv", 

+

63 table=train_data, 

+

64 ) 

+

65 save_table_to_csv( 

+

66 file_path=self.output_dir / f"{fold}_eval.csv", 

+

67 table=eval_data, 

+

68 ) 

+

69 

+

70 

+

71if __name__ == "__main__": 

+

72 main = assemble_main(settings_cls=SplitCLI, prog_name="split") 

+

73 main() 

+
+ + + diff --git a/htmlcov/z_9b7bcb970ba14d6a_utils_py.html b/htmlcov/z_9b7bcb970ba14d6a_utils_py.html new file mode 100644 index 0000000..4a73044 --- /dev/null +++ b/htmlcov/z_9b7bcb970ba14d6a_utils_py.html @@ -0,0 +1,113 @@ + + + + + Coverage for src/lyscripts/data/utils.py: 100% + + + + + +
+
+

+ Coverage for src / lyscripts / data / utils.py: + 100% +

+ +

+ 9 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Utilities related to the commands for data cleaning and processing.""" 

+

2 

+

3from pathlib import Path 

+

4 

+

5import pandas as pd 

+

6from loguru import logger 

+

7 

+

8from lyscripts.decorators import check_output_dir_exists 

+

9 

+

10 

+

11@check_output_dir_exists 

+

12def save_table_to_csv(file_path: Path, table: pd.DataFrame): 

+

13 """Save a ``table`` to ``output_path``.""" 

+

14 shape = table.shape 

+

15 logger.info(f"Saving table with {shape=} to {file_path.resolve()}") 

+

16 table.to_csv(file_path, index=None) 

+
+ + + diff --git a/htmlcov/z_f60392fe1c3f3e73___init___py.html b/htmlcov/z_f60392fe1c3f3e73___init___py.html new file mode 100644 index 0000000..e18c170 --- /dev/null +++ b/htmlcov/z_f60392fe1c3f3e73___init___py.html @@ -0,0 +1,247 @@ + + + + + Coverage for src/lyscripts/data/collect/__init__.py: 53% + + + + + +
+
+

+ Coverage for src / lyscripts / data / collect / __init__.py: + 53% +

+ +

+ 58 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.13.5, + created at 2026-04-08 14:17 +0000 +

+ +
+
+
+

1"""Submodule to collect data interactively using a simple web interface. 

+

2 

+

3With the simple command 

+

4 

+

5.. code-block:: bash 

+

6 

+

7 lyscripts data collect 

+

8 

+

9One can start a very basic web server that serves an interactive UI at 

+

10``http://localhost:8000/``. There, one can enter patient, tumor, and lymphatic 

+

11involvement data one by one. When completed, the "submit" button will parse, validate, 

+

12and convert the data to serve a downloadable CSV file. 

+

13 

+

14The resulting CSV file is in the correct format to be used in `LyProX`_ and for 

+

15inference using our `lymph-model`_ library. 

+

16 

+

17.. _LyProX: https://lyprox.org 

+

18.. _lymph-model: https://lymph-model.readthedocs.io 

+

19""" 

+

20 

+

21import io 

+

22import logging 

+

23from pathlib import Path 

+

24from typing import Any 

+

25 

+

26import lydata 

+

27import lydata.validator 

+

28import pandas as pd 

+

29from fastapi import FastAPI, HTTPException 

+

30from fastapi.responses import StreamingResponse 

+

31from loguru import logger 

+

32from pydantic import Field, RootModel 

+

33from starlette.responses import FileResponse, HTMLResponse 

+

34 

+

35from lyscripts.cli import InterceptHandler, _current_log_level 

+

36from lyscripts.configs import BaseCLI 

+

37 

+

# The FastAPI application object. All route handlers in this module register
# themselves on it and it is what the CLI command hands to uvicorn.
app = FastAPI(
    title="lyDATA Collector",
    description=(
        "A simple web interface to collect data for the lyDATA datasets. "
        "This is a prototype and not intended for production use."
    ),
    version=lydata.__version__,
)

# Directory containing this module. The static ``index.html`` and ``collector.js``
# files are looked up relative to it.
BASE_DIR = Path(__file__).parent
# Model of a single record, created by lydata from its default modalities.
modalities = lydata.schema.get_default_modalities()
RecordModel = lydata.schema.create_full_record_model(modalities, model_name="Record")
# Root model wrapping a list of records. Its JSON schema is served at ``/schema``.
ROOT_MODEL = RootModel[list[RecordModel]]

+

51 

+

52 

+

@app.get("/")
def serve_index_html() -> HTMLResponse:
    """Serve the ``index.html`` file at the URL's root.

    The file is read from the directory of this module (``BASE_DIR``) on every
    request and returned as the body of an ``HTMLResponse``.
    """
    # Decode explicitly as UTF-8: without an ``encoding`` argument the platform's
    # locale default is used, which may garble non-ASCII characters on some systems.
    content = (BASE_DIR / "index.html").read_text(encoding="utf-8")
    return HTMLResponse(content=content)

+

59 

+

60 

+

@app.get("/schema")
def serve_schema() -> dict[str, Any]:
    """Return the JSON schema of a list of patient and tumor records.

    The schema is generated by pydantic from the module-level ``ROOT_MODEL``.
    """
    schema = ROOT_MODEL.model_json_schema()
    return schema

+

65 

+

66 

+

@app.get("/collector.js")
def serve_collector_js() -> FileResponse:
    """Deliver ``collector.js`` at ``"http://{host}:{port}/collector.js"``.

    The script lives next to this module. In the browser, it loads the
    `JSON-Editor`_ library and sets it up with the schema that the
    :py:func:`serve_schema` function returns.

    .. _JSON-Editor: https://github.com/json-editor/json-editor/
    """
    script_path = BASE_DIR / "collector.js"
    return FileResponse(script_path)

+

77 

+

78 

+

@app.post("/submit")
async def process(data: RootModel) -> StreamingResponse:
    """Convert the submitted records into a downloadable CSV file.

    `FastAPI`_ parses the JSON body of the request into ``data``. Every entry of
    ``data.root`` is flattened using :py:func:`lydata.validator.flatten`. From the
    flattened records we create a pandas DataFrame with multi-level columns and
    return it as a CSV attachment named ``lydata_records.csv``.

    Raises an ``HTTPException`` with status code 400 if no records were submitted.

    NOTE(review): the parameter is annotated with the bare ``RootModel``, not with
    the ``ROOT_MODEL`` built from :py:func:`lydata.schema.create_full_record_model`.
    Whether the entries of ``data.root`` are validated against the record model
    therefore needs to be confirmed. Switching the annotation may also change what
    ``flatten`` receives, which is why it is left untouched here.

    .. _FastAPI: https://fastapi.tiangolo.com/
    """
    logger.info(f"Received data: {data.root}")

    if not data.root:
        logger.warning("No records provided in the data.")
        raise HTTPException(
            status_code=400,
            detail="No records provided in the data.",
        )

    flattened_records = []

    for record in data.root:
        flattened_record = lydata.validator.flatten(record)
        logger.debug(f"Flattened record: {flattened_record}")
        flattened_records.append(flattened_record)

    df = pd.DataFrame(flattened_records)
    # Derive the column MultiIndex from the frame's own columns. Using the keys of
    # whichever record happened to be flattened last would mislabel the columns (or
    # raise) as soon as records differ in the order or the set of their keys.
    df.columns = pd.MultiIndex.from_tuples(df.columns.tolist())
    logger.info(df.patient.core.head())

    # Serialize into an in-memory text buffer and rewind it for streaming.
    buffer = io.StringIO()
    df.to_csv(buffer, index=False)
    buffer.seek(0)
    logger.success("Data prepared for download")
    return StreamingResponse(
        buffer,
        media_type="text/csv",
        headers={"Content-Disposition": "attachment; filename=lydata_records.csv"},
    )

+

121 

+

122 

+

class CollectorCLI(BaseCLI):
    """Serve a FastAPI web app for collecting involvement patterns as CSV files."""

    # Host name passed to uvicorn as ``host``.
    hostname: str = Field(
        "localhost",
        description="Hostname to run the FastAPI app on.",
    )
    # Port passed to uvicorn as ``port``.
    port: int = Field(8000, description="Port to run the FastAPI app on.")

    def cli_cmd(self) -> None:
        """Start uvicorn and serve the collector app on ``hostname:port``."""
        logger.debug(self.model_dump_json(indent=2))
        import uvicorn

        # Send records of the standard ``logging`` module to Loguru as well
        intercept_handler = InterceptHandler()
        logging.basicConfig(handlers=[intercept_handler], level=0, force=True)
        logger.enable("lydata")

        # ``log_config=None`` keeps uvicorn from installing its own logging setup.
        server_options = {
            "host": self.hostname,
            "port": self.port,
            "log_level": _current_log_level.lower(),
            "log_config": None,
        }
        uvicorn.run(app, **server_options)

+
+ + + diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 75401c0..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,210 +0,0 @@ -[build-system] -requires = [ - "setuptools >= 61", - "setuptools_scm", - "wheel" -] -build-backend = "setuptools.build_meta" - -[project] -name = "lyscripts" -description = "Package to interact with lymphatic progression data and models." -authors = [ - {name = "Roman Ludwig", email = "gygqdstu3@mozmail.com"}, - {name = "Yoel Pรฉrez Haas", email = "yoel.perezhaas@usz.ch"}, - {name = "Noemi Bรผhrer", email = "noemi.buehrer@usz.ch"}, -] -readme = "README.md" -requires-python = ">=3.10" -keywords = ["scripts", "lymph", "inference"] -license = {text = "MIT"} -classifiers = [ - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT License", - "Natural Language :: English", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Topic :: Scientific/Engineering", - "Topic :: Software Development :: Libraries", -] -dependencies = [ - "numpy", - "pandas", - "emcee", - "h5py", - "tables", - "matplotlib", - "corner", - "scipy", - "xlrd", - "rich", - "rich-argparse", - "pyyaml", - "lymph-model >= 1.3.3", - "deprecated", - "joblib", - "pydantic", - "pydantic-settings >= 2.7.0, != 2.9.1, != 2.9.0", - "numpydantic", - "loguru", - "fastapi", - "uvicorn", - "lydata >= 0.4.0", -] -dynamic = ["version"] - -[project.urls] -source = "https://github.com/lycosystem/lyscripts" -documentation = "https://lyscripts.readthedocs.io" - -[project.optional-dependencies] -docs = [ - "sphinx", - "sphinx-book-theme", - "sphinx-autodoc-typehints", - "sphinxcontrib-programoutput", - "myst_parser", - "autodoc_pydantic", -] -tests = [ - "pytest", - "pytest-cov", - 
"pytest-mpl", -] -dev = [ - "pre-commit", - "git-cliff", -] - -[project.scripts] -lyscripts = "lyscripts:main" - -[tool.setuptools] -include-package-data = true - -[tool.setuptools.packages.find] -where = ["src"] - -[tool.setuptools.package-data] -"lyscripts" = [ - "src/lyscripts/data/collect/collector.js", - "src/lyscripts/data/collect/index.html", -] - -[tool.setuptools_scm] -write_to = "src/lyscripts/_version.py" -local_scheme = "no-local-version" - -[tool.pytest.ini_options] -testpaths = "." - -[tool.ruff.lint] -select = ["E", "F", "W", "B", "C", "R", "U", "D", "I", "S", "T", "A", "N", "COM", "FURB", "NPY", "UP"] -ignore = ["D409"] - -[tool.ruff.lint.per-file-ignores] -"__init__.py" = ["E402"] -"{tests,docs}/*" = [ - "D103", - "E402", - "S101", - "S603", - "S607", -] - -[tool.coverage.paths] -source = [ - "src/", - "**/site-packages/", -] - -[tool.coverage.run] -relative_files = true - -# git-cliff ~ default configuration file -# https://git-cliff.org/docs/configuration -# -# Lines starting with "#" are comments. -# Configuration options are organized into tables and keys. -# See documentation for more information on available options. - -[tool.git-cliff.changelog] -# changelog header -header = """ -# Changelog\n -All notable changes to this project will be documented in this file.\n -""" -# template for the changelog body -# https://tera.netlify.app/docs -body = """ -{% if version %}\ - ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }} -{% else %}\ - ## [unreleased] -{% endif %}\ -{% for group, commits in commits | group_by(attribute="group") %} - ### {{ group | upper_first }} - {% for c in commits %} - - {% if c.breaking %}โš  **BREAKING** {% endif -%} - {% if c.scope %}(**{{ c.scope }}**) {% endif -%} - {{ c.message | upper_first }}. 
- {%- if c.footers %}{% for f in c.footers %}{% if not f.breaking %} {{ f.token }} [{{ f.value }}].{% endif %}{% endfor %}{% endif %} - {%- if c.body %}\\ - {{ c.body | indent(prefix=" ", first=True) }} - {% endif -%} - {% endfor %} -{% endfor %}\n -""" -# remove the leading and trailing whitespace from the template -trim = true -# changelog footer -footer = """ - -""" - -[tool.git-cliff.git] -# parse the commits based on https://www.conventionalcommits.org -conventional_commits = true -# filter out the commits that are not conventional -filter_unconventional = true -# process each line of a commit as an individual commit -split_commits = false -# regex for preprocessing the commit messages -commit_preprocessors = [ - # { pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](https://github.com/orhun/git-cliff/issues/${2}))"}, # replace issue numbers -] -# regex for parsing and grouping commits -commit_parsers = [ - { message = "^feat", group = "Features" }, - { message = "^fix", group = "Bug Fixes" }, - { message = "^docs", group = "Documentation" }, - { message = "^perf", group = "Performance" }, - { message = "^refactor", group = "Refactor" }, - { message = "^style", group = "Styling" }, - { message = "^test", group = "Testing" }, - { message = "^chore\\(release\\): prepare for", skip = true }, - { message = "^chore", group = "Miscellaneous Tasks" }, - { body = ".*security", group = "Security" }, -] -# protect breaking changes from being skipped due to matching a skipping commit_parser -protect_breaking_commits = false -# filter out the commits that are not matched by commit parsers -filter_commits = false -# glob pattern for matching git tags -tag_pattern = "[0-9]*" -# regex for skipping tags -skip_tags = "v0.1.0-beta.1" -# regex for ignoring tags -ignore_tags = "" -# sort the tags topologically -topo_order = false -# limit the number of commits included in the changelog. 
-# limit_commits = 42 -# sort the commits inside sections by oldest/newest order -sort_commits = "oldest" diff --git a/schemas/ly.json b/schemas/ly.json deleted file mode 100644 index 561127c..0000000 --- a/schemas/ly.json +++ /dev/null @@ -1,786 +0,0 @@ -{ - "$defs": { - "CrossValidationConfig": { - "description": "Configs for splitting a dataset into cross-validation folds.", - "properties": { - "seed": { - "default": 42, - "description": "Seed for the random number generator.", - "title": "Seed", - "type": "integer" - }, - "folds": { - "default": 5, - "description": "Number of folds to split the dataset into.", - "title": "Folds", - "type": "integer" - } - }, - "title": "CrossValidationConfig", - "type": "object" - }, - "DataConfig": { - "description": "Where to load lymphatic progression data from and how to feed it into a model.", - "properties": { - "source": { - "anyOf": [ - { - "format": "file-path", - "type": "string" - }, - { - "$ref": "#/$defs/LyDataset" - } - ], - "description": "Either a path to a CSV file or a config that specifies how and where to fetch the data from.", - "title": "Source" - }, - "side": { - "anyOf": [ - { - "enum": [ - "ipsi", - "contra" - ], - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Side of the neck to load data for. 
Only for Unilateral models.", - "title": "Side" - }, - "mapping": { - "additionalProperties": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "string" - } - ] - }, - "description": "Optional mapping of numeric T-stages to model T-stages.", - "title": "Mapping", - "type": "object" - } - }, - "required": [ - "source" - ], - "title": "DataConfig", - "type": "object" - }, - "DiagnosisConfig": { - "description": "Defines an ipsi- and contralateral diagnosis pattern.", - "properties": { - "ipsi": { - "additionalProperties": { - "additionalProperties": { - "anyOf": [ - { - "enum": [ - false, - 0, - "healthy", - true, - 1, - "involved", - "micro", - "macro", - "notmacro" - ] - }, - { - "type": "null" - } - ] - }, - "type": "object" - }, - "default": {}, - "description": "Observed diagnoses by different modalities on the ipsi neck.", - "examples": [ - { - "CT": { - "II": true, - "III": false - } - } - ], - "title": "Ipsi", - "type": "object" - }, - "contra": { - "additionalProperties": { - "additionalProperties": { - "anyOf": [ - { - "enum": [ - false, - 0, - "healthy", - true, - 1, - "involved", - "micro", - "macro", - "notmacro" - ] - }, - { - "type": "null" - } - ] - }, - "type": "object" - }, - "default": {}, - "description": "Observed diagnoses by different modalities on the contra neck.", - "title": "Contra", - "type": "object" - } - }, - "title": "DiagnosisConfig", - "type": "object" - }, - "DistributionConfig": { - "description": "Configuration defining a distribution over diagnose times.", - "properties": { - "kind": { - "default": "frozen", - "description": "Parametric distributions may be updated.", - "enum": [ - "frozen", - "parametric" - ], - "title": "Kind", - "type": "string" - }, - "func": { - "const": "binomial", - "default": "binomial", - "description": "Name of predefined function to use as distribution.", - "title": "Func", - "type": "string" - }, - "params": { - "additionalProperties": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": 
"number" - } - ] - }, - "default": {}, - "description": "Parameters to pass to the predefined function.", - "title": "Params", - "type": "object" - } - }, - "title": "DistributionConfig", - "type": "object" - }, - "GraphConfig": { - "description": "Specifies how the tumor(s) and LNLs are connected in a DAG.", - "properties": { - "tumor": { - "additionalProperties": { - "items": { - "type": "string" - }, - "type": "array" - }, - "description": "Define the name of the tumor(s) and which LNLs it/they drain to.", - "title": "Tumor", - "type": "object" - }, - "lnl": { - "additionalProperties": { - "items": { - "type": "string" - }, - "type": "array" - }, - "description": "Define the name of the LNL(s) and which LNLs it/they drain to.", - "title": "Lnl", - "type": "object" - } - }, - "required": [ - "tumor", - "lnl" - ], - "title": "GraphConfig", - "type": "object" - }, - "InvolvementConfig": { - "description": "Config that defines an ipsi- and contralateral involvement pattern.", - "properties": { - "ipsi": { - "additionalProperties": { - "anyOf": [ - { - "enum": [ - false, - 0, - "healthy", - true, - 1, - "involved", - "micro", - "macro", - "notmacro" - ] - }, - { - "type": "null" - } - ] - }, - "default": {}, - "description": "Involvement pattern for the ipsilateral side of the neck.", - "examples": [ - { - "II": true, - "III": false - } - ], - "title": "Ipsi", - "type": "object" - }, - "contra": { - "additionalProperties": { - "anyOf": [ - { - "enum": [ - false, - 0, - "healthy", - true, - 1, - "involved", - "micro", - "macro", - "notmacro" - ] - }, - { - "type": "null" - } - ] - }, - "default": {}, - "description": "Involvement pattern for the contralateral side of the neck.", - "title": "Contra", - "type": "object" - } - }, - "title": "InvolvementConfig", - "type": "object" - }, - "LyDataset": { - "description": "Specification of a dataset.", - "properties": { - "year": { - "description": "Release year of dataset.", - "exclusiveMinimum": 0, - "maximum": 2025, - 
"title": "Year", - "type": "integer" - }, - "institution": { - "description": "Institution's short code. E.g., University Hospital Zurich: `usz`.", - "minLength": 1, - "title": "Institution", - "type": "string" - }, - "subsite": { - "description": "Tumor subsite(s) patients in this dataset were diagnosed with.", - "minLength": 1, - "title": "Subsite", - "type": "string" - }, - "repo_name": { - "anyOf": [ - { - "minLength": 1, - "type": "string" - }, - { - "type": "null" - } - ], - "default": "lycosystem/lydata", - "description": "GitHub `repository/owner`.", - "title": "Repo Name" - }, - "ref": { - "anyOf": [ - { - "minLength": 1, - "type": "string" - }, - { - "type": "null" - } - ], - "default": "main", - "description": "Branch/tag/commit of the repo.", - "title": "Ref" - }, - "local_dataset_dir": { - "anyOf": [ - { - "format": "directory-path", - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Path to directory containing all the dataset subdirectories. So, e.g. 
if `path_on_disk` is `~/datasets` and the dataset is `2023-clb-multisite`, then the CSV file is expected to be at `~/datasets/2023-clb-multisite/data.csv`.", - "title": "Local Dataset Dir" - } - }, - "required": [ - "year", - "institution", - "subsite" - ], - "title": "LyDataset", - "type": "object" - }, - "ModalityConfig": { - "description": "Define a diagnostic or pathological modality.", - "properties": { - "spec": { - "description": "Specificity of the modality.", - "maximum": 1.0, - "minimum": 0.5, - "title": "Spec", - "type": "number" - }, - "sens": { - "description": "Sensitivity of the modality.", - "maximum": 1.0, - "minimum": 0.5, - "title": "Sens", - "type": "number" - }, - "kind": { - "default": "clinical", - "description": "Clinical modalities cannot detect microscopic disease.", - "enum": [ - "clinical", - "pathological" - ], - "title": "Kind", - "type": "string" - } - }, - "required": [ - "spec", - "sens" - ], - "title": "ModalityConfig", - "type": "object" - }, - "ModelConfig": { - "description": "Define which of the ``lymph`` models to use and how to set them up.", - "properties": { - "external_file": { - "anyOf": [ - { - "format": "file-path", - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Path to a Python file that defines a model.", - "title": "External File" - }, - "class_name": { - "default": "Unilateral", - "description": "Name of the model class to use.", - "enum": [ - "Unilateral", - "Bilateral", - "Midline" - ], - "title": "Class Name", - "type": "string" - }, - "constructor": { - "default": "binary", - "description": "Trinary models differentiate btw. micro- and macroscopic disease.", - "enum": [ - "binary", - "trinary" - ], - "title": "Constructor", - "type": "string" - }, - "max_time": { - "default": 10, - "description": "Max. 
number of time-steps to evolve the model over.", - "title": "Max Time", - "type": "integer" - }, - "named_params": { - "default": null, - "description": "Subset of valid model parameters a sampler may provide in the form of a dictionary to the model instead of as an array. Or, after sampling, with this list, one may safely recover which parameter corresponds to which index in the sample.", - "items": { - "type": "string" - }, - "title": "Named Params", - "type": "array" - }, - "kwargs": { - "default": {}, - "description": "Additional keyword arguments to pass to the model constructor.", - "title": "Kwargs", - "type": "object" - } - }, - "title": "ModelConfig", - "type": "object" - }, - "SamplingConfig": { - "description": "Settings to configure the MCMC sampling.", - "properties": { - "storage_file": { - "description": "Path to HDF5 file store results or load last state.", - "format": "path", - "title": "Storage File", - "type": "string" - }, - "history_file": { - "anyOf": [ - { - "format": "path", - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Path to store the burn-in metrics (as CSV file).", - "title": "History File" - }, - "dataset": { - "default": "mcmc", - "description": "Name of the dataset in the HDF5 file.", - "title": "Dataset", - "type": "string" - }, - "cores": { - "anyOf": [ - { - "exclusiveMinimum": 0, - "type": "integer" - }, - { - "type": "null" - } - ], - "default": 16, - "description": "Number of cores to use for parallel sampling. 
If `None`, no parallel processing is used.", - "title": "Cores" - }, - "seed": { - "default": 42, - "description": "Seed for the random number generator.", - "title": "Seed", - "type": "integer" - }, - "walkers_per_dim": { - "default": 20, - "description": "Number of walkers per parameter space dimension.", - "title": "Walkers Per Dim", - "type": "integer" - }, - "check_interval": { - "default": 50, - "description": "Check for convergence each time after this many steps.", - "title": "Check Interval", - "type": "integer" - }, - "trust_factor": { - "default": 50.0, - "description": "Trust the autocorrelation time only when it's smaller than this factor times the length of the chain.", - "title": "Trust Factor", - "type": "number" - }, - "relative_thresh": { - "default": 0.05, - "description": "Relative threshold for convergence.", - "title": "Relative Thresh", - "type": "number" - }, - "burnin_steps": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Number of burn-in steps to take. If None, burn-in runs until convergence.", - "title": "Burnin Steps" - }, - "num_steps": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": 100, - "description": "Number of steps to take in the MCMC sampling.", - "title": "Num Steps" - }, - "thin_by": { - "default": 10, - "description": "How many samples to draw before for saving one.", - "title": "Thin By", - "type": "integer" - }, - "inverse_temp": { - "default": 1.0, - "description": "Inverse temperature for thermodynamic integration. Note that this is not yet fully implemented.", - "title": "Inverse Temp", - "type": "number" - } - }, - "required": [ - "storage_file" - ], - "title": "SamplingConfig", - "type": "object" - }, - "ScenarioConfig": { - "description": "Define a scenario for which e.g. 
prevalences and risks may be computed.", - "properties": { - "t_stages": { - "description": "List of T-stages to marginalize over in the scenario.", - "examples": [ - [ - "early" - ], - [ - 3, - 4 - ] - ], - "items": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "string" - } - ] - }, - "title": "T Stages", - "type": "array" - }, - "t_stages_dist": { - "default": [ - 1.0 - ], - "description": "Distribution over T-stages to use for marginalization.", - "examples": [ - [ - 1.0 - ], - [ - 0.6, - 0.4 - ] - ], - "items": { - "type": "number" - }, - "title": "T Stages Dist", - "type": "array" - }, - "midext": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Whether the patient's tumor extends over the midline.", - "title": "Midext" - }, - "mode": { - "default": "HMM", - "description": "Which underlying model architecture to use.", - "enum": [ - "HMM", - "BN" - ], - "title": "Mode", - "type": "string" - }, - "involvement": { - "$ref": "#/$defs/InvolvementConfig", - "default": { - "ipsi": {}, - "contra": {} - } - }, - "diagnosis": { - "$ref": "#/$defs/DiagnosisConfig", - "default": { - "ipsi": {}, - "contra": {} - } - } - }, - "required": [ - "t_stages" - ], - "title": "ScenarioConfig", - "type": "object" - }, - "ScheduleConfig": { - "description": "Configuration for generating a schedule of inverse temperatures.", - "properties": { - "method": { - "default": "power", - "description": "Method to generate the inverse temperature schedule.", - "enum": [ - "geometric", - "linear", - "power" - ], - "title": "Method", - "type": "string" - }, - "num": { - "default": 32, - "description": "Number of inverse temperatures in the schedule.", - "title": "Num", - "type": "integer" - }, - "power": { - "default": 4.0, - "description": "If a power schedule is chosen, use this as power.", - "title": "Power", - "type": "number" - }, - "values": { - "anyOf": [ - { - "items": { - "type": "number" - }, - "type": "array" - 
}, - { - "type": "null" - } - ], - "default": null, - "description": "List of inverse temperatures to use instead of generating a schedule. If a list is provided, the other parameters are ignored.", - "title": "Values" - } - }, - "title": "ScheduleConfig", - "type": "object" - } - }, - "description": "Settings for generating a JSON schema for lyscripts configuration files.", - "properties": { - "version": { - "description": "For future compatibility reasons, every config file must have a `version: 1` field at the top level.", - "maximum": 1, - "minimum": 1, - "title": "Version", - "type": "integer" - }, - "cross_validation": { - "$ref": "#/$defs/CrossValidationConfig", - "default": null - }, - "data": { - "$ref": "#/$defs/DataConfig", - "default": null - }, - "diagnosis": { - "$ref": "#/$defs/DiagnosisConfig", - "default": null - }, - "distributions": { - "additionalProperties": { - "$ref": "#/$defs/DistributionConfig" - }, - "default": {}, - "title": "Distributions", - "type": "object" - }, - "graph": { - "$ref": "#/$defs/GraphConfig", - "default": null - }, - "involvement": { - "$ref": "#/$defs/InvolvementConfig", - "default": null - }, - "modalities": { - "additionalProperties": { - "$ref": "#/$defs/ModalityConfig" - }, - "default": {}, - "title": "Modalities", - "type": "object" - }, - "model": { - "$ref": "#/$defs/ModelConfig", - "default": null - }, - "sampling": { - "$ref": "#/$defs/SamplingConfig", - "default": null - }, - "scenarios": { - "default": [], - "items": { - "$ref": "#/$defs/ScenarioConfig" - }, - "title": "Scenarios", - "type": "array" - }, - "schedule": { - "$ref": "#/$defs/ScheduleConfig", - "default": null - } - }, - "required": [ - "version" - ], - "title": "SchemaSettings", - "type": "object" -} diff --git a/src/lyscripts/__init__.py b/src/lyscripts/__init__.py deleted file mode 100644 index 32808f2..0000000 --- a/src/lyscripts/__init__.py +++ /dev/null @@ -1,76 +0,0 @@ -"""Initial entry point for the lyscripts package and CLIs. 
- -This top-level module configures and provides the top-level CLI through which all -subcommands can be accessed. -""" - -import sys -from typing import Literal - -import pandas as pd -from loguru import logger -from pydantic import Field -from pydantic_settings import ( - BaseSettings, - CliApp, - CliImplicitFlag, - CliSubCommand, -) - -from lyscripts import compute, data, integrate, sample, schedule # noqa: F401 -from lyscripts._version import version -from lyscripts.cli import assemble_main, configure_logging -from lyscripts.utils import console - -__version__ = version -__description__ = "Package to interact with lymphatic progression data and models." -__author__ = "Roman Ludwig" -__email__ = "gygqdstu3@mozmail.com" -__uri__ = "https://github.com/lycosystem/lyscripts" - -# activate copy on write in pandas. -# See https://pandas.pydata.org/docs/user_guide/copy_on_write.html -pd.options.mode.copy_on_write = True - -logger.disable("lyscripts") - - -class LyscriptsCLI(BaseSettings): - """A CLI to interact with lymphatic progression data and models.""" - - version: CliImplicitFlag[bool] = Field( - default=False, - description="Display the version of lyscripts and exit.", - ) - log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field( - default="INFO", - description="Set the log level of the lyscripts CLI.", - ) - - data: CliSubCommand[data.DataCLI] - sample: CliSubCommand[sample.SampleCLI] - compute: CliSubCommand[compute.ComputeCLI] - schedule: CliSubCommand[schedule.ScheduleCLI] - integrate: CliSubCommand[integrate.IntegrateCLI] - - def __init__(self, **kwargs): - """Add logging configuration to the lyscripts CLI.""" - configure_logging(argv=sys.argv, console=console) - super().__init__(**kwargs) - - def cli_cmd(self) -> None: - """Start the main lyscripts CLI. - - If the ``version`` flag is set, the version of lyscripts is displayed and the - program exits. Otherwise, the lyscripts CLI runs one of the subcommands. 
- """ - logger.debug("Starting lyscripts CLI.") - - if self.version: - logger.info(f"lyscripts {__version__}") - return - - CliApp.run_subcommand(self) - - -main = assemble_main(settings_cls=LyscriptsCLI, prog_name="lyscripts") diff --git a/src/lyscripts/__main__.py b/src/lyscripts/__main__.py deleted file mode 100644 index 8d176b1..0000000 --- a/src/lyscripts/__main__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""Utility for common tasks w.r.t. inference & prediction using `lymph` package.""" - -from lyscripts import main - -if __name__ == "__main__": - main() diff --git a/src/lyscripts/cli.py b/src/lyscripts/cli.py deleted file mode 100644 index b6448d0..0000000 --- a/src/lyscripts/cli.py +++ /dev/null @@ -1,123 +0,0 @@ -"""Utilities for configuring and running CLIs app. - -In this module, we define and configure a :py:class:`RichDefaultHelpFormatter` that -nicely displays the CLI's ``--help`` text. We also provide a function to -:py:func:`assemble a main function ` for the different CLI apps to save -some boilerplate code. Lastly, we have two functions related to the `loguru`_ setup. - -.. _loguru: https://loguru.readthedocs.io/en/stable -""" - -import inspect -import logging -from collections.abc import Callable -from typing import Literal - -from loguru import logger -from pydantic_settings import BaseSettings, CliApp, CliSettingsSource -from rich.console import Console -from rich.logging import RichHandler -from rich_argparse import ArgumentDefaultsRichHelpFormatter - -_current_log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO" - - -def assemble_main( - settings_cls: type[BaseSettings], - prog_name: str, -) -> Callable[[], None]: - """Assemble a ``main()`` function for a CLI app. - - It creates a :py:class:`~pydantic_settings.CliSettingsSource` object with the - provided ``settings_cls`` and ``prog_name``. Then, it fills in some default - settings for the CLI configuration and runs the CLI app. 
- - Assembling a ``main()`` function for all subcommands like this saves some - boilerplate code. - """ - - def main() -> None: - """Start the main CLI app.""" - cli_settings_source = CliSettingsSource( - settings_cls=settings_cls, - cli_prog_name=prog_name, - cli_kebab_case=True, - cli_use_class_docs_for_groups=True, - formatter_class=ArgumentDefaultsRichHelpFormatter, - ) - CliApp.run(settings_cls, cli_settings_source=cli_settings_source) - - return main - - -def somewhat_safely_get_loglevel( - argv: list[str], -) -> Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]: - """Set the log level of the lyscripts CLI. - - This is a bit of a hack, since the :py:class:`~lyscripts.LyscriptsCLI` class is not - yet initialized when we need to set the log level. In case the provided log-level is - not valid, :py:class:`~lyscripts.LyscriptsCLI` will raise an exception at a later - point. - - Return ``"INFO"`` by default. - """ - args_str = " ".join(argv) - if "--log-level" in args_str: - for log_level in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]: - if log_level in args_str: - return log_level - - return "INFO" - - -def configure_logging( - argv: list[str], - console: Console, -) -> None: - """Configure the `loguru`_ logging system of the lyscripts CLI. - - This function sets the log level and format of the lyscripts CLI. Notably, for - a log-level of `DEBUG` the output will contain more information. - - .. 
_loguru: https://loguru.readthedocs.io/en/stable - """ - logger.enable("lyscripts") - logger.enable("lydata") - global _current_log_level - _current_log_level = somewhat_safely_get_loglevel(argv=argv) - logger.remove() - handler = RichHandler(console=console) - logger.add( - sink=handler, - level=_current_log_level, - format="{message}", - ) - - -class InterceptHandler(logging.Handler): - """Intercept logging messages and redirect them to Loguru.""" - - def emit(self, record: logging.LogRecord) -> None: - """Intercept the log record and redirect it to Loguru.""" - # Get corresponding Loguru level if it exists. - try: - level: str | int = logger.level(record.levelname).name - except ValueError: - level = record.levelno - - # Find caller from where originated the logged message. - frame, depth = inspect.currentframe(), 0 - while frame: - filename = frame.f_code.co_filename - is_logging = filename == logging.__file__ - is_frozen = "importlib" in filename and "_bootstrap" in filename - if depth > 0 and not (is_logging or is_frozen): - break - frame = frame.f_back - depth += 1 - - logger.opt(depth=depth, exception=record.exc_info).log( - level, - record.getMessage(), - ) diff --git a/src/lyscripts/compute/__init__.py b/src/lyscripts/compute/__init__.py deleted file mode 100644 index c2bbdff..0000000 --- a/src/lyscripts/compute/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -"""Commands to compute prior and posterior state distributions from model samples. - -This can in turn speed up the computation of risks and prevalences. 
-""" - -from pydantic_settings import BaseSettings, CliApp, CliSubCommand - -from lyscripts.compute import posteriors, prevalences, priors, risks, evidence - - -class ComputeCLI(BaseSettings): - """Compute priors, posteriors, risks, prevalences and model evidence from model samples.""" - - priors: CliSubCommand[priors.PriorsCLI] - posteriors: CliSubCommand[posteriors.PosteriorsCLI] - risks: CliSubCommand[risks.RisksCLI] - prevalences: CliSubCommand[prevalences.PrevalencesCLI] - evidence: CliSubCommand[evidence.EvidenceCLI] - - def cli_cmd(self) -> None: - """Start the ``compute`` subcommand.""" - CliApp.run_subcommand(self) diff --git a/src/lyscripts/compute/__main__.py b/src/lyscripts/compute/__main__.py deleted file mode 100644 index f6df909..0000000 --- a/src/lyscripts/compute/__main__.py +++ /dev/null @@ -1,8 +0,0 @@ -"""Run the compute module as a script.""" - -from lyscripts.cli import assemble_main -from lyscripts.compute import ComputeCLI - -if __name__ == "__main__": - main = assemble_main(settings_cls=ComputeCLI, prog_name="compute") - main() diff --git a/src/lyscripts/compute/evidence.py b/src/lyscripts/compute/evidence.py deleted file mode 100644 index ffce481..0000000 --- a/src/lyscripts/compute/evidence.py +++ /dev/null @@ -1,197 +0,0 @@ -"""Compute the model evidence from MCMC samples. - -Given the samples drawn during thermodynamic integration and their respective log -likelihoods, compute the model log evidence and the Bayesian Information Criterion. 
-""" - -from __future__ import annotations - -import json -from pathlib import Path - -import emcee -import h5py -import numpy as np -import pandas as pd -from loguru import logger -from pydantic import Field -from scipy.integrate import trapezoid - -from lyscripts.cli import assemble_main -from lyscripts.configs import ( - BaseCLI, - DataConfig, - SamplingConfig, - ScheduleConfig, -) - -RNG = np.random.default_rng() - - -def comp_bic(log_probs: np.ndarray, num_params: int, num_data: int) -> float: - r"""Compute the negative one half of the Bayesian Information Criterion (BIC). - - The BIC is defined as [^1] - $$ BIC = k \\ln{n} - 2 \\ln{\\hat{L}} $$ - where $k$ is the number of parameters ``num_params``, $n$ the number of datapoints - ``num_data`` and $\\hat{L}$ the maximum likelihood estimate of the ``log_prob``. - It is constructed such that the following is an - approximation of the model evidence: - $$ p(D \\mid m) \\approx \\exp{\\left( - BIC / 2 \\right)} $$ - which is why this function returns the negative one half of it. - - [^1]: https://en.wikipedia.org/wiki/Bayesian_information_criterion - """ - return np.max(log_probs) - num_params * np.log(num_data) / 2.0 - - -def compute_evidence( - temp_schedule: np.ndarray, - log_probs: np.ndarray, -) -> float: - """Compute the evidence. - - Given a ``temp_schedule`` of inverse temperatures and corresponding sets of - ``log_probs``, we calculate the mean ``log_prob`` over all samples to approximate - the expectation value under the corresponding power posterior for each step in the - ``temp_schedule``. The evidence is evaluated using trapezoidal integration of the - expectation values over the ``temp_schedule``. 
- """ - a_mc = np.mean(log_probs, axis=1) - return trapezoid(y=a_mc, x=temp_schedule) - - -def compute_ti_results( - settings: EvidenceCLI, - temp_schedule: np.ndarray, - metrics: dict, - ndim: int, - h5_file: Path, -) -> tuple[np.ndarray, np.ndarray]: - """Compute the results in case of a thermodynamic integration run.""" - num_temps = len(temp_schedule) - - if num_temps != len(h5_file["ti"]): - raise RuntimeError( - f"Parameters suggest temp schedule of length {num_temps}, " - f"but stored are {len(h5_file['ti'])}", - ) - - nwalker = ndim * settings.sampling.walkers_per_dim - nsteps = settings.sampling.num_steps - ti_log_probs = np.zeros(shape=(num_temps, nsteps * nwalker)) - - for i, run in enumerate(h5_file["ti"]): - reader = emcee.backends.HDFBackend( - settings.sampling.storage_file, - name=f"ti/{run}", - read_only=True, - ) - ti_log_probs[i] = reader.get_blobs(flat=True)["log_prob"] - - evidence = compute_evidence(temp_schedule, ti_log_probs) - metrics["evidence"] = evidence - - return temp_schedule, ti_log_probs - - -class EvidenceCLI(BaseCLI): - """Compute model evidence from thermodynamic integration samples.""" - - data: DataConfig - sampling: SamplingConfig - schedule: ScheduleConfig = Field( - description="Configuration for generating inverse temperature schedule.", - ) - plots: Path = Field( - default="./plots", - description="Directory for storing plots.", - ) - metrics: Path = Field( - default="./metrics.json", - description="Path to metrics file.", - ) - - def cli_cmd(self) -> None: - """Start the ``evidence`` subcommand. - - Given the MCMC samples from thermodynamic integration provided by the - ``sampling`` argument and the corresponding inverse temperature schedule, - specified in the ``schedule`` argument, the model evidence is computed using - the functions :py:func:`compute_ti_results` and :py:func`compute_evidence`. - Further the BIC is evaluated. 
- """ - data = self.data.load() - - metrics = {} - - temp_schedule = self.schedule.get_schedule() - - with h5py.File(self.sampling.storage_file, mode="r") as h5_file: - # Get ndim from the HDF5 backend - backend = emcee.backends.HDFBackend( - self.sampling.storage_file, - read_only=True, - name=self.sampling.dataset, - ) - ndim = backend.shape[1] - logger.info(f"Inferred {ndim} parameters from stored samples") - - # if TI has been performed, compute the evidence - if "ti" in h5_file: - temp_schedule, ti_log_probs = compute_ti_results( - settings=self, - temp_schedule=temp_schedule, - metrics=metrics, - ndim=ndim, - h5_file=h5_file, - ) - - logger.info( - "Computed results of thermodynamic integration with " - f"{len(temp_schedule)} steps", - ) - - # store inverse temperatures and log-probs in CSV file - self.plots.parent.mkdir(parents=True, exist_ok=True) - - beta_vs_accuracy = pd.DataFrame( - np.array( - [ - temp_schedule, - np.mean(ti_log_probs, axis=1), - np.std(ti_log_probs, axis=1), - ], - ).T, - columns=["ฮฒ", "accuracy", "std"], - ) - beta_vs_accuracy.to_csv(self.plots, index=False) - logger.info(f"Plotted ฮฒ vs accuracy at {self.plots}") - - # use blobs, because also for TI, this is the unscaled log-prob - final_log_probs = backend.get_blobs()["log_prob"] - logger.info( - f"Opened samples from emcee backend from {self.sampling.storage_file}", - ) - - # store metrics in JSON file - self.metrics.parent.mkdir(parents=True, exist_ok=True) - self.metrics.touch(exist_ok=True) - - metrics["BIC"] = comp_bic( - log_probs=final_log_probs, - num_params=ndim, - num_data=len(data), - ) - metrics["max_llh"] = np.max(final_log_probs) - metrics["mean_llh"] = np.mean(final_log_probs) - - with open(self.metrics, mode="w", encoding="utf-8") as metrics_file: - json.dump(metrics, metrics_file) - - logger.info(f"Wrote out metrics to {self.metrics}") - - -if __name__ == "__main__": - main = assemble_main(settings_cls=EvidenceCLI, prog_name="compute evidence") - main() diff --git 
a/src/lyscripts/compute/posteriors.py b/src/lyscripts/compute/posteriors.py deleted file mode 100644 index c017ccd..0000000 --- a/src/lyscripts/compute/posteriors.py +++ /dev/null @@ -1,142 +0,0 @@ -"""Compute posterior state distributions. - -The posteriors are computed from drawn samples for a list of defined scenarios. If -priors have already been computed from the samples and the ``--cache_dir`` argument -is the same as during that computation, the priors will automatically be loaded from -the cache. -""" - -from typing import Literal - -import numpy as np -from loguru import logger -from lymph import models -from pydantic import Field -from rich import progress - -from lyscripts.cli import assemble_main -from lyscripts.compute.priors import compute_priors -from lyscripts.compute.utils import BaseComputeCLI, HDF5FileStorage, get_cached -from lyscripts.configs import ( - DistributionConfig, - GraphConfig, - ModalityConfig, - ModelConfig, - add_distributions, - add_modalities, - construct_model, -) -from lyscripts.utils import console - - -def compute_posteriors( - model_config: ModelConfig, - graph_config: GraphConfig, - dist_configs: dict[str, DistributionConfig], - modality_configs: dict[str, ModalityConfig], - priors: np.ndarray, - diagnosis: dict[Literal["ipsi", "contra"], dict], - midext: bool | None = None, - mode: Literal["HMM", "BN"] = "HMM", - progress_desc: str = "Computing posteriors from priors", -) -> np.ndarray: - """Compute posterior state distributions from ``priors``. - - This calls the ``model`` method :py:meth:`~lymph.types.Model.posterior_state_dist` - for each of the pre-computed ``priors``, given the specified ``diagnosis`` pattern. - - For the :py:class:`~lymph.models.Midline` model, the ``midext`` argument can be - used to specify whether the midline extension is present or not. 
- """ - model = construct_model(model_config, graph_config) - model = add_distributions(model, dist_configs) - model = add_modalities(model, modality_configs) - posteriors = [] - kwargs = {"midext": midext} if isinstance(model, models.Midline) else {} - - if isinstance(model, models.Unilateral | models.HPVUnilateral): - diagnosis = diagnosis.get("ipsi") - - for prior in progress.track( - sequence=priors, - description=progress_desc, - total=len(priors), - console=console, - ): - posteriors.append( - model.posterior_state_dist( - given_state_dist=prior, - given_diagnosis=diagnosis, - mode=mode, - **kwargs, - ), - ) - - return np.stack(posteriors) - - -class PosteriorsCLI(BaseComputeCLI): - """Compute posterior state distributions for different diagnosis scenarios.""" - - modalities: dict[str, ModalityConfig] = Field( - default={}, - description=( - "Maps names of diagnostic modalities to their specificity/sensitivity." - ), - ) - posteriors: HDF5FileStorage = Field( - description="Storage for the computed posteriors.", - ) - - def cli_cmd(self) -> None: - """Start the ``posteriors`` subcommand. - - This will compute the posterior state distributions, given a personalized - diagnosis pattern, for each of the scenarios provided to the command. 
- """ - logger.debug(self.model_dump_json(indent=2)) - - global_attrs = self.model_dump( - include={"model", "graph", "distributions", "modalities"}, - ) - self.posteriors.set_attrs(attrs=global_attrs, dataset="/") - - samples = self.sampling.load() - cached_compute_priors = get_cached(compute_priors, self.cache_dir) - cached_compute_posteriors = get_cached(compute_posteriors, self.cache_dir) - num_scens = len(self.scenarios) - - for i, scenario in enumerate(self.scenarios): - _fields = {"t_stages", "t_stages_dist", "mode"} - prior_kwargs = scenario.model_dump(include=_fields) - - _priors = cached_compute_priors( - model_config=self.model, - graph_config=self.graph, - dist_configs=self.distributions, - samples=samples, - progress_desc=f"Computing priors for scenario {i + 1}/{num_scens}", - **prior_kwargs, - ) - - _fields = {"diagnosis", "midext", "mode"} - posterior_kwargs = scenario.model_dump(include=_fields) - - posteriors = cached_compute_posteriors( - model_config=self.model, - graph_config=self.graph, - dist_configs=self.distributions, - modality_configs=self.modalities, - priors=_priors, - progress_desc=f"Computing posteriors for scenario {i + 1}/{num_scens}", - **posterior_kwargs, - ) - - self.posteriors.save(values=posteriors, dataset=f"{i:03d}") - self.posteriors.set_attrs(attrs=prior_kwargs, dataset=f"{i:03d}") - self.posteriors.set_attrs(attrs=posterior_kwargs, dataset=f"{i:03d}") - - -if __name__ == "__main__": - main = assemble_main(settings_cls=PosteriorsCLI, prog_name="compute posteriors") - main() diff --git a/src/lyscripts/compute/prevalences.py b/src/lyscripts/compute/prevalences.py deleted file mode 100644 index 89b84c9..0000000 --- a/src/lyscripts/compute/prevalences.py +++ /dev/null @@ -1,236 +0,0 @@ -"""Prevalence prediction module. - -This computes the prevalence of an observed involvement pattern, given a trained model. -It can also compare this prediction to the observed prevalence in the data. 
As for the -risk prediction, this uses caching and computes the priors first. -""" - -from collections.abc import Callable -from typing import Literal - -import lydata # noqa: F401 -import numpy as np -import pandas as pd -from loguru import logger -from lydata import C, Q -from lydata.accessor import QueryPortion -from lydata.querier import NoneQ -from lydata.utils import is_old -from lymph import models -from pydantic import Field -from rich import progress - -from lyscripts.cli import assemble_main -from lyscripts.compute.priors import compute_priors -from lyscripts.compute.utils import ( - BaseComputeCLI, - HDF5FileStorage, - get_cached, -) -from lyscripts.configs import ( - DataConfig, - DiagnosisConfig, - DistributionConfig, - GraphConfig, - ModalityConfig, - ModelConfig, - ScenarioConfig, - add_distributions, - add_modalities, - construct_model, -) -from lyscripts.utils import console - - -def compute_prevalences( - model_config: ModelConfig, - graph_config: GraphConfig, - dist_configs: dict[str, DistributionConfig], - modality_configs: dict[str, ModalityConfig], - priors: np.ndarray, - diagnosis: dict[Literal["ipsi", "contra"], dict], - midext: bool | None = None, - progress_desc: str = "Computing prevalences from priors", -) -> np.ndarray: - """Compute the prevalence of a diagnosis given the priors and the model.""" - model = construct_model(model_config, graph_config) - model = add_distributions(model, dist_configs) - - if len(modality_configs) != 1: - msg = "Only one modality is supported for prevalence prediction." 
- logger.error(msg) - raise ValueError(msg) - - model = add_modalities(model, modality_configs) - prevalences = [] - kwargs = {"midext": midext} if isinstance(model, models.Midline) else {} - - for prior in progress.track( - sequence=priors, - description=progress_desc, - total=len(priors), - console=console, - ): - obs_dist = model.obs_dist(given_state_dist=prior) - involvement = { - side: diagnosis.get(side).get(next(iter(modality_configs))) - for side in ["ipsi", "contra"] - } - - if isinstance(model, models.Unilateral | models.HPVUnilateral): - involvement = involvement.get("ipsi") - - prevalence = model.marginalize( - given_state_dist=obs_dist, - involvement=involvement, - **kwargs, - ) - - if isinstance(model, models.Midline): - # In this case, we need to renormalize the prevalence by the marginalized - # probability of all states with midline extension. We must do this, because - # we compute the analogous quantity for the data. In principle, we could - # also compute the prevalence of the diagnosis *and* midline extension, but - # we have decided to compute the diagnosis *given* midline extension. 
- # https://github.com/lycosystem/lyscripts/blob/ea49ec/lyscripts/compute/prevalences.py#L217-L225 - midext_prob = model.marginalize( - involvement=None, - given_state_dist=obs_dist, - **kwargs, - ) - prevalence /= midext_prob - - prevalences.append(prevalence) - - return np.stack(prevalences) - - -def generate_query_from_diagnosis(diagnosis: DiagnosisConfig) -> Q: - """Transform a diagnosis into a query for the data.""" - result = NoneQ() - for side in ["ipsi", "contra"]: - for modality, pattern in getattr(diagnosis, side, {}).items(): - for lnl, value in pattern.items(): - column = (modality, side, lnl) - result &= C(column) == value - return result - - -def observe_prevalence( - data: pd.DataFrame, - scenario_config: ScenarioConfig, - mapping: dict[int, str] | Callable[[int], str] | None = None, -) -> QueryPortion: - """Extract prevalence defined in a ``scenario`` from the ``data``. - - ``mapping`` defines how the T-stages in the data are supposed to be mapped to the - T-stages defined in the ``scenario``. - - It returns the number of patients that match the given scenario and the total - number of patients that are considered. E.g., in the example below we 79 patients - are of late T-stage and have a tumor extending over the midline. Of those, 30 were - diagnosed with contralateral involvement in LNL II based on a CT scan. - - >>> data = next(lydata.load_datasets(year=2021, institution="usz")) - >>> scenario_config = ScenarioConfig( - ... t_stages=["late"], - ... midext=True, - ... diagnosis=DiagnosisConfig(contra={"CT": {"II": True}}), - ... 
) - >>> observe_prevalence(data, scenario_config) - QueryPortion(match=np.int64(7), total=np.int64(79)) - """ - mapping = mapping or DataConfig.model_fields["mapping"].default_factory() - key = ("tumor", "1", "t_stage") if is_old(data) else ("tumor", "core", "t_stage") - data[key] = data.ly.t_stage.map(mapping) - - has_t_stage = C("t_stage").isin(scenario_config.t_stages) - if scenario_config.midext is None: - has_midext = NoneQ() - else: - has_midext = C("midext") == scenario_config.midext - - # Note that below we compute the prevalence of the diagnosis *given* midline - # extension. This means, that when computing the prevalence of the diagnosis in - # the model, we need to renormalize by diving by the probability of midline - # extension. For an older - but pretty surely correct - implementation see - # https://github.com/lycosystem/lyscripts/blob/ea49ec/lyscripts/compute/prevalences.py#L217-L225 - return data.ly.portion( - query=generate_query_from_diagnosis(scenario_config.diagnosis), - given=has_t_stage & has_midext, - ) - - -class PrevalencesCLI(BaseComputeCLI): - """Predict the prevalence of an involvement pattern from model samples.""" - - modalities: dict[str, ModalityConfig] = Field( - default={}, - description=( - "Maps names of diagnostic modalities to their specificity/sensitivity." 
- ), - ) - prevalences: HDF5FileStorage = Field( - description="Storage for the computed prevalences.", - ) - data: DataConfig - - def cli_cmd(self) -> None: - """Start the ``prevalences`` subcommand.""" - logger.debug(self.model_dump_json(indent=2)) - global_attrs = self.model_dump( - include={"model", "graph", "distributions", "modalities"}, - ) - self.prevalences.set_attrs(attrs=global_attrs, dataset="/") - - samples = self.sampling.load() - cached_compute_priors = get_cached(compute_priors, self.cache_dir) - cached_compute_prevalences = get_cached(compute_prevalences, self.cache_dir) - num_scens = len(self.scenarios) - - for i, scenario in enumerate(self.scenarios): - _fields = {"t_stages", "t_stages_dist", "mode"} - prior_kwargs = scenario.model_dump(include=_fields) - - _priors = cached_compute_priors( - model_config=self.model, - graph_config=self.graph, - dist_configs=self.distributions, - samples=samples, - progress_desc=f"Computing priors for scenario {i + 1}/{num_scens}", - **prior_kwargs, - ) - - _fields = {"diagnosis", "midext"} - prevalence_kwargs = scenario.model_dump(include=_fields) - - prevalences = cached_compute_prevalences( - model_config=self.model, - graph_config=self.graph, - dist_configs=self.distributions, - modality_configs=self.modalities, - priors=_priors, - progress_desc=f"Computing prevalences for scenario {i + 1}/{num_scens}", - **prevalence_kwargs, - ) - - portion = observe_prevalence( - data=self.data.load(), - scenario_config=scenario, - mapping=self.data.mapping, - ) - self.prevalences.save(values=prevalences, dataset=f"{i:03d}") - self.prevalences.set_attrs(attrs=prior_kwargs, dataset=f"{i:03d}") - self.prevalences.set_attrs(attrs=prevalence_kwargs, dataset=f"{i:03d}") - self.prevalences.set_attrs( - attrs={ - "num_match": portion.match, - "num_total": portion.total, - }, - dataset=f"{i:03d}", - ) - - -if __name__ == "__main__": - main = assemble_main(settings_cls=PrevalencesCLI, prog_name="compute prevalences") - main() diff 
--git a/src/lyscripts/compute/priors.py b/src/lyscripts/compute/priors.py deleted file mode 100644 index ae7307e..0000000 --- a/src/lyscripts/compute/priors.py +++ /dev/null @@ -1,111 +0,0 @@ -"""Given samples drawn during an MCMC round, compute the (prior) state distributions. - -This is done for each sample and for a list of specified scenarios. The computation is -cached at a location specified by the ``--cache_dir`` argument using ``joblib``. -""" - -from typing import Literal - -import numpy as np -from loguru import logger -from pydantic import Field -from rich import progress - -from lyscripts.cli import assemble_main -from lyscripts.compute.utils import BaseComputeCLI, HDF5FileStorage, get_cached -from lyscripts.configs import ( - DistributionConfig, - GraphConfig, - ModelConfig, - add_distributions, - construct_model, -) -from lyscripts.utils import console - - -def compute_priors( - model_config: ModelConfig, - graph_config: GraphConfig, - dist_configs: dict[str, DistributionConfig], - samples: np.ndarray, - t_stages: list[int | str], - t_stages_dist: list[float], - mode: Literal["HMM", "BN"] = "HMM", - progress_desc: str = "Computing priors from samples", -) -> np.ndarray: - """Compute prior state distributions from the ``samples`` for the ``model``. - - This will call the ``model`` method :py:meth:`~lymph.types.Model.state_dist` - for each of the ``samples``. The prior state distributions are computed for - each of the ``t_stages`` and marginalized over using the ``t_stages_dist``. 
- """ - model = construct_model(model_config, graph_config) - model = add_distributions(model, dist_configs) - priors = [] - - for sample in progress.track( - sequence=samples, - description=progress_desc, - total=len(samples), - console=console, - ): - model.set_params(*sample) - priors.append( - sum( - model.state_dist(t_stage=t, mode=mode) * p - for t, p in zip(t_stages, t_stages_dist, strict=False) - ), - ) - - return np.stack(priors) - - -class PriorsCLI(BaseComputeCLI): - """Compute the prior state distributions from MCMC samples.""" - - priors: HDF5FileStorage = Field(description="Storage for the computed priors.") - - def cli_cmd(self) -> None: - """Start the ``priors`` subcommand. - - Given a ``graph``, ``model``, ``distributions`` over diagnosis times, and - MCMC samples loaded from the ``sampling`` argument, this command computes the - prior state distributions for each of the specified ``scenarios``. - - Precomputing these state distributions is useful, because they largely only - depend on T-stage and not on the diagnosis or involvement of interest. Hence, - computing the :py:mod:`~lyscripts.compute.posteriors` and - :py:mod:`~lyscripts.compute.risks` can be sped up. - - Note that this command will use `joblib`_ to cache its computations. - - .. 
_joblib: https://joblib.readthedocs.io/ - """ - logger.debug(self.model_dump_json(indent=2)) - global_attrs = self.model_dump(include={"model", "graph", "distributions"}) - self.priors.set_attrs(attrs=global_attrs, dataset="/") - - samples = self.sampling.load() - cached_compute_priors = get_cached(compute_priors, self.cache_dir) - num_scenarios = len(self.scenarios) - - for i, scenario in enumerate(self.scenarios): - _fields = {"t_stages", "t_stages_dist", "mode"} - prior_kwargs = scenario.model_dump(include=_fields) - - priors = cached_compute_priors( - model_config=self.model, - graph_config=self.graph, - dist_configs=self.distributions, - samples=samples, - progress_desc=f"Computing priors for scenario {i + 1}/{num_scenarios}", - **prior_kwargs, - ) - - self.priors.save(values=priors, dataset=f"{i:03d}") - self.priors.set_attrs(attrs=prior_kwargs, dataset=f"{i:03d}") - - -if __name__ == "__main__": - main = assemble_main(settings_cls=PriorsCLI, prog_name="compute priors") - main() diff --git a/src/lyscripts/compute/risks.py b/src/lyscripts/compute/risks.py deleted file mode 100644 index 4b3e224..0000000 --- a/src/lyscripts/compute/risks.py +++ /dev/null @@ -1,140 +0,0 @@ -"""Predict risks of involvements for scenarios using drawn MCMC samples. - -As the priors and posteriors, this computation, too, uses caching and may skip the -computation of these two initial steps if the cache directory is the same as during -their computation. 
-""" - -from typing import Literal - -import numpy as np -from loguru import logger -from lymph import models -from pydantic import Field -from rich import progress - -from lyscripts.cli import assemble_main -from lyscripts.compute.posteriors import compute_posteriors -from lyscripts.compute.priors import compute_priors -from lyscripts.compute.utils import BaseComputeCLI, HDF5FileStorage, get_cached -from lyscripts.configs import ( - DistributionConfig, - GraphConfig, - ModalityConfig, - ModelConfig, - add_distributions, - add_modalities, - construct_model, -) -from lyscripts.utils import console - - -def compute_risks( - model_config: ModelConfig, - graph_config: GraphConfig, - dist_configs: dict[str, DistributionConfig], - modality_configs: dict[str, ModalityConfig], - posteriors: np.ndarray, - involvement: dict[Literal["ipsi", "contra"], dict], - progress_desc: str = "Computing risks from posteriors", -) -> np.ndarray: - """Compute the risk of ``involvement`` from each of the ``posteriors``. - - Essentially, this only calls the model's :py:meth:`lymph.models.Model.marginalize` - method, as nothing more is necessary than to marginalize the full posterior state - distribution over the states that correspond to the involvement of interest. 
- """ - model = construct_model(model_config, graph_config) - model = add_distributions(model, dist_configs) - model = add_modalities(model, modality_configs) - risks = [] - - if isinstance(model, models.Unilateral | models.HPVUnilateral): - involvement = involvement.get("ipsi") - - for posterior in progress.track( - sequence=posteriors, - description=progress_desc, - total=len(posteriors), - console=console, - ): - risks.append( - model.marginalize(involvement=involvement, given_state_dist=posterior), - ) - - return np.stack(risks) - - -class RisksCLI(BaseComputeCLI): - """Predict the risk of involvement scenarios from model samples given diagnoses.""" - - modalities: dict[str, ModalityConfig] = Field( - default={}, - description=( - "Maps names of diagnostic modalities to their specificity/sensitivity." - ), - ) - risks: HDF5FileStorage = Field(description="Storage for the computed risks.") - - def cli_cmd(self) -> None: - """Start the ``risks`` subcommand.""" - logger.debug(self.model_dump_json(indent=2)) - global_attrs = self.model_dump( - include={"model", "graph", "distributions", "modalities"}, - ) - self.risks.set_attrs(attrs=global_attrs, dataset="/") - - samples = self.sampling.load() - cached_compute_priors = get_cached(compute_priors, self.cache_dir) - cached_compute_posteriors = get_cached(compute_posteriors, self.cache_dir) - cached_compute_risks = get_cached(compute_risks, self.cache_dir) - num_scens = len(self.scenarios) - - for i, scenario in enumerate(self.scenarios): - _fields = {"t_stages", "t_stages_dist", "mode"} - prior_kwargs = scenario.model_dump(include=_fields) - - _priors = cached_compute_priors( - model_config=self.model, - graph_config=self.graph, - dist_configs=self.distributions, - samples=samples, - progress_desc=f"Computing priors for scenario {i + 1}/{num_scens}", - **prior_kwargs, - ) - - _fields = {"diagnosis", "midext", "mode"} - posterior_kwargs = scenario.model_dump(include=_fields) - - _posteriors = 
cached_compute_posteriors( - model_config=self.model, - graph_config=self.graph, - dist_configs=self.distributions, - modality_configs=self.modalities, - priors=_priors, - progress_desc=f"Computing posteriors for scenario {i + 1}/{num_scens}", - **posterior_kwargs, - ) - - _fields = {"involvement"} - risk_kwargs = scenario.model_dump(include=_fields) - - risks = cached_compute_risks( - model_config=self.model, - graph_config=self.graph, - dist_configs=self.distributions, - modality_configs=self.modalities, - posteriors=_posteriors, - progress_desc=f"Computing risks for scenario {i + 1}/{num_scens}", - **risk_kwargs, - ) - - self.risks.save(values=risks, dataset=f"{i:03d}") - self.risks.set_attrs(attrs=prior_kwargs, dataset=f"{i:03d}") - self.risks.set_attrs(attrs=posterior_kwargs, dataset=f"{i:03d}") - self.risks.set_attrs(attrs=risk_kwargs, dataset=f"{i:03d}") - - -if __name__ == "__main__": - main = assemble_main(settings_cls=RisksCLI, prog_name="compute risks") - main() diff --git a/src/lyscripts/compute/utils.py b/src/lyscripts/compute/utils.py deleted file mode 100644 index b3bed8a..0000000 --- a/src/lyscripts/compute/utils.py +++ /dev/null @@ -1,276 +0,0 @@ -"""Utilities for precomputing the priors and posteriors.""" - -import ast -import functools -from pathlib import Path -from typing import Annotated, Any - -import h5py -import numpy as np -from joblib import Memory -from loguru import logger -from pydantic import AfterValidator, BaseModel, Field - -from lyscripts.configs import ( - BaseCLI, - DistributionConfig, - GraphConfig, - ModelConfig, - SamplingConfig, - ScenarioConfig, -) - - -class BaseComputeCLI(BaseCLI): - """Common command line settings for the submodule ``compute``.""" - - graph: GraphConfig - model: ModelConfig = ModelConfig() - distributions: dict[str, DistributionConfig] = Field( - default={}, - description=( - "Mapping of model T-categories to predefined distributions over " - "diagnose times." 
- ), - ) - cache_dir: Path = Field( - default=Path.cwd() / ".cache", - description="Cache directory for storing function calls.", - ) - scenarios: list[ScenarioConfig] = Field( - default=[], - description="List of scenarios to compute risks for.", - ) - sampling: SamplingConfig - - -def is_hdf5_compatible(value: Any) -> bool: - """Check if the given ``value`` can be stored in an HDF5 file.""" - return isinstance( - value, - bool | str | bytes | int | float | np.ndarray | list | tuple, - ) - - -def to_hdf5_attrs(mapping: dict[str, Any]) -> dict[str, str]: - """Convert ``attrs`` to a dictionary of HDF5 compatible attributes or strings.""" - res = {} - for key, val in mapping.items(): - if is_hdf5_compatible(val): - res[key] = val - else: - res[key] = str(val) - return res - - -def from_hdf5_attrs(mapping: h5py.AttributeManager) -> dict[str, Any]: - """Convert the HDF5 attributes to a dictionary of Python objects.""" - attrs = {} - for key, value in mapping.items(): - try: - attrs[key] = ast.literal_eval(value) - except ValueError: - attrs[key] = value - return attrs - - -def extract_modalities(diagnosis: dict[str, Any]) -> set[str]: - """Get the set of modalities used in the ``diagnosis``. - - This is not used in the main apps anymore, but since it may be useful, I keep it. - - >>> diagnosis = { - ... "ipsi": { - ... "MRI": {"II": True, "III": False}, - ... "PET": {"II": False, "III": True}, - ... }, - ... "contra": {"MRI": {"II": False, "III": None}}, - ... 
} - >>> sorted(extract_modalities(diagnosis)) - ['MRI', 'PET'] - """ - modality_set = set() - - if "ipsi" not in diagnosis and "contra" not in diagnosis: - return modality_set | set(diagnosis.keys()) - - for side in ["ipsi", "contra"]: - if side in diagnosis: - modality_set |= set(diagnosis[side].keys()) - - return modality_set - - -def ensure_parent_dir(path: Path) -> Path: - """Create the parent directory of the given ``path``.""" - path.parent.mkdir(parents=True, exist_ok=True) - logger.debug(f"Ensured parent directory of {path}") - return path - - -HasParentPath = Annotated[Path, AfterValidator(ensure_parent_dir)] -"""Type hint for path whose parent dir is created if it doesn't exist.""" - - -class HDF5FileStorage(BaseModel): - """HDF5 file storage for in- and outputs of computations.""" - - file: HasParentPath = Field( - description="Path to the HDF5 file. Parent directories are created if needed.", - ) - dataset: str | None = Field( - default=None, - description=( - "Name of the dataset in the HDF5 file. Save/load methods can override this." - ), - ) - - def _get_dataset(self) -> str: - """Get attribute ``dataset`` or the first dataset in the file. 
- - >>> from tempfile import TemporaryDirectory - >>> tmp_path = Path(TemporaryDirectory().name) / "test.hdf5" - >>> storage = HDF5FileStorage(file=tmp_path) - >>> rand_data = np.random.rand(100, 100) - >>> storage.save(values=rand_data, dataset="test") - >>> np.all(storage.load(dataset="test") == rand_data) - np.True_ - >>> np.all(storage.load() == rand_data) # loads first dataset - np.True_ - >>> some_attrs = {"key": "value"} - >>> storage.set_attrs(attrs=some_attrs, dataset="test") - >>> storage.get_attrs(dataset="test") - {'key': 'value'} - """ - if self.dataset is not None: - return self.dataset - - with h5py.File(self.file, "r") as file: - return next(iter(file.keys())) - - def load(self, dataset: str | None = None) -> np.ndarray: - """Load the dataset with the name ``dataset``.""" - dataset = dataset or self._get_dataset() - - with h5py.File(self.file, "r") as file: - array = file[dataset][()] - - logger.debug(f"Loaded dataset {dataset} from {self.file}") - return array - - def get_attrs(self, dataset: str | None = None) -> dict[str, Any]: - """Get the attributes of the dataset ``dataset``.""" - dataset = dataset or self._get_dataset() - - with h5py.File(self.file, "r") as file: - attrs = from_hdf5_attrs(file[dataset].attrs) - - logger.debug(f"Loaded attrs for dataset '{dataset}' from {self.file}") - return attrs - - def save(self, values: np.ndarray, dataset: str | None = None) -> None: - """Set the ``values`` for the ``dataset`` dataset.""" - dataset = dataset or self._get_dataset() - - with h5py.File(self.file, "a") as file: - if dataset in file: - del file[dataset] - file[dataset] = values - - logger.debug(f"Stored dataset {dataset} in {self.file}") - - def set_attrs(self, attrs: dict[str, Any], dataset: str | None = None) -> None: - """Update the ``attrs`` for the ``dataset`` dataset.""" - dataset = dataset or self._get_dataset() - - with h5py.File(self.file, "a") as file: - if dataset not in file: - raise ValueError(f"Dataset '{dataset}' not found in 
{self.file}") - file[dataset].attrs.update(to_hdf5_attrs(attrs)) - - logger.debug(f"Stored attrs {attrs} for dataset '{dataset}' in {self.file}") - - -def reduce_pattern(pattern: dict[str, dict[str, bool]]) -> dict[str, dict[str, bool]]: - """Reduce a ``pattern`` by removing all entries that are ``None``. - - This way, it should be completely recoverable by the ``complete_pattern`` function - but be shorter to store. - - Unused but maybe useful for some cases. Keeping it in here for now. - - >>> full = { - ... "ipsi": {"I": None, "II": True, "III": None}, - ... "contra": {"I": None, "II": None, "III": None}, - ... } - >>> reduce_pattern(full) - {'ipsi': {'II': True}} - - """ - tmp_pattern = pattern.copy() - reduced_pattern = {} - for side in ["ipsi", "contra"]: - if not all(v is None for v in tmp_pattern[side].values()): - reduced_pattern[side] = {} - for lnl, val in tmp_pattern[side].items(): - if val is not None: - reduced_pattern[side][lnl] = val - - return reduced_pattern - - -def complete_pattern( - pattern: dict[str, dict[str, bool]] | None, - lnls: list[str], -) -> dict[str, dict[str, bool]]: - """Make sure the provided involvement ``pattern`` is correct. - - For each side of the neck, and for each of the ``lnls`` this should in the end - contain ``True``, ``False`` or ``None``. - - Unused but maybe useful for some cases. Keeping it in here for now. 
- - >>> pattern = {"ipsi": {"II": True}} - >>> lnls = ["II", "III"] - >>> complete_pattern(pattern, lnls) - {'ipsi': {'II': True, 'III': None}, 'contra': {'II': None, 'III': None}} - - """ - if pattern is None: - pattern = {} - - for side in ["ipsi", "contra"]: - if side not in pattern: - pattern[side] = {} - - for lnl in lnls: - if lnl not in pattern[side]: - pattern[side][lnl] = None - elif pattern[side][lnl] is None: - continue - else: - pattern[side][lnl] = bool(pattern[side][lnl]) - - return pattern - - -def get_cached(func: callable, cache_dir: Path) -> callable: - """Return cached ``func`` with a cache at ``cache_dir``.""" - memory = Memory(location=cache_dir, verbose=0) - cached_func = memory.cache(func, ignore=["progress_desc"]) - logger.info(f"Initialized cache for {func.__name__} at {cache_dir}") - - @functools.wraps(func) - def log_cache_info_wrapper(*args, **kwargs): - logger.debug(f"Calling {func.__name__}({args}, {kwargs})") - if cached_func.check_call_in_cache(*args, **kwargs): - logger.info(f"Cache hit for {func.__name__}, returning stored result") - else: - logger.info(f"Cache miss for {func.__name__}, computing result") - - result = cached_func(*args, **kwargs) - logger.debug(f"Computed {result = }") - return result - - log_cache_info_wrapper._cached_func = cached_func - return log_cache_info_wrapper diff --git a/src/lyscripts/configs.py b/src/lyscripts/configs.py deleted file mode 100644 index ebae236..0000000 --- a/src/lyscripts/configs.py +++ /dev/null @@ -1,825 +0,0 @@ -"""Using `pydantic`_, we define configurations for the package. - -Most importantly, these configurations are part of the CLIs that the package provides. -but they also help with programmatically validating and constructing various objects. -Maybe most importantly, the :py:class:`GraphConfig` and :py:class:`ModelConfig` may be -used to precisely and reproducibly define how the function :py:func:`construct_model` -should create lymphatic progression :py:mod:`~lymph.models`. 
- -.. _pydantic: https://docs.pydantic.dev/latest/ -""" - -from __future__ import annotations - -import importlib -import importlib.util -import os -import warnings -from collections.abc import Callable, Sequence -from copy import deepcopy -from pathlib import Path -from typing import Annotated, Any, Literal - -import numpy as np -import pandas as pd -import yaml -from loguru import logger -from lydata.loader import LyDataset -from lydata.utils import ModalityConfig -from lymph import graph, models -from lymph.modalities import Pathological -from lymph.types import Model, PatternType -from pydantic import ( - AfterValidator, - BaseModel, - ConfigDict, - Field, - FilePath, -) -from pydantic_settings import ( - BaseSettings, - PydanticBaseSettingsSource, - YamlConfigSettingsSource, -) -from pydantic_settings.sources import DEFAULT_PATH - -from lyscripts.utils import binom_pmf, flatten, load_model_samples, load_patient_data - -FuncNameType = Literal["binomial"] - - -DIST_MAP: dict[FuncNameType, Callable] = { - "binomial": binom_pmf, -} - - -class CrossValidationConfig(BaseModel): - """Configs for splitting a dataset into cross-validation folds.""" - - seed: int = Field( - default=42, - description="Seed for the random number generator.", - ) - folds: int = Field( - default=5, - description="Number of folds to split the dataset into.", - ) - - -class DataConfig(BaseModel): - """Where to load lymphatic progression data from and how to feed it into a model.""" - - source: FilePath | LyDataset = Field( - description=( - "Either a path to a CSV file or a config that specifies how and where " - "to fetch the data from." - ), - ) - side: Literal["ipsi", "contra"] | None = Field( - default=None, - description="Side of the neck to load data for. 
Only for Unilateral models.", - ) - mapping: dict[Literal[0, 1, 2, 3, 4] | str, int | str] = Field( - default_factory=lambda: {i: "early" if i <= 2 else "late" for i in range(5)}, - description="Optional mapping of numeric T-stages to model T-stages.", - ) - - def load(self, **get_dataframe_kwargs) -> pd.DataFrame: - """Load data from path or the :py:class:`~lydata.loader.LyDataset`.""" - if isinstance(self.source, LyDataset): - return self.source.get_dataframe(**get_dataframe_kwargs) - - return load_patient_data(self.source, **get_dataframe_kwargs) - - def get_load_kwargs(self, **read_csv_kwargs: dict[str, Any]) -> dict[str, Any]: - """Get kwargs for :py:meth:`~lymph.types.Model.load_patient_data`.""" - return { - "patient_data": self.load(**(read_csv_kwargs or {})), - **self.model_dump(exclude={"source"}, exclude_none=True), - } - - -def check_pattern(value: PatternType) -> Any: - """Check if the value can be converted to a boolean value.""" - return {lnl: map_to_optional_bool(v) for lnl, v in value.items()} - - -class DiagnosisConfig(BaseModel): - """Defines an ipsi- and contralateral diagnosis pattern.""" - - ipsi: dict[str, Annotated[PatternType, AfterValidator(check_pattern)]] = Field( - default={}, - description="Observed diagnoses by different modalities on the ipsi neck.", - examples=[{"CT": {"II": True, "III": False}}], - ) - contra: dict[str, Annotated[PatternType, AfterValidator(check_pattern)]] = Field( - default={}, - description="Observed diagnoses by different modalities on the contra neck.", - ) - - def to_involvement(self, modality: str) -> InvolvementConfig: - """Convert the diagnosis pattern to an involvement pattern for ``modality``.""" - return InvolvementConfig( - ipsi=self.ipsi.get(modality, {}), - contra=self.contra.get(modality, {}), - ) - - -class DistributionConfig(BaseModel): - """Configuration defining a distribution over diagnose times.""" - - kind: Literal["frozen", "parametric"] = Field( - default="frozen", - description="Parametric 
distributions may be updated.", - ) - func: FuncNameType = Field( - default="binomial", - description="Name of predefined function to use as distribution.", - ) - params: dict[str, int | float] = Field( - default={}, - description="Parameters to pass to the predefined function.", - ) - - -class InvolvementConfig(BaseModel): - """Config that defines an ipsi- and contralateral involvement pattern.""" - - ipsi: Annotated[PatternType, AfterValidator(check_pattern)] = Field( - default={}, - description="Involvement pattern for the ipsilateral side of the neck.", - examples=[{"II": True, "III": False}], - ) - contra: Annotated[PatternType, AfterValidator(check_pattern)] = Field( - default={}, - description="Involvement pattern for the contralateral side of the neck.", - ) - - -def retrieve_graph_representation(model: Model) -> graph.Representation: - """Retrieve the graph representation from a model.""" - if hasattr(model, "graph"): - return model.graph - - if hasattr(model, "hpv"): - return retrieve_graph_representation(model.hpv) - - if hasattr(model, "ipsi"): - return retrieve_graph_representation(model.ipsi) - - if hasattr(model, "ext"): - return retrieve_graph_representation(model.ext) - - raise ValueError("Model does not have a graph representation.") - - -class GraphConfig(BaseModel): - """Specifies how the tumor(s) and LNLs are connected in a DAG.""" - - tumor: dict[str, list[str]] = Field( - description="Define the name of the tumor(s) and which LNLs it/they drain to.", - ) - lnl: dict[str, list[str]] = Field( - description="Define the name of the LNL(s) and which LNLs it/they drain to.", - ) - - @classmethod - def from_model(cls: type, model: Model) -> GraphConfig: - """Create a ``GraphConfig`` from a ``Model``.""" - graph = retrieve_graph_representation(model) - return cls( - tumor={ - name: [edge.child.name for edge in tumor.out] - for name, tumor in graph.tumors.items() - }, - lnl={ - name: [edge.child.name for edge in lnl.out] # noqa - for name, lnl in 
graph.lnls.items() - }, - ) - - -def has_model_symbol(path: Path) -> Path: - """Check if the Python file at ``path`` defines a symbol named ``model``.""" - spec = importlib.util.spec_from_file_location(path.stem, path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - - if not hasattr(module, "model"): - raise ValueError(f"Python file at {path} does not define a symbol 'model'.") - - return path - - -def get_symmetry_kwargs(model: Model) -> dict[str, Any]: - """Get the symmetry kwargs from a model.""" - if isinstance(model, models.Unilateral | models.HPVUnilateral): - raise TypeError("Unilateral models do not have symmetry kwargs.") - - if hasattr(model, "ext"): - return get_symmetry_kwargs(model.ext) - - return getattr(model, "is_symmetric", {}) - - -class ModelConfig(BaseModel): - """Define which of the ``lymph`` models to use and how to set them up.""" - - external_file: Annotated[FilePath, AfterValidator(has_model_symbol)] | None = Field( - default=None, - description="Path to a Python file that defines a model.", - ) - class_name: Literal["Unilateral", "Bilateral", "Midline"] = Field( - default="Unilateral", - description="Name of the model class to use.", - ) - constructor: Literal["binary", "trinary"] = Field( - default="binary", - description="Trinary models differentiate btw. micro- and macroscopic disease.", - ) - max_time: int = Field( - default=10, - description="Max. number of time-steps to evolve the model over.", - ) - named_params: Sequence[str] = Field( - default=None, - description=( - "Subset of valid model parameters a sampler may provide in the form of a " - "dictionary to the model instead of as an array. Or, after sampling, with " - "this list, one may safely recover which parameter corresponds to which " - "index in the sample." 
- ), - ) - kwargs: dict[str, Any] = Field( - default={}, - description="Additional keyword arguments to pass to the model constructor.", - ) - - @classmethod - def from_model(cls: type, model: Model) -> ModelConfig: - """Create a ``ModelConfig`` from a ``Model``.""" - warnings.warn( - message=( - "Not all kwargs passed at initialization can be recovered into a " - "config. Make sure to manually double-check the config." - ), - category=UserWarning, - stacklevel=2, - ) - - if getattr(model, "_named_params", None): - additional_kwargs = {"named_params": list(model.named_params)} - else: - additional_kwargs = {} - - try: - additional_kwargs["is_symmetric"] = get_symmetry_kwargs(model) - except TypeError: - pass - - if isinstance(model, models.Midline): - additional_kwargs["use_midext_evo"] = model.use_midext_evo - additional_kwargs["use_central"] = hasattr(model, "_central") - additional_kwargs["use_mixing"] = hasattr(model, "mixing_param") - - if not hasattr(model, "_unknown"): - additional_kwargs["marginalize_unknown"] = False - - return cls( - class_name=model.__class__.__name__, - constructor="trinary" if model.is_trinary else "binary", - max_time=model.max_time, - kwargs=additional_kwargs, - ) - - -def modalityconfig_from_model(model: Model, modality_name: str) -> ModalityConfig: - """Create a ``ModalityConfig`` from a ``Model``.""" - modality = model.get_modality(modality_name) - return ModalityConfig( - spec=modality.spec, - sens=modality.sens, - kind="pathological" if isinstance(modality, Pathological) else "clinical", - ) - - -class DeprecatedModelConfig(BaseModel): - """Model configuration prior to ``lyscripts`` major version 1. - - This is implemented for backwards compatibility. Its sole job is to translate - the outdated settings format into the new one. Note that the only stuff that needs - to be translated is the model configuration itself and the distributions for - marginalization over diagnosis times. 
The :py:class:`~GraphConfig` is still - compatible. - """ - - first_binom_prob: float = Field( - description="Fixed parameter for first binomial dist over diagnosis times.", - ge=0.0, - le=1.0, - ) - max_t: int = Field( - description="Max. number of time-steps to evolve the model over.", - gt=0, - ) - t_stages: list[int | str] = Field( - description=( - "List of T-stages to marginalize over in the scenario. The old format " - "assumed all T-stages except the first one to be parametric. Only binomial " - "distributions are supported." - ), - ) - class_: Literal["Unilateral", "Bilateral", "Midline", "MidlineBilateral"] = Field( - description="Name of the model class. Only binary models are supported.", - alias="class", - ) - kwargs: dict[str, Any] = Field( - default={}, - description="Additional keyword arguments to pass to the model constructor.", - ) - - def model_post_init(self, __context): - """Issue a deprecation warning.""" - warnings.warn( - message="The 'DeprecatedModelConfig' is deprecated.", - category=DeprecationWarning, - stacklevel=2, - ) - if "Midline" in self.class_: - self.class_ = "Midline" - warnings.warn( - "Model may not be recreated as expected due to extra parameter " - "`midext_prob`. 
Make sure to manually handle edge cases.", - stacklevel=2, - ) - return super().model_post_init(__context) - - def translate(self) -> tuple[ModelConfig, dict[int | str, DistributionConfig]]: - """Translate the deprecated model config to the new format.""" - old_kwargs = self.kwargs.copy() - new_kwargs = {"use_midext_evo": False} if "Midline" in self.class_ else {} - - if (tumor_spread := old_kwargs.pop("base_symmetric")) is not None: - new_kwargs["is_symmetric"] = new_kwargs.get("is_symmetric", {}) - new_kwargs["is_symmetric"]["tumor_spread"] = tumor_spread - - if (lnl_spread := old_kwargs.pop("trans_symmetric")) is not None: - new_kwargs["is_symmetric"] = new_kwargs.get("is_symmetric", {}) - new_kwargs["is_symmetric"]["lnl_spread"] = lnl_spread - - new_kwargs.update(old_kwargs) - - model_config = ModelConfig( - class_name=self.class_, - constructor="binary", - max_time=self.max_t, - kwargs=new_kwargs, - ) - - distribution_configs = {} - for i, t_stage in enumerate(self.t_stages): - distribution_configs[t_stage] = DistributionConfig( - kind="frozen" if i == 0 else "parametric", - func="binomial", - params={"p": self.first_binom_prob}, - ) - - return model_config, distribution_configs - - -class SamplingConfig(BaseModel): - """Settings to configure the MCMC sampling.""" - - storage_file: Path = Field( - description="Path to HDF5 file store results or load last state.", - ) - history_file: Path | None = Field( - default=None, - description="Path to store the burn-in metrics (as CSV file).", - ) - dataset: str = Field( - default="mcmc", - description="Name of the dataset in the HDF5 file.", - ) - cores: int | None = Field( - gt=0, - default=os.cpu_count(), - description=( - "Number of cores to use for parallel sampling. If `None`, no parallel " - "processing is used." 
- ), - ) - seed: int = Field( - default=42, - description="Seed for the random number generator.", - ) - walkers_per_dim: int = Field( - default=20, - description="Number of walkers per parameter space dimension.", - ) - check_interval: int = Field( - default=50, - description="Check for convergence each time after this many steps.", - ) - trust_factor: float = Field( - default=50.0, - description=( - "Trust the autocorrelation time only when it's smaller than this factor " - "times the length of the chain." - ), - ) - relative_thresh: float = Field( - default=0.05, - description="Relative threshold for convergence.", - ) - burnin_steps: int | None = Field( - default=None, - description=( - "Number of burn-in steps to take. If None, burn-in runs until convergence." - ), - ) - num_steps: int | None = Field( - default=100, - description=("Number of steps to take in the MCMC sampling."), - ) - thin_by: int = Field( - default=10, - description="How many samples to draw before for saving one.", - ) - inverse_temp: float = Field( - default=1.0, - description=( - "Inverse temperature for thermodynamic integration. Note that this is not " - "yet fully implemented." - ), - ) - - def load(self, thin: int = 1) -> np.ndarray: - """Load the samples from the HDF5 file. - - Note that the ``thin`` represents another round of thinning and is usually - not necessary if the samples were already thinned during the sampling process. - """ - return load_model_samples( - file_path=self.storage_file, - name=self.dataset, - thin=thin, - ) - - -def geometric_schedule(num: int, *_a) -> np.ndarray: - """Create a geometric sequence of ``num`` numbers from 0 to 1.""" - log_seq = np.logspace(0.0, 1.0, num) - shifted_seq = log_seq - 1.0 - return shifted_seq / 9.0 - - -def linear_schedule(num: int, *_a) -> np.ndarray: - """Create a linear sequence of ``num`` numbers from 0 to 1. - - Equivalent to the :py:func:`power_schedule` with ``power=1``. 
- """ - return np.linspace(0.0, 1.0, num) - - -def power_schedule(num: int, power: float, *_a) -> np.ndarray: - """Create a power sequence of ``num`` numbers from 0 to 1. - - This is essentially a :py:func:`linear_schedule` of ``num`` numbers from 0 to 1, - but each number is raised to the power of ``power``. - """ - lin_seq = np.linspace(0.0, 1.0, num) - return lin_seq**power - - -SCHEDULES = { - "geometric": geometric_schedule, - "linear": linear_schedule, - "power": power_schedule, -} - - -class ScheduleConfig(BaseModel): - """Configuration for generating a schedule of inverse temperatures.""" - - method: Literal["geometric", "linear", "power"] = Field( - default="power", - description="Method to generate the inverse temperature schedule.", - ) - num: int = Field( - default=32, - description="Number of inverse temperatures in the schedule.", - ) - power: float = Field( - default=4.0, - description="If a power schedule is chosen, use this as power.", - ) - values: list[float] | None = Field( - default=None, - description=( - "List of inverse temperatures to use instead of generating a schedule. " - "If a list is provided, the other parameters are ignored." - ), - ) - - def get_schedule(self) -> np.ndarray: - """Get the inverse temperature schedule as a numpy array.""" - if self.values is not None: - logger.debug("Using provided inverse temperature values.") - schedule = np.array(self.values) - else: - logger.debug(f"Generating inverse temperature schedule with {self.method}.") - func = SCHEDULES[self.method] - schedule = func(self.num, self.power) - - logger.info(f"Generated inverse temperature schedule: {schedule}") - return schedule - - -def map_to_optional_bool(value: Any) -> Any: - """Try to convert the options in the `PatternType` to a boolean value.""" - if value in [True, "involved", 1]: - return True - - if value in [False, "healthy", 0]: - return False - - return value - - -class ScenarioConfig(BaseModel): - """Define a scenario for which e.g. 
prevalences and risks may be computed.""" - - t_stages: list[int | str] = Field( - description="List of T-stages to marginalize over in the scenario.", - examples=[["early"], [3, 4]], - ) - t_stages_dist: list[float] = Field( - default=[1.0], - description="Distribution over T-stages to use for marginalization.", - examples=[[1.0], [0.6, 0.4]], - ) - midext: bool | None = Field( - default=None, - description="Whether the patient's tumor extends over the midline.", - ) - mode: Literal["HMM", "BN"] = Field( - default="HMM", - description="Which underlying model architecture to use.", - ) - involvement: InvolvementConfig = InvolvementConfig() - diagnosis: DiagnosisConfig = DiagnosisConfig() - - def model_post_init(self, __context: Any) -> None: - """Interpolate and normalize the distribution.""" - self.interpolate() - self.normalize() - - def interpolate(self): - """Interpolate the distribution to the number of ``t_stages``.""" - if len(self.t_stages) != len(self.t_stages_dist): - new_x = np.linspace(0.0, 1.0, len(self.t_stages)) - old_x = np.linspace(0.0, 1.0, len(self.t_stages_dist)) - # cast to list to make ``__eq__`` work - self.t_stages_dist = np.interp(new_x, old_x, self.t_stages_dist).tolist() - - def normalize(self): - """Normalize the distribution to sum to 1.""" - if not np.isclose(np.sum(self.t_stages_dist), 1.0): - self.t_stages_dist = ( - np.array(self.t_stages_dist) / np.sum(self.t_stages_dist) - ).tolist() # cast to list to make ``__eq__`` work - - -def _construct_model_from_external(path: Path) -> Model: - """Construct a model from a Python file.""" - module_name = path.stem - spec = importlib.util.spec_from_file_location(module_name, path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - logger.info(f"Loaded model from {path}. 
This ignores model and graph configs.") - return module.model - - -def construct_model( - model_config: ModelConfig, - graph_config: GraphConfig, -) -> Model: - """Construct a model from a ``model_config``. - - The default/expected use of this is to specify a model class from the - `lymph`_ package and pass the necessary arguments to its constructor. - However, it is also possible to load a model from an external Python file via the - ``external`` attribute of the ``model_config`` argument. In this case, a symbol - with name ``model`` must be defined in the file that is to be loaded. - - .. note:: - - No check is performed on the model's compatibility with the command/pipeline - it is used in. It is assumed the model complies with the - :py:class:`model type ` specifications of the `lymph`_ - package. - - .. _lymph: https://lymph-model.readthedocs.io/stable/ - """ - if model_config.external_file is not None: - return _construct_model_from_external(model_config.external_file) - - cls = getattr(models, model_config.class_name) - constructor = getattr(cls, model_config.constructor) - model = constructor( - graph_dict=flatten(graph_config.model_dump()), - max_time=model_config.max_time, - named_params=model_config.named_params, - **model_config.kwargs, - ) - logger.info(f"Constructed model: {model}") - return model - - -def add_distributions( - model: Model, - configs: dict[str | int, DistributionConfig], - mapping: dict[FuncNameType, Callable] | None = None, - inplace: bool = False, -) -> Model: - """Construct and add distributions over diagnose times to a ``model``.""" - if not inplace: - model = deepcopy(model) - logger.debug("Created deepcopy of model.") - - mapping = mapping or DIST_MAP - - for t_stage, dist_config in configs.items(): - if dist_config.kind == "frozen": - support = np.arange(model.max_time + 1) - dist = mapping[dist_config.func](support, **dist_config.params) - elif dist_config.kind == "parametric": - dist = mapping[dist_config.func] - else: - 
raise ValueError(f"Unknown distribution kind: {dist_config.kind}") - - model.set_distribution(t_stage, dist) - if dist_config.kind == "parametric" and dist_config.params: - params = {f"{t_stage}_{k}": v for k, v in dist_config.params.items()} - model.set_params(**params) - - logger.debug(f"Set {dist_config.kind} distribution for '{t_stage}': {dist}") - - logger.info(f"Added {len(configs)} distributions to model: {model}") - return model - - -def add_modalities( - model: Model, - modalities: dict[str, ModalityConfig], - inplace: bool = False, -) -> Model: - """Add ``modalities`` to a ``model``.""" - if not inplace: - model = deepcopy(model) - logger.debug("Created deepcopy of model.") - - for modality, modality_config in modalities.items(): - model.set_modality(modality, **modality_config.model_dump()) - logger.debug(f"Added modality {modality} to model: {modality_config}") - - logger.info(f"Added {len(modalities)} modalities to model: {model}") - return model - - -def add_data( - model: Model, - path: Path, - side: Literal["ipsi", "contra"], - mapping: dict[Literal[0, 1, 2, 3, 4], int | str] | None = None, - inplace: bool = False, -) -> Model: - """Add data to a ``model``.""" - data = pd.read_csv(path, header=[0, 1, 2]) - logger.debug(f"Loaded data from {path}: Shape: {data.shape}") - - kwargs = {"patient_data": data, "mapping": mapping} - if isinstance(model, models.Unilateral): - kwargs["side"] = side - - if not inplace: - model = deepcopy(model) - logger.debug("Created deepcopy of model.") - - model.load_patient_data(**kwargs) - logger.info(f"Added data to model: {model}") - return model - - -PathType = Path | str | Sequence[Path | str] - - -class DynamicYamlConfigSettingsSource(YamlConfigSettingsSource): - """YAML config source that allows dynamic file path specification. - - This is heavily inspired by `this comment`_ in the discussion on a related issue - of the `pydantic-settings`_ GitHub repository. 
- - Essentially, this little hack allows a user to specify a one or multiple YAML files - from which the CLI should read configurations. Normally, `pydantic-settings` only - allows hard-coding the location of these config files. - - .. _this comment: https://github.com/pydantic/pydantic-settings/issues/259#issuecomment-2549444286 - .. _pydantic-settings: https://github.com/pydantic/pydantic-settings - """ - - def __init__( - self, - settings_cls, - yaml_file: PathType | None = DEFAULT_PATH, - yaml_file_encoding: str | None = None, - yaml_file_path_field: str = "configs", - ) -> None: - """Allow getting the YAML file path from any key in the current state. - - The argument ``yaml_file_path_field`` should be the :py:class:`BaseSettings` - field that contains the path(s) to the YAML file(s). - - Note that all config files must have a ``version: 1`` key in them to be - recognized as valid config files. - """ - self.yaml_file_path_field = yaml_file_path_field - super().__init__(settings_cls, yaml_file, yaml_file_encoding) - - def _read_file(self, file_path: Path) -> dict[str, Any]: - """Read the YAML and raise exception when ``version: 1`` not found.""" - with open(file_path, encoding=self.yaml_file_encoding) as yaml_file: - data = yaml.safe_load(yaml_file) or {} - if data.get("version") != 1: - raise ValueError( - f"Config file {file_path} does not have a 'version: 1' key. 
" - "For compatibility reasons, all config files must have this key.", - ) - return data - - def __call__(self) -> dict[str, Any]: - """Reload the config files from the paths in the current state.""" - yaml_file_to_reload = self.current_state.get( - self.yaml_file_path_field, - self.yaml_file_path, - ) - logger.debug(f"Reloading YAML files from {yaml_file_to_reload} (if it exists).") - self.__init__( - settings_cls=self.settings_cls, - yaml_file=yaml_file_to_reload, - yaml_file_encoding=self.yaml_file_encoding, - yaml_file_path_field=self.yaml_file_path_field, - ) - return super().__call__() - - def __repr__(self) -> str: - """Return a string representation of the source.""" - return ( - self.__class__.__name__ - + "(" - + f"yaml_file={self.yaml_file_path!r}, " - + f"yaml_file_encoding={self.yaml_file_encoding!r}, " - + f"yaml_file_path_field={self.yaml_file_path_field!r}" - + ")" - ) - - -class BaseCLI(BaseSettings): - """Base settings class for all CLI scripts to inherit from.""" - - model_config = ConfigDict(yaml_file="config.yaml", extra="ignore") - - configs: list[Path] = Field( - default=["config.yaml"], - description=( - "Path to the YAML file(s) that contain the configuration(s). Configs from " - "YAML files may be overwritten by command line arguments. When multiple " - "files are specified, the configs are merged in the order they are given. " - "Note that every config file must have a `version: 1` key in it." 
- ), - ) - - @classmethod - def settings_customise_sources( - cls, - settings_cls: type[BaseSettings], - init_settings: PydanticBaseSettingsSource, - env_settings: PydanticBaseSettingsSource, - dotenv_settings: PydanticBaseSettingsSource, - file_secret_settings: PydanticBaseSettingsSource, - ) -> tuple[PydanticBaseSettingsSource, ...]: - """Add the dynamic YAML config source to the CLI settings.""" - dynamic_yaml_config_source = DynamicYamlConfigSettingsSource( - settings_cls=settings_cls, - yaml_file_path_field="configs", - yaml_file_encoding="utf-8", - ) - logger.debug(f"Created {dynamic_yaml_config_source = }") - return ( - init_settings, - env_settings, - dotenv_settings, - file_secret_settings, - dynamic_yaml_config_source, - ) diff --git a/src/lyscripts/data/__init__.py b/src/lyscripts/data/__init__.py deleted file mode 100644 index 29ffe69..0000000 --- a/src/lyscripts/data/__init__.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Commands and functions for managing CSV data on patterns of lymphatic progression. - -This contains helpful CLI commands that allow building quick and reproducible workflows -even when using language-agnostic tools like `Make`_ or `DVC`_. - -Most of these commands can load `LyProX`_ style data from CSV files, but also from -the installed datasets provided by the `lydata`_ package and directly from the -associated `GitHub repository`_. - -Another cool feature is the built-in mini web application that allows collecting nodal -involvement data interactively and in the same standardized format as we have published -in the past, both on `LyProX`_ and in our `GitHub repository`_. It can be launched by -running `lyscripts data collect` in the terminal. See the docs for the -:py:mod:`lyscripts.data.collect` submodule on more information. - -.. _Make: https://www.gnu.org/software/make/ -.. _DVC: https://dvc.org -.. _LyProX: https://lyprox.org -.. _lydata: https://lydata.readthedocs.io -.. 
_GitHub repository: https://github.com/lycosystem/lydata -""" - -from pydantic_settings import BaseSettings, CliApp, CliSubCommand - -from lyscripts.data import ( # noqa: F401 - collect, - enhance, - fetch, - generate, - join, - lyproxify, - split, -) - -# Avoid conflict with built-in `filter` function -from lyscripts.data import filter as filter_ - - -class DataCLI(BaseSettings): - """Work with lymphatic progression data through this CLI.""" - - collect: CliSubCommand[collect.CollectorCLI] - lyproxify: CliSubCommand[lyproxify.LyproxifyCLI] - join: CliSubCommand[join.JoinCLI] - split: CliSubCommand[split.SplitCLI] - fetch: CliSubCommand[fetch.FetchCLI] - filter: CliSubCommand[filter_.FilterCLI] - enhance: CliSubCommand[enhance.EnhanceCLI] - generate: CliSubCommand[generate.GenerateCLI] - - def cli_cmd(self) -> None: - """Run one of the ``data`` subcommands.""" - CliApp.run_subcommand(self) diff --git a/src/lyscripts/data/__main__.py b/src/lyscripts/data/__main__.py deleted file mode 100644 index a78ddd1..0000000 --- a/src/lyscripts/data/__main__.py +++ /dev/null @@ -1,36 +0,0 @@ -"""Run the data module as a script.""" - -import argparse - -from lyscripts import exit_cli -from lyscripts.cli import RichDefaultHelpFormatter -from lyscripts.data import enhance, generate, join, split - -# Avoid conflict with built-in `filter` function -from lyscripts.data import filter as filter_ - - -def main(args: argparse.Namespace): - """Run the main script.""" - parser = argparse.ArgumentParser( - prog="lyscripts data", - description=__doc__, - formatter_class=RichDefaultHelpFormatter, - ) - parser.set_defaults(run_main=exit_cli) - subparsers = parser.add_subparsers() - - # the individual scripts add `ArgumentParser` instances and their arguments to - # this `subparsers` object - enhance._add_parser(subparsers, help_formatter=parser.formatter_class) - generate._add_parser(subparsers, help_formatter=parser.formatter_class) - join._add_parser(subparsers, 
help_formatter=parser.formatter_class) - split._add_parser(subparsers, help_formatter=parser.formatter_class) - filter_._add_parser(subparsers, help_formatter=parser.formatter_class) - - args = parser.parse_args() - args.run_main(args, parser) - - -if __name__ == "__main__": - main() diff --git a/src/lyscripts/data/collect/__init__.py b/src/lyscripts/data/collect/__init__.py deleted file mode 100644 index 425c728..0000000 --- a/src/lyscripts/data/collect/__init__.py +++ /dev/null @@ -1,150 +0,0 @@ -"""Submodule to collect data interactively using a simple web interface. - -With the simple command - -.. code-block:: bash - - lyscripts data collect - -One can start a very basic web server that serves an interactive UI at -``http://localhost:8000/``. There, one can enter patient, tumor, and lymphatic -involvement data one by one. When completed, the "submit" button will parse, validate, -and convert the data to serve a downloadable CSV file. - -The resulting CSV file is in the correct format to be used in `LyProX`_ and for -inference using our `lymph-model`_ library. - -.. _LyProX: https://lyprox.org -.. _lymph-model: https://lymph-model.readthedocs.io -""" - -import io -import logging -from pathlib import Path -from typing import Any - -import lydata -import lydata.validator -import pandas as pd -from fastapi import FastAPI, HTTPException -from fastapi.responses import StreamingResponse -from loguru import logger -from pydantic import Field, RootModel -from starlette.responses import FileResponse, HTMLResponse - -from lyscripts.cli import InterceptHandler, _current_log_level -from lyscripts.configs import BaseCLI - -app = FastAPI( - title="lyDATA Collector", - description=( - "A simple web interface to collect data for the lyDATA datasets. " - "This is a prototype and not intended for production use." 
- ), - version=lydata.__version__, -) - -BASE_DIR = Path(__file__).parent -modalities = lydata.schema.get_default_modalities() -RecordModel = lydata.schema.create_full_record_model(modalities, model_name="Record") -ROOT_MODEL = RootModel[list[RecordModel]] - - -@app.get("/") -def serve_index_html() -> HTMLResponse: - """Serve the ``index.html`` file at the URL's root.""" - with open(BASE_DIR / "index.html") as file: - content = file.read() - return HTMLResponse(content=content) - - -@app.get("/schema") -def serve_schema() -> dict[str, Any]: - """Serve the JSON schema for the patient and tumor records.""" - return ROOT_MODEL.model_json_schema() - - -@app.get("/collector.js") -def serve_collector_js() -> FileResponse: - """Serve the ``collector.js`` file under ``"http://{host}:{port}/collector.js"``. - - This frontend JavaScript file loads the `JSON-Editor`_ library and initializes it - using the schema returned by the :py:func:`serve_schema` function. - - .. _JSON-Editor: https://github.com/json-editor/json-editor/ - """ - return FileResponse(BASE_DIR / "collector.js") - - -@app.post("/submit") -async def process(data: RootModel) -> StreamingResponse: - """Process the submitted data to a DataFrame. - - `FastAPI`_ will automatically parse the received JSON data into the list of - instances of he pydantic type defined by the - :py:func:`lydata.schema.create_full_record_model` function. - - From this list, we create a pandas DataFrame and return it as a downloadable CSV - file. - - .. 
_FastAPI: https://fastapi.tiangolo.com/ - """ - logger.info(f"Received data: {data.root}") - - if len(data.root) == 0: - logger.warning("No records provided in the data.") - raise HTTPException( - status_code=400, - detail="No records provided in the data.", - ) - - flattened_records = [] - - for record in data.root: - flattened_record = lydata.validator.flatten(record) - logger.debug(f"Flattened record: {flattened_record}") - flattened_records.append(flattened_record) - - df = pd.DataFrame(flattened_records) - df.columns = pd.MultiIndex.from_tuples(flattened_record.keys()) - logger.info(df.patient.core.head()) - - buffer = io.StringIO() - df.to_csv(buffer, index=False) - buffer.seek(0) - logger.success("Data prepared for download") - return StreamingResponse( - buffer, - media_type="text/csv", - headers={"Content-Disposition": "attachment; filename=lydata_records.csv"}, - ) - - -class CollectorCLI(BaseCLI): - """Serve a FastAPI web app for collecting involvement patterns as CSV files.""" - - hostname: str = Field( - default="localhost", - description="Hostname to run the FastAPI app on.", - ) - port: int = Field( - default=8000, - description="Port to run the FastAPI app on.", - ) - - def cli_cmd(self) -> None: - """Run the FastAPI app.""" - logger.debug(self.model_dump_json(indent=2)) - import uvicorn - - # Intercept standard logging and redirect it to Loguru - logging.basicConfig(handlers=[InterceptHandler()], level=0, force=True) - logger.enable("lydata") - - uvicorn.run( - app, - host=self.hostname, - port=self.port, - log_level=_current_log_level.lower(), - log_config=None, - ) diff --git a/src/lyscripts/data/collect/collector.js b/src/lyscripts/data/collect/collector.js deleted file mode 100644 index 3d8d5d9..0000000 --- a/src/lyscripts/data/collect/collector.js +++ /dev/null @@ -1,151 +0,0 @@ -/** - * Client-side helper functions for collecting user input through JSONEditor, - * validating it against a fetched JSON Schema, submitting the validated data - * 
to the backend, and presenting a downloadable CSV returned by the server. - * - * NOTE: Functionality is intentionally unchanged; only readability and - * documentation have been improved. - */ - -/** - * Ensure an alert element (used to display validation errors) exists. - * Creates and appends it if missing. - * - * @returns {HTMLDivElement} The existing or newly created alert element. - */ -function ensureAlertExists() { - let alertElement = document.querySelector('.alert'); - if (!alertElement) { - alertElement = document.createElement('div'); - } - alertElement.className = 'alert alert-danger'; - const editorHolder = document.getElementById('editor_holder'); - editorHolder.appendChild(alertElement); - return alertElement; -} - -/** - * Remove an existing validation alert if present. - */ -function ensureAlertRemoved() { - const existingAlert = document.querySelector('.alert'); - if (existingAlert) { - console.log('Clearing existing alert'); - existingAlert.remove(); - } -} - -/** - * Remove an existing download button (if it exists) to avoid duplicates. - */ -function ensureDownloadButtonRemoved() { - const existingButton = document.getElementById('download_link'); - if (existingButton) { - console.log('Clearing existing download button'); - existingButton.remove(); - } -} - -/** - * Create (or replace) a download button for a CSV blob returned by the server. - * - * @param {Blob} blob - The CSV data blob to make downloadable. 
- */ -function createDownloadButton(blob) { - ensureDownloadButtonRemoved(); - - const url = window.URL.createObjectURL(blob); - const downloadLink = document.createElement('a'); - downloadLink.id = 'download_link'; - downloadLink.href = url; - downloadLink.textContent = 'Download CSV'; - downloadLink.className = 'btn btn-success'; - downloadLink.download = 'lydata_records.csv'; - - document.getElementById('editor_holder').appendChild(downloadLink); - console.log('Download button created:', downloadLink); -} - -/** - * Send validated editor data to the backend for processing. Expects a CSV blob - * in response which is then exposed via a generated download button. - * - * @param {JSONEditor} editor - The JSONEditor instance from which to read data. - */ -async function sendEditorData(editor) { - const data = editor.getValue(); - console.log('Sending data:', data); - - try { - const response = await fetch('/submit', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(data) - }); - - if (!response.ok) { - // Try to parse error details from JSON, fallback to text - let errorMsg = 'Unknown error'; - try { - const err = await response.json(); - errorMsg = err.detail || err.message || errorMsg; - } catch { - errorMsg = await response.text(); - } - throw new Error(errorMsg); - } - - const blob = await response.blob(); - console.log('Received processed data as blob:', blob); - createDownloadButton(blob); - } catch (error) { - ensureDownloadButtonRemoved(); - console.error('Error submitting data:', error); - const alert = ensureAlertExists(); - alert.textContent = 'Error submitting data: ' + error.message; - alert.classList.add('alert-danger'); - } -} - -/** - * Validate the editor content. If there are validation errors they are - * displayed in an alert; otherwise the data is submitted to the backend. - * - * @param {JSONEditor} editor - The JSONEditor instance to validate & submit. 
- */ -function processEditor(editor) { - const errors = editor.validate(); - - if (errors.length) { - console.error('Validation errors:', errors); - const alert = ensureAlertExists(); - alert.textContent = 'Validation errors: ' + errors.map(e => e.message).join(', '); - } else { - console.log('Data successfully validated'); - ensureAlertRemoved(); - sendEditorData(editor); - } -} - -// Fetch the JSON Schema to initialize the editor -fetch('/schema') - .then(response => response.json()) - .then(schema => { - const element = document.getElementById('editor_holder'); - const options = { - disable_edit_json: true, - theme: 'bootstrap5', - iconlib: 'bootstrap', - object_layout: 'grid', - disable_properties: true, - schema: schema - }; - const editor = new JSONEditor(element, options); - - // Bind the submit button to validation + submission flow - document.getElementById('submit').addEventListener('click', () => { - console.log('Submit button clicked'); - processEditor(editor); - }); - }) - .catch(error => console.error('Error loading schema:', error)); diff --git a/src/lyscripts/data/collect/index.html b/src/lyscripts/data/collect/index.html deleted file mode 100644 index 931dacf..0000000 --- a/src/lyscripts/data/collect/index.html +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - Basic JSON Editor Example - - - - - - - - -
-

LyDATA Collector

- -
- - - -
- - - diff --git a/src/lyscripts/data/enhance.py b/src/lyscripts/data/enhance.py deleted file mode 100644 index 87b1e23..0000000 --- a/src/lyscripts/data/enhance.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Enhance the dataset by inferring additional columns from the data. - -This is a command-line interface to the methods -:py:meth:`~lydata.accessor.LyDataAccessor.combine` and -:py:meth:`~lydata.accessor.LyDataAccessor.augment` of the -:py:class:`~lydata.accessor.LyDataAccessor` class. -""" - -from typing import Literal - -from loguru import logger -from lydata.accessor import LyDataFrame -from lydata.utils import ModalityConfig - -from lyscripts.cli import assemble_main -from lyscripts.configs import BaseCLI, DataConfig -from lyscripts.data.utils import save_table_to_csv - - -class EnhanceCLI(BaseCLI): - """Enhance the dataset by inferring additional columns from the data.""" - - input: DataConfig - modalities: dict[str, ModalityConfig] | None = None - method: Literal["max_llh", "rank"] = "max_llh" - lnl_subdivisions: dict[str, list[str]] = { - "I": ["a", "b"], - "II": ["a", "b"], - "V": ["a", "b"], - } - output_file: str - - def cli_cmd(self) -> None: - """Infer additional columns from the data and save the enhanced dataset. - - This basically provides a CLI to the - :py:func:`~lydata.accessor.LyDataAccessor.augment` function. See its docs for - more details on what exactly is happening here. 
- """ - logger.debug(self.model_dump_json(indent=2)) - - data: LyDataFrame = self.input.load() - data = data.ly.enhance( - modalities=self.modalities, - method=self.method, - subdivisions=self.lnl_subdivisions, - ) - save_table_to_csv(file_path=self.output_file, table=data) - - -if __name__ == "__main__": - main = assemble_main(settings_cls=EnhanceCLI, prog_name="enhance") - main() diff --git a/src/lyscripts/data/fetch.py b/src/lyscripts/data/fetch.py deleted file mode 100644 index dccbf87..0000000 --- a/src/lyscripts/data/fetch.py +++ /dev/null @@ -1,57 +0,0 @@ -"""Small command to fetch the data from a remote using the lydata package.""" - -from pathlib import Path - -import lydata # noqa: F401 -from loguru import logger -from lydata.loader import LyDataset -from pydantic import Field - -from lyscripts.cli import assemble_main -from lyscripts.configs import BaseCLI - - -class FetchCLI(LyDataset, BaseCLI): - """Fetch a specific dataset from the lyDATA repository.""" - - github_token: str | None = Field( - default=None, - description=( - "GitHub token to access private datasets. Can also be provided as " - "`GITHUB_TOKEN` environment variable." - ), - ) - github_user: str | None = Field( - default=None, - description=( - "GitHub user for non-token login. Can also be provided as " - "`GITHUB_USER` environment variable." - ), - ) - github_password: str | None = Field( - default=None, - description=( - "GitHub password for non-token login. Can also be provided as " - "`GITHUB_PASSWORD` environment variable." 
- ), - ) - output_file: Path = Field(description="The path to save the dataset to.") - - def cli_cmd(self): - """Execute the ``fetch`` command.""" - logger.enable("lydata") - logger.debug(self.model_dump_json(indent=2)) - - dataset = self.get_dataframe( - use_github=True, - token=self.github_token, - user=self.github_user, - password=self.github_password, - ) - dataset.to_csv(self.output_file, index=False) - logger.success(f"Fetched dataset and saved to {self.output_file}") - - -if __name__ == "__main__": - main = assemble_main(settings_cls=FetchCLI, prog_name="fetch") - main() diff --git a/src/lyscripts/data/filter.py b/src/lyscripts/data/filter.py deleted file mode 100644 index 3e09fe8..0000000 --- a/src/lyscripts/data/filter.py +++ /dev/null @@ -1,99 +0,0 @@ -"""Filter a dataset according to some common criteria. - -This is essentially a command line interface to building a -:py:class:`query object ` and applying it to the dataset. -""" - -from pathlib import Path -from typing import Literal - -from loguru import logger -from lydata import Q -from pydantic import Field -from pydantic_settings import CliImplicitFlag - -from lyscripts.cli import assemble_main -from lyscripts.configs import BaseCLI, DataConfig -from lyscripts.data.utils import save_table_to_csv - - -class FilterCLI(BaseCLI): - """In- or exclude patients where a certain column fulfills a certain condition.""" - - input: DataConfig - include: CliImplicitFlag[bool] = Field( - False, - description="Include patients where the condition is met (default: exclude).", - ) - column: list[str] | str = Field( - description=( - "The column to filter by. May be a tuple of three strings, since data " - "has a three-level header. If it is only one string, the lydata package " - "tries to map that to a three-level header." 
- ), - ) - operator: Literal["==", "!=", ">", "<", ">=", "<=", "in", "contains"] = Field( - description="The operator to use for comparison.", - ) - value: float | int | str = Field(description="The value to compare against.") - output_file: Path = Field(description="The path to save the filtered dataset to.") - - def model_post_init(self, __context): - """Cast to ``float``, if not possible ``int``, if not possible ``str``.""" - if isinstance(self.column, list): - if len(self.column) == 1: - self.column = self.column[0] - elif len(self.column) == 3: - self.column = tuple(self.column) - else: - raise ValueError( - "The column attribute must be an iterable of three strings or a " - f"single string, but it is {self.column}.", - ) - - try: - self.value = float(self.value) - return super().model_post_init(__context) - except ValueError: - pass - - try: - self.value = int(self.value) - return super().model_post_init(__context) - except ValueError: - pass - - return super().model_post_init(__context) - - def cli_cmd(self): - """Execute the ``filter`` command. - - This command uses the :py:class:`~lydata.querier.Q` objects of the `lydata`_ - library to filter the dataset according to the given criteria. - - .. 
_lydata: https://lydata.readthedocs.io - """ - logger.debug(self.model_dump_json(indent=2)) - - data = self.input.load() - query = Q( - column=self.column, - operator=self.operator, - value=self.value, - ) - logger.debug(f"Created query object: {query}") - mask = query.execute(data) - - if self.include: - filtered = data[mask] - logger.info(f"Keeping {sum(mask)} of {len(data)} patients.") - else: - filtered = data[~mask] - logger.info(f"Excluding {sum(mask)} of {len(data)} patients.") - - save_table_to_csv(file_path=self.output_file, table=filtered) - - -if __name__ == "__main__": - main = assemble_main(settings_cls=FilterCLI, prog_name="filter") - main() diff --git a/src/lyscripts/data/generate.py b/src/lyscripts/data/generate.py deleted file mode 100644 index 8dadf03..0000000 --- a/src/lyscripts/data/generate.py +++ /dev/null @@ -1,96 +0,0 @@ -"""Script to generate a synthetic dataset. - -The generation is done by the :py:meth:`~lymph.models.Unilateral.draw_patients` method -of -the `lymph`_ package, which is why this requires the specification of a model -via the :py:class:`~lyscripts.configs.ModelConfig` class. - -.. _lymph: https://lymph-model.readthedocs.io/ -""" - -import numpy as np -from loguru import logger -from lydata.utils import ModalityConfig -from pydantic import Field - -from lyscripts.cli import assemble_main -from lyscripts.configs import ( - BaseCLI, - DistributionConfig, - GraphConfig, - ModelConfig, - add_distributions, - add_modalities, - construct_model, -) -from lyscripts.data.utils import save_table_to_csv - - -class GenerateCLI(BaseCLI): - """Settings for the command-line interface.""" - - graph: GraphConfig - model: ModelConfig = ModelConfig() - distributions: dict[str, DistributionConfig] = Field( - default={}, - description=( - "Mapping of model T-categories to predefined distributions over " - "diagnose times." 
- ), - ) - t_stages_dist: dict[str, float] = Field( - description=( - "Specify what fraction of generated patients should come from the " - "respective T-Stage." - ), - ) - modalities: dict[str, ModalityConfig] - params: dict[str, float] - num_patients: int = 200 - output_file: str - seed: int = 42 - - def model_post_init(self, __context) -> None: - """Make sure distribution over T-stages is normalized.""" - total = 0.0 - for t_stage in self.distributions: - if t_stage not in self.t_stages_dist: - raise ValueError(f"Missing distribution for T-stage {t_stage}.") - - total += self.t_stages_dist[t_stage] - - if not np.isclose(total, 1.0): - raise ValueError("Sum of T-stage distributions must be 1.") - - return super().model_post_init(__context) - - def cli_cmd(self) -> None: - """Run the ``generate`` command. - - Here, the command constructs a model from the settings provided via the - arguments. It then generates a synthetic dataset using the - :py:meth:`~lymph.models.Unilateral.draw_patients` from the `lymph`_ package. - - .. 
_lymph: https://lymph-model.readthedocs.io/ - """ - logger.debug(self.model_dump_json(indent=2)) - - model = construct_model(self.model, self.graph) - model = add_distributions(model, self.distributions) - model = add_modalities(model, self.modalities) - model.set_params(**self.params) - logger.info(f"Set parameters: {model.get_params(as_dict=True)}") - - synth_data = model.draw_patients( - num=self.num_patients, - stage_dist=list(self.t_stages_dist.values()), - seed=self.seed, - ) - logger.info(f"Generated synthetic data with shape {synth_data.shape}") - - save_table_to_csv(file_path=self.output_file, table=synth_data) - - -if __name__ == "__main__": - main = assemble_main(settings_cls=GenerateCLI, prog_name="data generate") - main() diff --git a/src/lyscripts/data/join.py b/src/lyscripts/data/join.py deleted file mode 100644 index 8b0547e..0000000 --- a/src/lyscripts/data/join.py +++ /dev/null @@ -1,82 +0,0 @@ -"""Join multiple lymphatic progression datasets into a single dataset.""" - -from pathlib import Path - -import pandas as pd -from lydata.validator import cast_dtypes -from pydantic import Field - -from lyscripts.cli import assemble_main -from lyscripts.configs import BaseCLI, DataConfig -from lyscripts.data.utils import save_table_to_csv - - -class JoinCLI(BaseCLI): - """Join multiple lymphatic progression datasets into a single dataset.""" - - inputs: list[DataConfig] = Field(description="The datasets to join.") - output_file: Path = Field(description="The path to the output dataset.") - - def cli_cmd(self) -> None: - r"""Start the ``join`` subcommand. - - This will load all datasets specified in the ``inputs`` attribute and - concatenate them into a single dataset. - - Unfortunately, the use of `pydantic`_ does make this particular command a - little bit more complicated (but also more powerful): If one simply wants to - concatenate multiple datasets on disk, the ``inputs`` should be provided like - this: - - .. 
code-block:: bash - - lyscripts data join \ - --inputs '{"source": "file1.csv"}' \ - --inputs '{"source": "file2.csv"}' \ - --output-file "joined.csv" - - But it also allows for concatenating datasets fetched directly from the - `lydata Github repo`_. Due to the rather complex command signature, we - recommend defining what to concatenate using a YAML file: - - .. code-block:: yaml - - inputs: - - data.year: 2021 - data.institution: "usz" - data.subsite: "oropharynx" - - data.year: 2021 - data.institution: "clb" - data.subsite: "oropharynx" - - Then, the command will look like this: - - .. code-block:: bash - - lyscripts data join --configs datasets.ly.yaml --output-file joined.csv - - .. _pydantic: https://docs.pydantic.dev/latest/ - .. _lydata Github repo: https://github.com/lycosystem/lydata - """ - joined = None - - for data_config in self.inputs: - data = data_config.load() - # `cast_dtypes()` ensures that e.g. boolean values are not suddenly - # converted to strings when a dataset with missing values is concatenated. - data = cast_dtypes(data) - if joined is None: - joined = data - else: - joined = pd.concat( - [joined, data], - axis="index", - ignore_index=True, - ) - - save_table_to_csv(file_path=self.output_file, table=joined) - - -if __name__ == "__main__": - main = assemble_main(settings_cls=JoinCLI, prog_name="join") - main() diff --git a/src/lyscripts/data/lyproxify.py b/src/lyscripts/data/lyproxify.py deleted file mode 100644 index d810824..0000000 --- a/src/lyscripts/data/lyproxify.py +++ /dev/null @@ -1,341 +0,0 @@ -"""Consumes raw data and transforms it into a CSV that `LyProX`_ understands. - -To do so, it needs a dictionary that defines a mapping from raw columns to the LyProX -style data format. See the documentation of the :py:func:`.transform_to_lyprox` function -for more information. - -.. 
_LyProX: https://lyprox.org -""" - -import importlib.util -import warnings -from pathlib import Path -from typing import Annotated, Any - -import lydata # noqa: F401 -import pandas as pd -from loguru import logger -from lydata import C -from pydantic import AfterValidator, Field, FilePath - -from lyscripts.cli import assemble_main -from lyscripts.configs import BaseCLI -from lyscripts.data.utils import save_table_to_csv -from lyscripts.utils import delete_private_keys, flatten, load_patient_data - -warnings.simplefilter(action="ignore", category=FutureWarning) - - -def ensure_python_file(file: Path) -> Path: - """Check if the file is a Python file.""" - if file.suffix != ".py": - raise ValueError("Mapping file must be a Python file.") - - return file - - -def ensure_column_map(file: Path) -> Path: - """Ensure the Python file contains a ``COLUMN_MAP`` dictionary.""" - spec = importlib.util.spec_from_file_location("map_module", file) - mapping = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mapping) - - if not hasattr(mapping, "COLUMN_MAP"): - raise ValueError("Mapping file must contain a `COLUMN_MAP` dictionary.") - - return file - - -class LyproxifyCLI(BaseCLI): - """Map any CSV file to the LyProX format with the help of a Python mapping dict.""" - - input_file: FilePath = Field(description="Location of raw CSV data.") - num_header_rows: int = Field( - default=1, - description="Number of rows comprising the header of the raw CSV file.", - ) - mapping_file: Annotated[ - FilePath, - AfterValidator(ensure_python_file), - AfterValidator(ensure_column_map), - ] = Field( - description=( - "Location of Python file containing a `COLUMN_MAP` dictionary. It may also " - "contain an `EXCLUDE` list of tuples `(column, check)` to exclude patients." - ), - ) - drop_rows: list[int] = Field( - default=[], - description=( - "Delete rows of specified indices. Counting of rows start at 0 _after_ " - "the `header-rows`." 
- ), - ) - drop_cols: list[int] = Field( - default=[], - description="Delete columns of specified indices.", - ) - output_file: Path = Field(description="Location to store the lyproxified CSV file.") - - def cli_cmd(self) -> None: - """Start the ``lyproxify`` subcommand. - - After reading in the specified file, it will first ``drop_rows`` and - ``drop_cols``, as specified in the command line arguments. Then, it will - call :py:func:`.exclude_patients` which will further remove patients based - on the ``EXCLUDE`` object in the ``mapping_file``. Finally, it will call - :py:func:`.transform_to_lyprox` to transform the data into the LyProX format - given the ``COLUMN_MAP`` object in the ``mapping_file``. - """ - logger.debug(self.model_dump_json(indent=2)) - - raw = load_patient_data( - file_path=self.input_file, - header=list(range(self.num_header_rows)), - ) - raw = clean_header( - table=raw, - num_cols=raw.shape[1], - num_header_rows=self.num_header_rows, - ) - - cols_to_drop = raw.columns[self.drop_cols] - trimmed = raw.drop(cols_to_drop, axis="columns") - trimmed = trimmed.drop(index=self.drop_rows) - trimmed = trimmed.dropna(axis="index", how="all") - logger.info(f"Dropped rows {self.drop_rows} and columns {cols_to_drop}.") - - spec = importlib.util.spec_from_file_location("map_module", self.mapping_file) - mapping = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mapping) - logger.info(f"Imported mapping instructions from {self.mapping_file}") - - reduced = exclude_patients(trimmed, mapping.EXCLUDE) - processed = transform_to_lyprox(reduced, mapping.COLUMN_MAP) - - if "side" in processed.ly: - processed = leftright_to_ipsicontra(processed) - - save_table_to_csv(file_path=self.output_file, table=processed) - - -class ParsingError(Exception): - """Error while parsing the CSV file.""" - - -def clean_header( - table: pd.DataFrame, - num_cols: int, - num_header_rows: int, -) -> pd.DataFrame: - """Rename the header cells in the ``table``.""" - table = 
table.copy() - - for col in range(num_cols): - for row in range(num_header_rows): - table.rename( - columns={f"Unnamed: {col}_level_{row}": f"{col}_lvl_{row}"}, - inplace=True, - ) - - logger.debug("Cleaned headers of the raw data.") - return table - - -def get_instruction_depth(nested_column_map: dict[tuple, dict[str, Any]]) -> int: - """Get the depth at which the column mapping instructions are nested. - - Instructions are a dictionary that contains either a 'func' or 'default' key. - - >>> nested_column_map = {"patient": {"age": {"func": int}}} - >>> get_instruction_depth(nested_column_map) - 2 - >>> flat_column_map = flatten(nested_column_map, max_depth=2) - >>> get_instruction_depth(flat_column_map) - 1 - >>> nested_column_map = {"patient": {"__doc__": "some patient info", "age": 61}} - >>> get_instruction_depth(nested_column_map) - Traceback (most recent call last): - ... - ValueError: Leaf of column map must be a dictionary with 'func' or 'default' key. - """ - for _, value in nested_column_map.items(): - if isinstance(value, dict): - if "func" in value or "default" in value: - return 1 - - return 1 + get_instruction_depth(value) - - raise ValueError( - "Leaf of column map must be a dictionary with 'func' or 'default' key.", - ) - - raise ValueError("Empty column map.") - - -def generate_markdown_docs( - nested_column_map: dict[tuple, dict[str, Any]], - depth: int = 0, - indent_len: int = 4, -) -> str: - r"""Generate a markdown nested, ordered list as documentation for the column map. - - A key in the doctionary is supposed to be documented, when its value is a dictionary - containing a ``"__doc__"`` key. - - >>> nested_column_map = { - ... "patient": { - ... "__doc__": "some patient info", - ... "age": { - ... "__doc__": "age of the patient", - ... "func": int, - ... "columns": ["age"], - ... }, - ... }, - ... } - >>> generate_markdown_docs(nested_column_map) - '1. **`patient:`** some patient info\n 1. 
**`age:`** age of the patient\n' - """ - md_docs = "" - indent = " " * indent_len * depth - i = 1 - for key, value in nested_column_map.items(): - if isinstance(value, dict): - if "__doc__" in value: - md_docs += f"{indent}{i}. **`{key}:`** {value['__doc__']}\n" - i += 1 - - md_docs += generate_markdown_docs(value, depth + 1, indent_len) - - return md_docs - - -def transform_to_lyprox( - raw: pd.DataFrame, - column_map: dict[tuple, dict[str, Any]], -) -> pd.DataFrame: - """Transform ``raw`` data into table that can be uploaded directly to `LyProX`_. - - To do so, it uses instructions in the `colum_map` dictionary, that needs to have - a particular structure: - - For each column in the final 'lyproxified' `pd.DataFrame`, one entry must exist in - the `column_map` dictionary. E.g., for the column corresponding to a patient's age, - the dictionary should contain a key-value pair of this shape: - - .. code-block:: python - - column_map = { - ("patient", "core", "age"): { - "func": compute_age_from_raw, - "kwargs": {"randomize": False}, - "columns": ["birthday", "date of diagnosis"] - }, - } - - In this example, the function ``compute_age_from_raw`` is called with the - values of the columns ``"birthday"`` and ``"date of diagnosis"`` as positional - arguments, and the keyword argument ``"randomize"`` is set to ``False``. The - function then returns the patient's age, which is subsequently stored in the column - ``("patient", "core", "age")``. - - Note that the ``column_map`` dictionary must have either a ``"default"`` key or - ``"func"`` along with ``"columns"`` and ``"kwargs"``, depending on the function - definition. If the function does not take any arguments, ``"columns"`` can be - omitted. If it also does not take any keyword arguments, ``"kwargs"`` can be - omitted, too. - - .. 
_LyProX: https://lyprox.org - """ - column_map = delete_private_keys(column_map) - - if (instruction_depth := get_instruction_depth(column_map)) > 1: - column_map = flatten(column_map, max_depth=instruction_depth) - - multi_idx = pd.MultiIndex.from_tuples(column_map.keys()) - processed = pd.DataFrame(columns=multi_idx) - - for multi_idx_col, instruction in column_map.items(): - if instruction != "": - if "default" in instruction: - processed[multi_idx_col] = [instruction["default"]] * len(raw) - elif "func" in instruction: - cols = instruction.get("columns", []) - kwargs = instruction.get("kwargs", {}) - func = instruction["func"] - - try: - processed[multi_idx_col] = [ - func(*vals, **kwargs) for vals in raw[cols].values - ] - except Exception as exc: - raise ParsingError( - f"Exception encountered while parsing column {multi_idx_col}", - ) from exc - else: - raise ParsingError( - f"Column {multi_idx_col} has neither a `default` value nor `func` " - "describing how to fill this column.", - ) - - logger.info("Transformed raw data to LyProX format.") - return processed - - -def leftright_to_ipsicontra(data: pd.DataFrame): - """Change absolute side reporting to tumor-relative. - - Transform reporting of LNL involvement by absolute side (right & left) to a - reporting relative to the tumor (ipsi- & contralateral). The table ``data`` should - already be in the format LyProX requires, except for the side-reporting of LNL - involvement. 
- """ - len_before = len(data) - left_data = data.ly.query(C("side") != "right") - right_data = data.ly.query(C("side") == "right") - - left_data = left_data.rename(columns={"left": "ipsi"}, level=1) - left_data = left_data.rename(columns={"right": "contra"}, level=1) - right_data = right_data.rename(columns={"left": "contra"}, level=1) - right_data = right_data.rename(columns={"right": "ipsi"}, level=1) - - data = pd.concat([left_data, right_data], ignore_index=True) - if len_before != len(data): - raise RuntimeError("Number of patients changed") - - logger.info("Transformed side reporting to ipsi- and contralateral.") - return data - - -def exclude_patients(raw: pd.DataFrame, exclude: list[tuple[str, Any]]): - """Exclude patients in the ``raw`` data based on a list of what to ``exclude``. - - The ``exclude`` list contains tuples ``(column, check)``. The ``check`` function - will then exclude any patients from the cohort where ``check(raw[column])`` - evaluates to ``True``. - - >>> exclude = [("age", lambda s: s > 50)] - >>> table = pd.DataFrame({ - ... "age": [43, 82, 18, 67], - ... "T-category": [ 3, 4, 2, 1], - ... 
}) - >>> exclude_patients(table, exclude) - age T-category - 0 43 3 - 2 18 2 - """ - num_before = len(raw) - filtered = raw.copy() - - for column, check in exclude: - is_excluded = check(filtered[column]) - filtered = filtered.loc[~is_excluded] - - num_after = len(filtered) - logger.info(f"Excluded {num_before - num_after} patients.") - return filtered - - -if __name__ == "__main__": - main = assemble_main(settings_cls=LyproxifyCLI, prog_name="lyproxify") - main() diff --git a/src/lyscripts/data/split.py b/src/lyscripts/data/split.py deleted file mode 100644 index 2c9789e..0000000 --- a/src/lyscripts/data/split.py +++ /dev/null @@ -1,73 +0,0 @@ -"""Split a dataset into cross-validation folds based on params.yaml file.""" - -import warnings -from pathlib import Path - -import numpy as np -import pandas as pd -from loguru import logger -from pydantic import Field - -from lyscripts.cli import assemble_main -from lyscripts.configs import BaseCLI, CrossValidationConfig, DataConfig -from lyscripts.data.utils import save_table_to_csv - -warnings.simplefilter(action="ignore", category=FutureWarning) - - -class SplitCLI(BaseCLI): - """Split a dataset into cross-validation folds.""" - - input: DataConfig - cross_validation: CrossValidationConfig = CrossValidationConfig() - output_dir: Path = Field(description="The folder to store the split CSV files in.") - - def cli_cmd(self) -> None: - """Run the ``split`` subcommand. - - This will load the dataset specified in the ``input`` argument and split it - into the number of folds specified in the ``cross_validation`` argument. The - resulting splits will be stored in the folder specified in the ``output_dir`` - argument. 
- """ - logger.debug(self.model_dump_json(indent=2)) - - self.output_dir.mkdir(parents=True, exist_ok=True) - logger.info(f"Ensure output directory {self.output_dir} exists") - - data = self.input.load() - - shuffled_data = data.sample( - frac=1.0, - replace=False, - random_state=self.cross_validation.seed, - ).reset_index(drop=True) - - split_datas = np.array_split( - ary=shuffled_data, - indices_or_sections=self.cross_validation.folds, - ) - for fold in range(self.cross_validation.folds): - _train_datas = [ - split_datas[i] for i in range(self.cross_validation.folds) if i != fold - ] - train_data = pd.concat( - objs=_train_datas, - axis="index", - ignore_index=True, - ) - eval_data = split_datas[fold] - - save_table_to_csv( - file_path=self.output_dir / f"{fold}_train.csv", - table=train_data, - ) - save_table_to_csv( - file_path=self.output_dir / f"{fold}_eval.csv", - table=eval_data, - ) - - -if __name__ == "__main__": - main = assemble_main(settings_cls=SplitCLI, prog_name="split") - main() diff --git a/src/lyscripts/data/utils.py b/src/lyscripts/data/utils.py deleted file mode 100644 index a5399a9..0000000 --- a/src/lyscripts/data/utils.py +++ /dev/null @@ -1,16 +0,0 @@ -"""Utilities related to the commands for data cleaning and processing.""" - -from pathlib import Path - -import pandas as pd -from loguru import logger - -from lyscripts.decorators import check_output_dir_exists - - -@check_output_dir_exists -def save_table_to_csv(file_path: Path, table: pd.DataFrame): - """Save a ``table`` to ``output_path``.""" - shape = table.shape - logger.info(f"Saving table with {shape=} to {file_path.resolve()}") - table.to_csv(file_path, index=None) diff --git a/src/lyscripts/decorators.py b/src/lyscripts/decorators.py deleted file mode 100644 index 366ccd9..0000000 --- a/src/lyscripts/decorators.py +++ /dev/null @@ -1,88 +0,0 @@ -"""Decorators to avoid repetitive snippets of code. - -E.g. safely opening files or logging the state of a function call. 
- -This is *not* a command line tool. -""" - -import functools -import logging -from collections.abc import Callable -from functools import wraps -from pathlib import Path -from typing import Any - - -def assemble_signature(*args, **kwargs) -> str: - """Assemble the signature of the function call.""" - args_str = ", ".join(str(arg) for arg in args) - kwargs_str = ", ".join(f"{key}={value}" for key, value in kwargs.items()) - return ", ".join([args_str, kwargs_str]) - - -def log_state(log_level: int = logging.INFO) -> Callable: - """Provide a decorator that logs the state of the function execution. - - The log message will simply be the function name where underscores are replaced - with spaces. The `log_level` can be set in the decorator call. - """ - - def log_decorator(func: Callable): - """Decorate function for which to add logs.""" - - @functools.wraps(func) - def wrapper(*args, **kwargs): - """Execute decorated function.""" - logger = logging.getLogger(func.__module__) - signature = assemble_signature(*args, **kwargs) - logger.debug(f"Executing {func.__name__}({signature}).") - log_msg_from_func = func.__name__.replace("_", " ").capitalize() + "." 
- - try: - logger.log( - log_level, - log_msg_from_func, - extra={ - "func_filepath": f"{func.__module__.replace('.', '/')}.py", - "func_name": func.__name__, - "module_name": func.__module__, - }, - ) - return func(*args, **kwargs) - - except Exception as exc: - logger.error(f"Error calling {func.__name__}().", exc_info=exc) - raise exc - - return wrapper - - return log_decorator - - -def check_input_file_exists(loading_func: Callable) -> Callable: - """Check if the file path provided to the `loading_func` exists.""" - - @wraps(loading_func) - def inner(file_path: str, *args, **kwargs) -> Any: - """Execute wrapped loading function.""" - file_path = Path(file_path) - if not file_path.is_file(): - raise FileNotFoundError(f"File {file_path} does not exist.") - - return loading_func(file_path, *args, **kwargs) - - return inner - - -def check_output_dir_exists(saving_func: Callable) -> Callable: - """Make sure the parent directory of the saved file exists.""" - - @wraps(saving_func) - def inner(file_path: str, *args, **kwargs) -> Any: - """Execute wrapped saving function.""" - file_path = Path(file_path) - file_path.parent.mkdir(parents=True, exist_ok=True) - - return saving_func(file_path, *args, **kwargs) - - return inner diff --git a/src/lyscripts/evaluate.py b/src/lyscripts/evaluate.py deleted file mode 100644 index 5f9dbd0..0000000 --- a/src/lyscripts/evaluate.py +++ /dev/null @@ -1,205 +0,0 @@ -"""Evaluate the performance of the trained model. - -This is done by computing quantities like the Bayesian information criterion (BIC) or -(if thermodynamic integration was performed) the actual evidence (with error) of the -model. 
-""" - -import argparse -import json -from pathlib import Path - -import emcee -import h5py -import numpy as np -import pandas as pd -from loguru import logger -from scipy.integrate import trapezoid - -from lyscripts.utils import load_patient_data, load_yaml_params - -RNG = np.random.default_rng() - - -def _add_parser( - subparsers: argparse._SubParsersAction, - help_formatter, -): - """Add an ``ArgumentParser`` to the subparsers action.""" - parser = subparsers.add_parser( - Path(__file__).name.replace(".py", ""), - description=__doc__, - help=__doc__, - formatter_class=help_formatter, - ) - _add_arguments(parser) - - -def _add_arguments(parser: argparse.ArgumentParser): - """Add arguments to a ``subparsers`` instance and run its main function when chosen. - - This is called by the parent module that is called via the command line. - """ - parser.add_argument( - "data", - type=Path, - help="Path to the tables of patient data (CSV).", - ) - parser.add_argument("model", type=Path, help="Path to model output files (HDF5).") - - parser.add_argument( - "-p", - "--params", - default="./params.yaml", - type=Path, - help="Path to parameter file", - ) - parser.add_argument( - "--plots", - default="./plots", - type=Path, - help="Directory for storing plots", - ) - parser.add_argument( - "--metrics", - default="./metrics.json", - type=Path, - help="Path to metrics file", - ) - - parser.set_defaults(run_main=main) - - -def comp_bic(log_probs: np.ndarray, num_params: int, num_data: int) -> float: - r"""Compute the negative one half of the Bayesian Information Criterion (BIC). - - The BIC is defined as [^1] - $$ BIC = k \\ln{n} - 2 \\ln{\\hat{L}} $$ - where $k$ is the number of parameters ``num_params``, $n$ the number of datapoints - ``num_data`` and $\\hat{L}$ the maximum likelihood estimate of the ``log_prob``. 
- It is constructed such that the following is an - approximation of the model evidence: - $$ p(D \\mid m) \\approx \\exp{\\left( - BIC / 2 \\right)} $$ - which is why this function returns the negative one half of it. - - [^1]: https://en.wikipedia.org/wiki/Bayesian_information_criterion - """ - return np.max(log_probs) - num_params * np.log(num_data) / 2.0 - - -def compute_evidence( - temp_schedule: np.ndarray, - log_probs: np.ndarray, -) -> float: - """Compute the evidence. - - Given a ``temp_schedule`` of inverse temperatures and corresponding sets of - ``log_probs``, we calculate the mean ``log_prob`` over all samples to approximate - the expectation value under the corresponding power posterior for each step in the - ``temp_schedule``. The evidence is evaluated using trapezoidal integration of the - expectation values over the ``temp_schedule``. - """ - a_mc = np.mean(log_probs, axis=1) - return trapezoid(y=a_mc, x=temp_schedule) - - -def compute_ti_results( - metrics: dict, - params: dict, - ndim: int, - h5_file: Path, - model: Path, -) -> tuple[np.ndarray, np.ndarray]: - """Compute the results in case of a thermodynamic integration run.""" - temp_schedule = params["sampling"]["temp_schedule"] - num_temps = len(temp_schedule) - - if num_temps != len(h5_file["ti"]): - raise RuntimeError( - f"Parameters suggest temp schedule of length {num_temps}, " - f"but stored are {len(h5_file['ti'])}", - ) - - nwalker = ndim * params["sampling"]["walkers_per_dim"] - nsteps = params["sampling"]["nsteps"] - ti_log_probs = np.zeros(shape=(num_temps, nsteps * nwalker)) - - for i, run in enumerate(h5_file["ti"]): - reader = emcee.backends.HDFBackend(model, name=f"ti/{run}", read_only=True) - ti_log_probs[i] = reader.get_blobs(flat=True) - - evidence = compute_evidence(temp_schedule, ti_log_probs) - metrics["evidence"] = evidence - - return temp_schedule, ti_log_probs - - -def main(args: argparse.Namespace): - """Run main script.""" - metrics = {} - - params = 
load_yaml_params(args.params) - model = None # create_model(params) - ndim = len(model.get_params()) - data = load_patient_data(args.data) - h5_file = h5py.File(args.model, mode="r") - - # if TI has been performed, compute the accuracy for every step - if "ti" in h5_file: - temp_schedule, ti_log_probs = compute_ti_results( - metrics=metrics, - params=params, - ndim=ndim, - h5_file=h5_file, - model=args.model, - ) - logger.info( - "Computed results of thermodynamic integration with " - f"{len(temp_schedule)} steps", - ) - - # store inverse temperatures and log-probs in CSV file - args.plots.parent.mkdir(exist_ok=True) - - beta_vs_accuracy = pd.DataFrame( - np.array( - [ - temp_schedule, - np.mean(ti_log_probs, axis=1), - np.std(ti_log_probs, axis=1), - ], - ).T, - columns=["ฮฒ", "accuracy", "std"], - ) - beta_vs_accuracy.to_csv(args.plots, index=False) - logger.info(f"Plotted ฮฒ vs accuracy at {args.plots}") - - # use blobs, because also for TI, this is the unscaled log-prob - backend = emcee.backends.HDFBackend(args.model, read_only=True, name="mcmc") - final_log_probs = backend.get_blobs() - logger.info(f"Opened samples from emcee backend from {args.model}") - - # store metrics in JSON file - args.metrics.parent.mkdir(parents=True, exist_ok=True) - args.metrics.touch(exist_ok=True) - - metrics["BIC"] = comp_bic( - final_log_probs, - ndim, - len(data), - ) - metrics["max_llh"] = np.max(final_log_probs) - metrics["mean_llh"] = np.mean(final_log_probs) - - with open(args.metrics, mode="w", encoding="utf-8") as metrics_file: - json.dump(metrics, metrics_file) - - logger.info(f"Wrote out metrics to {args.metrics}") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description=__doc__) - _add_arguments(parser) - - args = parser.parse_args() - args.run_main(args) diff --git a/src/lyscripts/integrate.py b/src/lyscripts/integrate.py deleted file mode 100644 index c213116..0000000 --- a/src/lyscripts/integrate.py +++ /dev/null @@ -1,163 +0,0 @@ -"""Perform 
thermodynamic integration to evaluate the model evidence. - -Using the functions provided by the `sample` module, this script implements -thermodynamic integration (TI) in order to compute the model evidence. -This is done by sampling the model parameters at different inverse temperatures -following a specified schedule. -""" - -from __future__ import annotations - -import os -from typing import Any - -import emcee -import h5py -import numpy as np -from loguru import logger -from lydata.utils import ModalityConfig -from pydantic import Field - -import lyscripts.sample as sample_module # Import the module to set its global MODEL -from lyscripts.cli import assemble_main -from lyscripts.configs import ( - BaseCLI, - DataConfig, - DistributionConfig, - GraphConfig, - ModelConfig, - SamplingConfig, - ScheduleConfig, - add_distributions, - add_modalities, - construct_model, -) -from lyscripts.utils import get_hdf5_backend - - -def init_ti_sampler( - settings: IntegrateCLI, - temp_idx: int, - ndim: int, - inv_temp: float, - pool: Any, -) -> emcee.EnsembleSampler: - """Initialize the ``emcee.EnsembleSampler`` for TI with the given ``settings''.""" - nwalkers = ndim * settings.sampling.walkers_per_dim - backend = get_hdf5_backend( - file_path=settings.sampling.storage_file, - dataset=f"ti/{temp_idx + 1:0>2d}", - nwalkers=nwalkers, - ndim=ndim, - ) - return emcee.EnsembleSampler( - nwalkers=nwalkers, - ndim=ndim, - log_prob_fn=sample_module.log_prob_fn, - kwargs={"inverse_temp": inv_temp}, - moves=[(emcee.moves.DEMove(), 0.8), (emcee.moves.DESnookerMove(), 0.2)], - backend=backend, - pool=pool, - blobs_dtype=[("log_prob", np.float64)], - parameter_names=list(MODEL.get_named_params().keys()), - ) - - -class IntegrateCLI(BaseCLI): - """Perform thermodynamic integration to compute the model evidence.""" - - graph: GraphConfig - model: ModelConfig = ModelConfig() - distributions: dict[str, DistributionConfig] = Field( - default={}, - description=( - "Mapping of model 
T-categories to predefined distributions over " - "diagnose times." - ), - ) - modalities: dict[str, ModalityConfig] = Field( - default={}, - description=( - "Maps names of diagnostic modalities to their specificity/sensitivity." - ), - ) - data: DataConfig - sampling: SamplingConfig - schedule: ScheduleConfig = Field( - description="Configuration for generating inverse temperature schedule.", - ) - - def cli_cmd(self) -> None: - """Start the ``integrate`` subcommand. - - The model construction and setup is done analogously to the - ``sample`` command. Afterwards, an :py:class:`emcee.EnsembleSampler` - is initialized (see :py:func:`init_sampler`) and :py:func:`run_sampling`, - implemented in the ``sample``module, is executed twice for each TI step: - once for the burn-in phase and once for the actual sampling phase. - Thereby, the log likelihood is scaled by the respective inverse - temperature of that step. All necessary settings for the sampling - are passed by the ``sampling``argument, except for the inverse - temperatures, which are provided by the ``schedule`` argument. - """ - # as recommended in https://emcee.readthedocs.io/en/stable/tutorials/parallel/# - os.environ["OMP_NUM_THREADS"] = "1" - - logger.debug(self.model_dump_json(indent=2)) - - # ugly, but necessary for pickling - global MODEL - MODEL = construct_model(self.model, self.graph) - MODEL = add_distributions(MODEL, self.distributions) - MODEL = add_modalities(MODEL, self.modalities) - MODEL.load_patient_data(**self.data.get_load_kwargs()) - ndim = MODEL.get_num_dims() - - # set MODEL in the sample module's namespace so log_prob_fn can access it - sample_module.MODEL = MODEL - - schedule = self.schedule.get_schedule() - - # emcee does not support numpy's new random number generator yet. 
- np.random.seed(self.sampling.seed) # noqa: NPY002 - - with sample_module.get_pool(self.sampling.cores) as pool: - for idx, inv_temp in enumerate(schedule): - sampler = init_ti_sampler( - settings=self, - temp_idx=idx, - ndim=ndim, - inv_temp=inv_temp, - pool=pool, - ) - - sample_module.run_sampling( - description=f"Burn-in phase: TI step {idx + 1}/{len(schedule)}", - sampler=sampler, - num_steps=self.sampling.burnin_steps, - check_interval=self.sampling.check_interval, - trust_factor=self.sampling.trust_factor, - relative_thresh=self.sampling.relative_thresh, - history_file=self.sampling.history_file, - ) - - sample_module.run_sampling( - description=f"Sampling phase: TI step {idx + 1}/{len(schedule)}", - sampler=sampler, - num_steps=self.sampling.num_steps, - reset_backend=True, - check_interval=self.sampling.num_steps, - thin_by=self.sampling.thin_by, - ) - # copy last sampling round over to a group in the HDF5 file called "mcmc" - with h5py.File(self.sampling.storage_file, mode="r+") as h5_file: - h5_file.copy( - f"ti/{len(schedule):0>2d}", - h5_file, - name=self.sampling.dataset, - ) - - -if __name__ == "__main__": - main = assemble_main(settings_cls=IntegrateCLI, prog_name="integrate") - main() diff --git a/src/lyscripts/plots.py b/src/lyscripts/plots.py deleted file mode 100644 index 07335b0..0000000 --- a/src/lyscripts/plots.py +++ /dev/null @@ -1,411 +0,0 @@ -"""Utility functions for the plotting commands.""" - -from __future__ import annotations - -from abc import abstractmethod -from collections.abc import Mapping -from dataclasses import field -from itertools import cycle -from pathlib import Path -from typing import TYPE_CHECKING, Any, TypeVar - -import h5py -import matplotlib.pyplot as plt -import numpy as np -import scipy as sp -from numpydantic import NDArray, Shape -from pydantic import BaseModel - -from lyscripts.decorators import ( - check_input_file_exists, - check_output_dir_exists, - log_state, -) - -if TYPE_CHECKING: - from 
matplotlib.axes._axes import Axes as MPLAxes - from matplotlib.figure import Figure - -# define USZ colors -COLORS = { - "blue": "#005ea8", - "orange": "#f17900", - "green": "#00afa5", - "red": "#ae0060", - "gray": "#c5d5db", -} -COLOR_CYCLE = cycle(COLORS.values()) -CM_PER_INCH = 2.54 - - -def floor_at_decimal(value: float, decimal: int) -> float: - """Compute the floor of ``value`` for the specified ``decimal``. - - Essentially the distance to the right of the decimal point. May be negative. - """ - power = 10**decimal - return np.floor(power * value) / power - - -def ceil_at_decimal(value: float, decimal: int) -> float: - """Compute the ceiling of ``value`` for the specified ``decimal``. - - Analog to :py:func:`.floor_at_decimal`, this is the distance to the right of the - decimal point. May be negative. - """ - return -floor_at_decimal(-value, decimal) - - -def floor_to_step(value: float, step: float) -> float: - """Compute next value on ladder of stepsize ``step`` still below ``value``.""" - return (value // step) * step - - -def ceil_to_step(value: float, step: float) -> float: - """Compute next value on ladder of stepsize ``step`` still above ``value``.""" - return floor_to_step(value, step) + step - - -def clean_and_check(filename: str | Path) -> Path: - """Check if file with ``filename`` exists. - - If not, raise error, otherwise return cleaned :py:class:`~pathlib.PosixPath`. 
- """ - filepath = Path(filename) - if not filepath.exists(): - msg = f"File with the name {filename} does not exist at {filepath.resolve()}" - raise FileNotFoundError(msg) - return filepath - - -AbstractDistributionT = TypeVar("AbstractDistributionT", bound="AbstractDistribution") - - -class AbstractDistribution(BaseModel): - """Abstract class for distributions that should be plotted.""" - - scale: float = 100.0 - offset: float = 0.0 - kwargs: dict[str, Any] = field(default_factory=lambda: {}) - - @abstractmethod - def draw(self, axes: MPLAxes) -> MPLAxes: - """Draw the distribution into the provided ``axes``.""" - ... - - @abstractmethod - def left_percentile(self, percent: float) -> float: - """Compute the point where ``percent`` of the values are to the left.""" - ... - - @abstractmethod - def right_percentile(self, percent: float) -> float: - """Compute the point where ``percent`` of the values are to the right.""" - ... - - def _get_label(self) -> str: - """Compute label for when ``kwargs`` does not contain one.""" - - @property - def label(self) -> str: - """Return the label of the histogram.""" - return self.kwargs.get("label", self._get_label()) - - -class Histogram(AbstractDistribution): - """Class containing data for plotting a histogram.""" - - raw_values: NDArray[Shape["*"], float] # noqa: F722 - - @property - def values(self) -> np.ndarray: - """Return the values of the histogram scaled and offset.""" - return self.raw_values * self.scale + self.offset - - @classmethod - def from_hdf5( - cls: type[Histogram], - filename: str | Path, - dataname: str, - scale: float = 100.0, - offset: float = 0.0, - **kwargs, - ) -> Histogram: - """Create a histogram from an HDF5 file.""" - filename = clean_and_check(filename) - with h5py.File(filename, mode="r") as h5file: - dataset = h5file[dataname] - if "label" not in kwargs: - kwargs["label"] = get_label(dataset.attrs) - return cls(raw_values=dataset[:], scale=scale, offset=offset, kwargs=kwargs) - - def 
left_percentile(self, percent: float) -> float: - """Compute the point where `percent` of the values are to the left.""" - return np.percentile(self.values, percent) - - def right_percentile(self, percent: float) -> float: - """Compute the point where `percent` of the values are to the right.""" - return np.percentile(self.values, 100.0 - percent) - - def draw(self, axes: MPLAxes, **defaults) -> Any: - """Draw the histogram into the provided ``axes``.""" - xlim = axes.get_xlim() - - hist_kwargs = defaults.get("hist", {}).copy() - hist_kwargs.update(self.kwargs) - - if self.label is not None: - hist_kwargs["label"] = self.label - - return axes.hist(self.values, range=xlim, **hist_kwargs) - - -class BetaPosterior(AbstractDistribution): - """Class for storing plot configs for a Beta posterior.""" - - num_success: int - num_total: int - - @classmethod - def from_hdf5( - cls: type[BetaPosterior], - filename: str | Path, - dataname: str, - scale: float = 100.0, - offset: float = 0.0, - **kwargs, - ) -> BetaPosterior: - """Initialize data container for Beta posteriors from HDF5 file.""" - filename = clean_and_check(filename) - with h5py.File(filename, mode="r") as h5file: - dataset = h5file[dataname] - try: - num_success = int(dataset.attrs["num_match"]) - num_total = int(dataset.attrs["num_total"]) - except KeyError as key_err: - raise KeyError( - "Dataset does not contain observed prevalence data", - ) from key_err - - return cls( - num_success=num_success, - num_total=num_total, - scale=scale, - offset=offset, - kwargs=kwargs, - ) - - def _get_label(self) -> str: - return f"data: {self.num_success} of {self.num_total}" - - @property - def num_fail(self): - """Return the number of failures, i.e. 
the totals minus the successes.""" - return self.num_total - self.num_success - - def pdf(self, x: np.ndarray) -> np.ndarray: - """Compute the probability density function.""" - return sp.stats.beta.pdf( - x, - a=self.num_success + 1, - b=self.num_fail + 1, - loc=self.offset, - scale=self.scale, - ) - - def left_percentile(self, percent: float) -> float: - """Return the point where the CDF reaches ``percent``.""" - return sp.stats.beta.ppf( - percent / 100.0, - a=self.num_success + 1, - b=self.num_fail + 1, - scale=self.scale, - ) - - def right_percentile(self, percent: float) -> float: - """Return the point where 100% minus the CDF equals ``percent``.""" - return sp.stats.beta.ppf( - 1.0 - (percent / 100.0), - a=self.num_success + 1, - b=self.num_fail + 1, - scale=self.scale, - ) - - def draw(self, axes: MPLAxes, resolution: int = 300, **defaults) -> Any: - """Draw the Beta posterior into the provided ``axes``. - - Returns a handle and a label for the legend. - """ - left, right = axes.get_xlim() - x = np.linspace(left, right, resolution) - y = self.pdf(x) - - plot_kwargs = defaults.get("plot", {}).copy() - plot_kwargs.update(self.kwargs) - - if self.label is not None: - plot_kwargs["label"] = self.label - - return axes.plot(x, y, **plot_kwargs) - - -def get_size(width="single", unit="cm", ratio="golden"): - """Return a tuple of figure sizes in inches. - - This is provided as the ``matplotlib`` keyword argument ``figsize`` expects it. - This figure size is computed from a ``width``, in the ``unit`` of centimeters by - default, and a ``ratio`` which is set to the golden ratio by default. - - >>> get_size(width="single", ratio="golden") - (3.937007874015748, 2.4332557935820445) - >>> get_size(width="full", ratio=2.) - (6.299212598425196, 3.149606299212598) - >>> get_size(width=10., ratio=1.) - (3.937007874015748, 3.937007874015748) - >>> get_size(width=5, unit="inches", ratio=2./3.) 
- (5, 7.5) - """ - if width == "single": - width = 10 - elif width == "full": - width = 16 - - ratio = 1.618 if ratio == "golden" else ratio - width = width / CM_PER_INCH if unit == "cm" else width - height = width / ratio - return (width, height) - - -def get_label(attrs: Mapping) -> str: - """Extract label of a histogram from the HDF5 ``attrs`` object of the dataset.""" - label = [] - transforms = { - "label": str, - "modality": str, - "t_stage": str, - "midline_ext": lambda x: "ext" if x else "noext", - } - for key, func in transforms.items(): - if key in attrs and attrs[key] is not None: - label.append(func(attrs[key])) - return " | ".join(label) - - -def get_xlims( - contents: AbstractDistributionT, - percent_lims: tuple[float] = (10.0, 10.0), -) -> tuple[float]: - """Get the x-axis limits for a plot containing multiple distribution. - - Compute the ``xlims`` of a plot containing histograms and probability density - functions by considering their smallest and largest percentiles. - """ - left_percentiles = np.array( - [c.left_percentile(percent_lims[0]) for c in contents], - ) - left_lim = np.min(left_percentiles) - right_percentiles = np.array( - [c.right_percentile(percent_lims[0]) for c in contents], - ) - right_lim = np.max(right_percentiles) - return left_lim, right_lim - - -def draw( - axes: MPLAxes, - contents: list[AbstractDistribution], - percent_lims: tuple[float, float] = (10.0, 10.0), - xlims: tuple[float] | None = None, - hist_kwargs: dict[str, Any] | None = None, - plot_kwargs: dict[str, Any] | None = None, -) -> MPLAxes: - """Draw histograms and Beta posterior from ``contents`` into ``axes``. - - The limits of the x-axis is computed to be the smallest and largest left and right - percentile of all provided ``contents`` respectively via the ``percent_lims`` tuple. - - The ``hist_kwargs`` define general settings that will be applied to all histograms. 
- One additional key ``'nbins'`` may be used to adjust only the numbers, not the - spacing of the histogram bins. - Similarly, ``plot_kwargs`` adjusts the default settings for the Beta posteriors. - - Both these keyword arguments can be overwritten by what the individual ``contents`` - have defined. - """ - if not all(isinstance(c, AbstractDistribution) for c in contents): - raise TypeError("Contents must be subclasses of `AbstractDistribution`") - - xlims = xlims or get_xlims(contents, percent_lims) - - if len(xlims) != 2 or xlims[0] > xlims[-1]: - raise ValueError("`xlims` must be tuple of two increasing values") - - axes.set_xlim(*xlims) - - default_kwargs = { - "hist": { - "density": True, - "histtype": "stepfilled", - "alpha": 0.7, - "bins": 50, - }, - "plot": {}, - } - default_kwargs["hist"].update(hist_kwargs or {}) - default_kwargs["plot"].update(plot_kwargs or {}) - - for content in contents: - content.draw(axes, **default_kwargs) - - return axes - - -def split_legends( - axes: MPLAxes, - titles: list[str], - locs: list[tuple[float, float]], - **kwargs, -) -> None: - """Separate labels in ``axes`` into separate legends with ``titles`` at ``locs``.""" - legend_kwargs = { - "title_fontsize": "small", - "labelspacing": 0.1, - "loc": "upper left", - } - legend_kwargs.update(kwargs) - - handles, labels = axes.get_legend_handles_labels() - labels_per_legend = len(labels) // len(titles) - - for i, (title, loc) in enumerate(zip(titles, locs, strict=True)): - start = i * labels_per_legend - stop = (i + 1) * labels_per_legend if i < len(titles) - 1 else None - idx = slice(start, stop) - - legend = axes.legend( - handles[idx], - labels[idx], - bbox_to_anchor=loc, - title=title, - **legend_kwargs, - ) - axes.add_artist(legend) - - -@log_state() -@check_input_file_exists -def use_mpl_stylesheet(file_path: str | Path): - """Load a ``.mplstyle`` stylesheet from ``file_path``.""" - plt.style.use(file_path) - - -@log_state() -@check_output_dir_exists -def save_figure( - 
output_path: str | Path, - figure: Figure, - formats: list[str] | None, -): - """Save a ``figure`` to ``output_path`` in every one of the provided ``formats``.""" - for frmt in formats: - figure.savefig(output_path.with_suffix(f".{frmt}")) diff --git a/src/lyscripts/sample.py b/src/lyscripts/sample.py deleted file mode 100644 index 1fe7096..0000000 --- a/src/lyscripts/sample.py +++ /dev/null @@ -1,426 +0,0 @@ -"""Implementation of flexible MCMC sampling for lymphatic progression models. - -This module provides both helpful functions for programmatically building and running -sampling pipelines, as well a CLI interface for th most common sampling use cases. - -The core is the :py:func:`run_sampling` function. It has a flexible interface and -built-in convergence detection, as well as bookkeeping for monitoring and resuming -interrupted sampling runs. It can be used both during the burn-in phase and the actual -sampling phase. - -.. warning:: - - We strongly recommend to set the CLI's ``--cores`` argument to ``None`` (or ``null`` - in the YAML config file) if you are on MacOS or Windows. This is because we haven't - yet figured out how we can safely and efficiently use the ``multiprocess(ing)`` - library on these two platforms. -""" - -from __future__ import annotations - -import os -import sys -from typing import Any - -from loguru import logger - -from lyscripts.cli import assemble_main - -try: - import multiprocess as mp -except ModuleNotFoundError: - import multiprocessing as mp - -if sys.platform == "darwin": - logger.warning("Detected MacOS. 
Setting multiprocess(ing) start method to 'fork'.") - mp.set_start_method("fork") - -from pathlib import Path - -import emcee -import numpy as np -import pandas as pd -from lydata.utils import ModalityConfig -from lymph.types import ParamsType -from pydantic import BaseModel, Field -from rich.progress import Progress, ProgressColumn, Task, TimeElapsedColumn -from rich.text import Text - -from lyscripts.configs import ( - BaseCLI, - DataConfig, - DistributionConfig, - GraphConfig, - ModelConfig, - SamplingConfig, - add_distributions, - add_modalities, - construct_model, -) -from lyscripts.utils import console, get_hdf5_backend - - -class CompletedItersColumn(ProgressColumn): - """A column that displays the completed number of iterations.""" - - def __init__(self, table_column=None, it: int = 0): - """Initialize the column with number of previous iterations.""" - super().__init__(table_column) - self.it = it - - def render(self, task: Task) -> Text: - """Render total iterations.""" - if task.completed is None: - return Text("? it", style="progress.data.steps") - return Text(f"{task.completed + self.it} it", style="progress.data.steps") - - -class ItersPerSecondColumn(ProgressColumn): - """A column that displays the number of iterations per second.""" - - def render(self, task: Task) -> Text: - """Render iterations per second.""" - speed = task.finished_speed or task.speed - if speed is None: - return Text("? 
it/s", style="progress.data.speed") - return Text(f"{speed:.2f} it/s", style="progress.data.speed") - - -class AcorTime(BaseModel, validate_assignment=True): - """Storage for old and new autocorrelation times.""" - - old: float - new: float - - def update(self, new: float) -> None: - """Update the autocorrelation time.""" - self.old = self.new - self.new = new - - @property - def relative_diff(self) -> float: - """Get the relative difference between new and old autocorrelation time.""" - return np.abs(self.new - self.old) / self.new - - -class NumAccepted(BaseModel, validate_assignment=True): - """Storage for old and new number of accepted proposals.""" - - old: int - new: int - - def update(self, new: int) -> None: - """Update the number of accepted proposals.""" - self.old = self.new - self.new = new - - @property - def newly_accepted(self) -> int: - """Get the number of newly accepted proposals.""" - return self.new - self.old - - -MODEL = None - - -def log_prob_fn(theta: ParamsType, inverse_temp: float = 1.0) -> tuple[float, float]: - """Compute log-prob using global variables because of pickling. - - An inverse temperature ``inverse_temp`` can be provided for thermodynamic - integration. - """ - llh = MODEL.likelihood(given_params=theta) - if np.isinf(llh): # to prevent the case of 0 * inf = NaN - return -np.inf, -np.inf - return inverse_temp * llh, llh - - -def ensure_initial_state(sampler: emcee.EnsembleSampler) -> np.ndarray: - """Try to extract a starting state from a ``sampler``. - - Create a random starting state if no one was found. - """ - try: - state = sampler.backend.get_last_sample() - logger.info( - f"Resuming from {sampler.backend.filename} with {sampler.iteration} " - "stored iterations.", - ) - except AttributeError: - state = np.random.uniform(size=(sampler.nwalkers, sampler.ndim)) # noqa: NPY002 - logger.debug(f"No stored samples found. 
Starting from random state {state}.") - - return state - - -def ensure_history_table(file: Path | None) -> pd.DataFrame: - """Return the history table from a file or an empty DataFrame. - - It will try to load a history at the given ``file`` location, but with a ``.tmp`` - extension. This is the expected name and location of a history file that was - stored during an interrupted sampling run. - - If no file is found, an empty DataFrame is returned. - """ - if file is None or not file.with_suffix(".tmp").exists(): - return pd.DataFrame( - columns=[ - "steps", - "acor_times", - "accept_fracs", - "max_log_probs", - ], - ).set_index("steps") - - return pd.read_csv(file.with_suffix(".tmp"), index_col="steps") - - -def update_history_table( - history: pd.DataFrame, - history_file: Path | None, - iteration: int, - acor_time: float, - accepted_frac: float, - max_log_prob: float, -) -> pd.DataFrame: - """Update the history table with the current iteration's information.""" - history.loc[iteration] = [acor_time, accepted_frac, max_log_prob] - logger.debug(history.iloc[-1].to_dict()) - - if history_file is not None: - history.to_csv(history_file.with_suffix(".tmp")) - - return history - - -def is_converged( - iteration: int, - acor_time: AcorTime, - trust_factor: float, - relative_thresh: float, -) -> bool: - """Check if the chain has converged based on the autocorrelation time. - - The criterion is based on the relative change of the autocorrelation time and - whether the autocorrelation extimate can be trusted. Essentially, we only trust - the estimate if it is smaller than ``trust_factor`` times the current ``iteration``. - - More details can be found in the `emcee documentation`_. - - .. 
_emcee documentation: https://emcee.readthedocs.io/en/stable/tutorials/autocorr/ - """ - return ( - acor_time.new * trust_factor < iteration - and acor_time.relative_diff < relative_thresh - ) - - -def _get_columns(it: int = 0) -> list[ProgressColumn]: - """Get the default progress columns for the MCMC sampling.""" - return [ - *Progress.get_default_columns(), - ItersPerSecondColumn(), - CompletedItersColumn(it=it), - TimeElapsedColumn(), - ] - - -def run_sampling( - sampler: emcee.EnsembleSampler, - initial_state: np.ndarray | None = None, - num_steps: int | None = None, - thin_by: int = 1, - check_interval: int = 100, - trust_factor: float = 50.0, - relative_thresh: float = 0.05, - history_file: Path | None = None, - reset_backend: bool = False, - description: str = "Burn-in phase", -) -> None: - """Run MCMC sampling. - - This will run the ``sampler`` either for ``num_steps`` steps or - if it set to - ``None`` - until convergence. Convergence is determined once within a - ``check_interval`` of steps by the :py:func:`is_converged` function. The - convergence criterion is based on a trustworthy estimate of the autocorrelation - time. This is elaborated in the `emcee documentation`_. - - Some bookkeeping parameters may be stored in a ``history_file``. During sampling, - the history is stored in a temporary file with the suffix ``.tmp``. If the sampling - is interrupted, the history and the last state of the ``sampler`` can be recovered - and the sampling can be continued. - - One may choose to ``reset_backend``, e.g. in case the previous sampling was run - until convergence and now one wants to store a length of the converged chain. This - may also be thinned by a factor of ``thin_by`` (directly passed to the - :py:class:`emcee.EnsembleSampler` class). - - .. 
_emcee documentation: https://emcee.readthedocs.io/en/stable/tutorials/autocorr/ - """ - state = initial_state or ensure_initial_state(sampler) - history = ensure_history_table(history_file) - - if reset_backend: - logger.debug("Resetting backend of sampler.") - sampler.backend.reset(sampler.nwalkers, sampler.ndim) - - acor_time = AcorTime(old=np.inf, new=np.inf) - accepted = NumAccepted(old=0, new=sampler.backend.accepted.sum()) - - with Progress(*_get_columns(it=sampler.iteration), console=console) as progress: - task = progress.add_task(description=description, total=num_steps) - while sampler.iteration < (num_steps or np.inf): - for state in sampler.sample( # noqa: B007, B020 - initial_state=state, - iterations=check_interval - sampler.iteration % check_interval, - thin_by=thin_by, - ): - progress.update(task, advance=1) - - acor_time.update(new=sampler.get_autocorr_time(tol=0).mean()) - accepted.update(new=sampler.backend.accepted.sum()) - - history = update_history_table( - history=history, - history_file=history_file, - iteration=sampler.iteration, - acor_time=acor_time.new, - accepted_frac=( - accepted.newly_accepted / (check_interval * sampler.nwalkers) - ), - max_log_prob=np.max(state.log_prob), - ) - - if num_steps is None and is_converged( - iteration=sampler.iteration, - acor_time=acor_time, - trust_factor=trust_factor, - relative_thresh=relative_thresh, - ): - logger.info(f"Sampling converged after {sampler.iteration} steps.") - break - - if history_file is not None: - history_file.with_suffix(".tmp").rename(history_file) - - -class DummyPool: - """Dummy class to allow for no multiprocessing.""" - - def __enter__(self) -> None: - """Enter the context manager.""" - ... - - def __exit__(self, *args) -> None: - """Exit the context manager.""" - ... - - -def get_pool(num_cores: int | None) -> Any | DummyPool: # type: ignore - """Get a ``multiprocess(ing)`` pool or ``DummyPool``. 
- - Returns a ``multiprocess(ing)`` pool with ``num_cores`` cores if ``num_cores`` is - not ``None``. Otherwise, a ``DummyPool`` is returned. - """ - return mp.Pool(num_cores) if num_cores is not None else DummyPool() - - -def init_sampler(settings: SampleCLI, ndim: int, pool: Any) -> emcee.EnsembleSampler: - """Initialize the ``emcee.EnsembleSampler`` with the given ``settings``.""" - nwalkers = ndim * settings.sampling.walkers_per_dim - backend = get_hdf5_backend( - file_path=settings.sampling.storage_file, - dataset=settings.sampling.dataset, - nwalkers=nwalkers, - ndim=ndim, - ) - return emcee.EnsembleSampler( - nwalkers=nwalkers, - ndim=ndim, - log_prob_fn=log_prob_fn, - kwargs={"inverse_temp": settings.sampling.inverse_temp}, - moves=[(emcee.moves.DEMove(), 0.8), (emcee.moves.DESnookerMove(), 0.2)], - backend=backend, - pool=pool, - blobs_dtype=[("log_prob", np.float64)], - parameter_names=list(MODEL.get_named_params().keys()), - ) - - -class SampleCLI(BaseCLI): - """Use MCMC to infer distributions over model parameters from data.""" - - graph: GraphConfig - model: ModelConfig = ModelConfig() - distributions: dict[str, DistributionConfig] = Field( - default={}, - description=( - "Mapping of model T-categories to predefined distributions over " - "diagnose times." - ), - ) - modalities: dict[str, ModalityConfig] = Field( - default={}, - description=( - "Maps names of diagnostic modalities to their specificity/sensitivity." - ), - ) - data: DataConfig - sampling: SamplingConfig - - def cli_cmd(self) -> None: - """Start the ``sample`` subcommand. - - First, it will construct the model from the ``graph`` and ``model`` arguments. - Then, it will add distributions over diagnose times via the dictionary from - the ``distributions`` argument. It will also set sensitivity and specificity of - diagnostic modalities via the dictionary provided through the ``modalities`` - argument. Finally, it will load the patient data as specified via the ``data`` - argument. 
- - When the model is constructed, an :py:class:`emcee.EnsembleSampler` is - initialized (see :py:func:`init_sampler`) and :py:func:`run_sampling` is - executed twice: once for the burn-in phase and once for the actual sampling - phase. The ``sampling`` argument provides all necessary settings for the - sampling. - """ - # as recommended in https://emcee.readthedocs.io/en/stable/tutorials/parallel/# - os.environ["OMP_NUM_THREADS"] = "1" - - logger.debug(self.model_dump_json(indent=2)) - - # ugly, but necessary for pickling - global MODEL - MODEL = construct_model(self.model, self.graph) - MODEL = add_distributions(MODEL, self.distributions) - MODEL = add_modalities(MODEL, self.modalities) - MODEL.load_patient_data(**self.data.get_load_kwargs()) - ndim = MODEL.get_num_dims() - - # emcee does not support numpy's new random number generator yet. - np.random.seed(self.sampling.seed) # noqa: NPY002 - - with get_pool(self.sampling.cores) as pool: - sampler = init_sampler(settings=self, ndim=ndim, pool=pool) - run_sampling( - description="Burn-in phase", - sampler=sampler, - num_steps=self.sampling.burnin_steps, - check_interval=self.sampling.check_interval, - trust_factor=self.sampling.trust_factor, - relative_thresh=self.sampling.relative_thresh, - history_file=self.sampling.history_file, - ) - run_sampling( - description="Sampling phase", - sampler=sampler, - num_steps=self.sampling.num_steps, - check_interval=self.sampling.num_steps, - reset_backend=True, - thin_by=self.sampling.thin_by, - ) - - -if __name__ == "__main__": - main = assemble_main(settings_cls=SampleCLI, prog_name="sample") - main() diff --git a/src/lyscripts/schedule.py b/src/lyscripts/schedule.py deleted file mode 100644 index 99fa334..0000000 --- a/src/lyscripts/schedule.py +++ /dev/null @@ -1,33 +0,0 @@ -r"""Generate inverse temperature schedules for thermodynamic integration. - -Thermodynamic integration is quite sensitive to the specific schedule which is used. 
-I noticed in my models, that within the interval :math:`[0, 0.1]`, the increase in the -expected log-likelihood is very steep. Hence, the inverse temperature :math:`\beta` -must be more densely spaced in the beginning. - -This can be achieved by using a power sequence: Generate :math:`n` linearly spaced -points in the interval :math:`[0, 1]` and then transform each point by computing -:math:`\beta_i^k` where :math:`k` could e.g. be 5. -""" - -from loguru import logger - -from lyscripts.cli import assemble_main -from lyscripts.configs import BaseCLI, ScheduleConfig - - -class ScheduleCLI(ScheduleConfig, BaseCLI): - """Generate an inverse temperature schedule for thermodynamic integration.""" - - def cli_cmd(self) -> None: - """Start the ``schedule`` command.""" - logger.debug(self.model_dump_json(indent=2)) - - for inv_temp in self.get_schedule(): - # print is necessary to allow piping the output - print(inv_temp) # noqa: T201 - - -if __name__ == "__main__": - main = assemble_main(settings_cls=ScheduleCLI, prog_name="schedule") - main() diff --git a/src/lyscripts/schema.py b/src/lyscripts/schema.py deleted file mode 100644 index 7470af3..0000000 --- a/src/lyscripts/schema.py +++ /dev/null @@ -1,66 +0,0 @@ -"""A fusion of all :py:mod:`configs`, allowing the creation of a JSON schema. - -This command is not intended to be used by the end user. Rather, it exists such that -the developers and maintainers can create a JSON schema from all the defined -:py:mod:`configs` an store that in the `source code repository`_. Subsequently, the -end user can point their IDE to this schema, hosted on GitHub to provide them with -auto-completion and validation of their YAML configuration files that they feed into -the lyscripts CLIs when they build pipelines or scripts with it. - -The `URL for the schema`_ can for example be used in the settings of VS Code like this: - -.. 
code:: json - - { - "yaml.schemas": { - "https://raw.githubusercontent.com/lycosystem/lyscripts/main/schemas/ly.json": "*.ly.yaml" - }, - } - -Which would enable auto-completion and validation for all files with the extension -``.ly.yaml`` in the workspace. - -.. _source code repository: https://github.com/lycosystem/lyscripts -.. _URL for the schema: https://raw.githubusercontent.com/lycosystem/lyscripts/main/schemas/ly.json -""" # noqa: E501 - -import json - -from lydata.utils import ModalityConfig -from pydantic import BaseModel, Field - -from lyscripts import configs - - -class SchemaSettings(BaseModel): - """Settings for generating a JSON schema for lyscripts configuration files.""" - - version: int = Field( - description=( - "For future compatibility reasons, every config file must have a " - "`version: 1` field at the top level." - ), - ge=1, - le=1, - ) - cross_validation: configs.CrossValidationConfig = None - data: configs.DataConfig = None - diagnosis: configs.DiagnosisConfig = None - distributions: dict[str, configs.DistributionConfig] = {} - graph: configs.GraphConfig = None - involvement: configs.InvolvementConfig = None - modalities: dict[str, ModalityConfig] = {} - model: configs.ModelConfig = None - sampling: configs.SamplingConfig = None - scenarios: list[configs.ScenarioConfig] = [] - schedule: configs.ScheduleConfig = None - - -def main() -> None: - """Generate a JSON schema for lyscripts configuration files.""" - schema = SchemaSettings.model_json_schema() - print(json.dumps(schema, indent=2)) # noqa: T201 - - -if __name__ == "__main__": - main() diff --git a/src/lyscripts/utils.py b/src/lyscripts/utils.py deleted file mode 100644 index b6b1261..0000000 --- a/src/lyscripts/utils.py +++ /dev/null @@ -1,199 +0,0 @@ -"""General utility functions for the lyscripts package.""" - -from pathlib import Path - -import numpy as np -import pandas as pd -import yaml -from emcee.backends import HDFBackend -from loguru import logger -from rich.console import 
Console -from scipy.special import factorial - -from lyscripts.decorators import ( - check_input_file_exists, - check_output_dir_exists, -) - -console = Console() - - -def binom_pmf(support: list[int] | np.ndarray, p: float = 0.5): - """Binomial PMF that is much faster than the one from scipy.""" - max_time = len(support) - 1 - if p > 1.0 or p < 0.0: - raise ValueError("Binomial prob must be btw. 0 and 1") - q = 1.0 - p - binom_coeff = factorial(max_time) / ( - factorial(support) * factorial(max_time - support) - ) - return binom_coeff * p**support * q ** (max_time - support) - - -def get_dict_depth(nested: dict) -> int: - """Get the depth of a nested dictionary. - - >>> get_dict_depth({"a": {"b": 1}}) - 2 - >>> varying_depth = {"a": {"b": 1}, "c": {"d": {"e": 2}}} - >>> get_dict_depth(varying_depth) - 3 - """ - if not isinstance(nested, dict): - return 0 - - max_depth = None - for _, value in nested.items(): - value_depth = get_dict_depth(value) - max_depth = max(max_depth or value_depth, value_depth) - - return 1 + (max_depth or 0) - - -def delete_private_keys(nested: dict) -> dict: - """Delete private keys from a nested dictionary. - - A 'private' key is a key whose name starts with an underscore. For example: - - >>> delete_private_keys({"patient": {"__doc__": "some patient info", "age": 61}}) - {'patient': {'age': 61}} - >>> delete_private_keys({"patient": {"age": 61}}) - {'patient': {'age': 61}} - """ - cleaned = {} - - if isinstance(nested, dict): - for key, value in nested.items(): - if not (isinstance(key, str) and key.startswith("_")): - cleaned[key] = delete_private_keys(value) - else: - cleaned = nested - - return cleaned - - -def flatten( - nested: dict, - prev_key: tuple = (), - max_depth: int | None = None, -) -> dict: - """Flatten ``nested`` dict by creating key tuples for each value at ``max_depth``. 
- - >>> nested = {"tumor": {"1": {"t_stage": 1, "size": 12.3}}} - >>> flatten(nested) - {('tumor', '1', 't_stage'): 1, ('tumor', '1', 'size'): 12.3} - >>> mapping = {"patient": {"#": {"age": {"func": int, "columns": ["age"]}}}} - >>> flatten(mapping, max_depth=3) - {('patient', '#', 'age'): {'func': , 'columns': ['age']}} - - Note that flattening an already flat dictionary will yield some weird results. - """ - result = {} - - for key, value in nested.items(): - is_dict = isinstance(value, dict) - has_reached_max_depth = max_depth is not None and len(prev_key) >= max_depth - 1 - - if is_dict and not has_reached_max_depth: - result.update(flatten(value, (*prev_key, key), max_depth)) - else: - result[(*prev_key, key)] = value - - return result - - -def unflatten(flat: dict) -> dict: - """Take a flat dictionary with tuples of keys and create nested dict from it. - - >>> flat = {('tumor', '1', 't_stage'): 1, ('tumor', '1', 'size'): 12.3} - >>> unflatten(flat) - {'tumor': {'1': {'t_stage': 1, 'size': 12.3}}} - >>> mapping = {('patient', '#', 'age'): {'func': int, 'columns': ['age']}} - >>> unflatten(mapping) - {'patient': {'#': {'age': {'func': , 'columns': ['age']}}}} - """ - result = {} - - for keys, value in flat.items(): - current = result - for key in keys[:-1]: - current = current.setdefault(key, {}) - - current[keys[-1]] = value - - return result - - -def get_modalities_subset( - defined_modalities: dict[str, list[float]], - selection: list[str], -) -> dict[str, list[float]]: - """Of the ``defined_modalities`` return only those mentioned in the ``selection``. 
- - >>> modalities = {"CT": [0.76, 0.81], "MRI": [0.63, 0.86]} - >>> get_modalities_subset(modalities, ["CT"]) - {'CT': [0.76, 0.81]} - """ - selected_modalities = {} - for mod in selection: - try: - selected_modalities[mod] = defined_modalities[mod] - except KeyError as key_err: - raise KeyError(f"Modality {mod} has not been defined yet") from key_err - return selected_modalities - - -def load_patient_data( - file_path: Path, - **read_csv_kwargs: dict, -) -> pd.DataFrame: - """Load patient data from a CSV file stored at ``file``.""" - if "header" not in read_csv_kwargs: - read_csv_kwargs["header"] = [0, 1, 2] - - data = pd.read_csv(file_path, **read_csv_kwargs) - logger.info(f"Loaded {len(data)} patient records from {file_path}") - return data - - -@check_input_file_exists -def load_yaml_params(file_path: Path) -> dict: - """Load parameters from a YAML ``file``.""" - with open(file_path, encoding="utf-8") as file: - loaded_params = yaml.safe_load(file) - logger.info(f"Loaded YAML parameters from {file_path}") - return loaded_params - - -@check_input_file_exists -def load_model_samples( - file_path: Path, - name: str = "mcmc", - flat: bool = True, - discard: int = 0, - thin: int = 1, -) -> np.ndarray: - """Load MCMC samples stored in HDF5 file at ``file_path`` under a key ``name``.""" - backend = HDFBackend(file_path, name=name, read_only=True) - samples = backend.get_chain(flat=flat, discard=discard, thin=thin) - logger.info(f"Loaded samples with shape {samples.shape} from {file_path}") - return samples - - -@check_output_dir_exists -def get_hdf5_backend( - file_path: Path, - dataset: str = "mcmc", - nwalkers: int | None = None, - ndim: int | None = None, - reset: bool = False, -) -> HDFBackend: - """Open an HDF5 file at ``file_path`` and return a backend.""" - backend = HDFBackend(file_path, name=dataset) - logger.info(f"Opened HDF5 file at {file_path}") - - if reset: - logger.info(f"Resetting backend at {file_path} to {nwalkers=} and {ndim=}") - 
backend.reset(nwalkers, ndim) - - return backend diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/_dummy_model.py b/tests/_dummy_model.py deleted file mode 100644 index 22eeab2..0000000 --- a/tests/_dummy_model.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Define loadable dummy model for testing.""" - -class DummyModel: - def __init__(self, was_externally_loaded: bool = False): - self.was_externally_loaded = was_externally_loaded - -model = DummyModel(was_externally_loaded=True) diff --git a/tests/_dummy_no_model.py b/tests/_dummy_no_model.py deleted file mode 100644 index 67b8a47..0000000 --- a/tests/_dummy_no_model.py +++ /dev/null @@ -1,3 +0,0 @@ -"""File that does not provide a `model`.""" - -no_model = 42 diff --git a/tests/compute/posteriors_test.py b/tests/compute/posteriors_test.py deleted file mode 100644 index 85f3b49..0000000 --- a/tests/compute/posteriors_test.py +++ /dev/null @@ -1,135 +0,0 @@ -"""Test utilities of the predict submodule.""" - -import numpy as np -import pytest -from lydata.utils import ModalityConfig - -from lyscripts.compute.posteriors import compute_posteriors -from lyscripts.compute.priors import compute_priors -from lyscripts.compute.utils import complete_pattern -from lyscripts.configs import ( - DiagnosisConfig, - DistributionConfig, - GraphConfig, - ModelConfig, - add_distributions, - construct_model, -) - -RNG = np.random.default_rng(42) - - -@pytest.fixture(params=["Unilateral", "Bilateral"]) -def model_config(request) -> ModelConfig: - """Create unilateral model config.""" - return ModelConfig(class_name=request.param) - - -@pytest.fixture -def graph_config() -> GraphConfig: - """Create simple graph.""" - return GraphConfig( - tumor={"T": ["I", "II", "III"]}, - lnl={"I": ["II"], "II": ["III"], "III": []}, - ) - - -@pytest.fixture -def dist_configs() -> dict[str, DistributionConfig]: - """Provide early and late distributions.""" - return { - "early": 
DistributionConfig(kind="frozen", func="binomial"), - "late": DistributionConfig(kind="parametric", func="binomial"), - } - - -@pytest.fixture -def modality_config() -> ModalityConfig: - """Create modality config.""" - return ModalityConfig(spec=0.9, sens=0.8) - - -@pytest.fixture -def diagnosis_config() -> DiagnosisConfig: - """Create a simple diagnosis config.""" - return DiagnosisConfig( - ipsi={"D": {"I": True, "II": True, "III": False}}, - contra={"D": {"I": False, "II": True, "III": False}}, - ) - - -@pytest.fixture -def samples( - model_config: ModelConfig, - graph_config: GraphConfig, - dist_configs: dict[str, DistributionConfig], -) -> np.ndarray: - """Generate some samples.""" - model = construct_model(model_config, graph_config) - model = add_distributions(model, dist_configs) - return RNG.uniform(size=(100, model.get_num_dims())) - - -@pytest.fixture -def priors( - model_config: ModelConfig, - graph_config: GraphConfig, - dist_configs: dict[str, DistributionConfig], - samples: np.ndarray, -) -> np.ndarray: - """Provide some priors.""" - return compute_priors( - model_config=model_config, - graph_config=graph_config, - dist_configs=dist_configs, - samples=samples, - t_stages=["late"], - t_stages_dist=[1.0], - ) - - -def test_compute_posterior( - model_config: ModelConfig, - graph_config: GraphConfig, - dist_configs: dict[str, DistributionConfig], - modality_config: ModalityConfig, - diagnosis_config: DiagnosisConfig, - priors: np.ndarray, -) -> None: - """Ensure that the diagnosis is correctly treated.""" - posteriors = compute_posteriors( - model_config=model_config, - graph_config=graph_config, - dist_configs=dist_configs, - modality_configs={"D": modality_config}, - priors=priors, - diagnosis=diagnosis_config.model_dump(), - ) - - assert np.all(posteriors >= 0), "Negative probabilities in posterior." - assert np.all(posteriors <= 1), "Probabilities above 1 in posterior." 
- - -def test_clean_pattern(): - """Test outdated utility function.""" - empty_pattern = {} - one_pos_pattern = {"ipsi": {"II": True}} - nums_pattern = {"ipsi": {"I": 1}, "contra": {"III": 0}} - lnls = ["I", "II", "III"] - - empty_cleaned = complete_pattern(empty_pattern, lnls) - one_pos_cleaned = complete_pattern(one_pos_pattern, lnls) - nums_cleaned = complete_pattern(nums_pattern, lnls) - - assert empty_cleaned == { - "ipsi": {"I": None, "II": None, "III": None}, - "contra": {"I": None, "II": None, "III": None}, - }, "Empty pattern does not get filled correctly." - assert one_pos_cleaned == { - "ipsi": {"I": None, "II": True, "III": None}, - "contra": {"I": None, "II": None, "III": None}, - }, "Pattern with one positive LNL not cleaned properly." - assert nums_cleaned == { - "ipsi": {"I": True, "II": None, "III": None}, - "contra": {"I": None, "II": None, "III": False}, - }, "Number pattern cleaned wrongly." diff --git a/tests/compute/prevalences_test.py b/tests/compute/prevalences_test.py deleted file mode 100644 index 3fe2151..0000000 --- a/tests/compute/prevalences_test.py +++ /dev/null @@ -1,41 +0,0 @@ -"""Test the computation of the prevalences.""" - -import pandas as pd -import pytest -from lydata import load_datasets - -from lyscripts.compute.prevalences import observe_prevalence -from lyscripts.configs import DiagnosisConfig, ScenarioConfig - - -@pytest.fixture -def scenario_config() -> ScenarioConfig: - """Create a simple scenario config.""" - return ScenarioConfig( - t_stages=["early"], - diagnosis=DiagnosisConfig( - ipsi={"max_llh": {"II": "involved", "III": False}}, - contra={"max_llh": {"II": 0}}, - ), - ) - - -@pytest.fixture -def data() -> pd.DataFrame: - """Load one of the lyDATA datasets.""" - data = next(load_datasets(year=2021, institution="usz")) - return data.ly.enhance() - - -def test_observe_prevalence( - data: pd.DataFrame, - scenario_config: ScenarioConfig, -) -> None: - """Ensure that observing the prevalence works.""" - portion = 
observe_prevalence( - data=data, - scenario_config=scenario_config, - ) - - assert portion.match == 66 - assert portion.total == 150 diff --git a/tests/configs_test.py b/tests/configs_test.py deleted file mode 100644 index 477083b..0000000 --- a/tests/configs_test.py +++ /dev/null @@ -1,125 +0,0 @@ -"""Test the configs module.""" - -from pathlib import Path - -import pytest -import yaml -from lydata.utils import ModalityConfig -from pydantic import ValidationError - -from lyscripts.configs import ( - DistributionConfig, - GraphConfig, - ModelConfig, - add_distributions, - add_modalities, - construct_model, - modalityconfig_from_model, -) - - -@pytest.fixture -def yaml_config_dict() -> dict: - """Load the YAML params file.""" - with open("tests/test_params_v1.yaml") as file: - return yaml.safe_load(file) - - -@pytest.fixture -def external_model_config() -> ModelConfig: - return ModelConfig(external_file=Path("tests/_dummy_model.py")) - - -@pytest.fixture -def model_config() -> ModelConfig: - return ModelConfig( - class_name="Bilateral", - constructor="trinary", - named_params=["spread", "TtoII_spread", "late_p"], - ) - - -@pytest.fixture -def graph_config() -> GraphConfig: - return GraphConfig( - tumor={"T": ["II", "III"]}, - lnl={"II": ["III"], "III": []}, - ) - - -@pytest.fixture -def distribution_configs() -> dict[str, DistributionConfig]: - return { - "early": DistributionConfig(kind="frozen", params={"p": 0.3}), - "late": DistributionConfig(kind="parametric", params={"p": 0.7}), - } - - -def test_model_from_external( - external_model_config: ModelConfig, - graph_config: GraphConfig, -): - """Check if loading model from external file works.""" - model = construct_model(external_model_config, graph_config) - assert model.was_externally_loaded - - -def test_no_model_from_external() -> None: - """Ensure a `ValidationError` is raised when no model is provided.""" - with pytest.raises(ValidationError): - ModelConfig(external_file=Path("tests/_dummy_no_model.py")) - - 
-def test_model_from_no_file() -> None: - """Ensure a `ValidationError` is raised when the file does not exist.""" - with pytest.raises(ValidationError): - ModelConfig(external_file=Path("tests/_no_file.py")) - - -def test_model_from_config( - model_config: ModelConfig, - graph_config: GraphConfig, - distribution_configs: dict[str, DistributionConfig], -): - """Check that loading the model works correctly. Especially the named params.""" - model = construct_model( - model_config=model_config, - graph_config=graph_config, - ) - model = add_distributions( - model=model, - configs=distribution_configs, - ) - assert model.ipsi.get_distribution(t_stage="late") == model.contra.get_distribution( - t_stage="late" - ) - assert model.get_num_dims() == len(model_config.named_params) - - -def test_config_recovery(yaml_config_dict: dict) -> None: - """Ensure the round trip config -> mode -> config works.""" - model = construct_model( - model_config=ModelConfig(**yaml_config_dict["model"]), - graph_config=GraphConfig(**yaml_config_dict["graph"]), - ) - model = add_modalities( - model=model, - modalities={ - name: ModalityConfig(**config) - for name, config in yaml_config_dict["modalities"].items() - }, - ) - - recovered_config = {} - recovered_config["model"] = ModelConfig.from_model(model).model_dump( - exclude_none=True - ) - recovered_config["graph"] = GraphConfig.from_model(model).model_dump() - recovered_config["modalities"] = { - name: modalityconfig_from_model(model, name).model_dump() - for name in model.get_all_modalities() - } - - yaml_config_dict.pop("distributions") - yaml_config_dict.pop("version") - assert recovered_config == yaml_config_dict diff --git a/tests/integration/.gitignore b/tests/integration/.gitignore deleted file mode 100644 index be15619..0000000 --- a/tests/integration/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.csv -*.hdf5 diff --git a/tests/integration/config/data.ly.yaml b/tests/integration/config/data.ly.yaml deleted file mode 100644 index 
dd296f1..0000000 --- a/tests/integration/config/data.ly.yaml +++ /dev/null @@ -1,6 +0,0 @@ -version: 1 - -data: - mapping: - early: early - late: late diff --git a/tests/integration/config/distributions.ly.yaml b/tests/integration/config/distributions.ly.yaml deleted file mode 100644 index 48ca2d5..0000000 --- a/tests/integration/config/distributions.ly.yaml +++ /dev/null @@ -1,15 +0,0 @@ -version: 1 - -distributions: - early: - func: binomial - kind: frozen - params: {p: 0.3} - late: - func: binomial - kind: parametric - params: {p: 0.7} - -t_stages_dist: - early: 0.6 - late: 0.4 diff --git a/tests/integration/config/graph.ly.yaml b/tests/integration/config/graph.ly.yaml deleted file mode 100644 index 3215b1d..0000000 --- a/tests/integration/config/graph.ly.yaml +++ /dev/null @@ -1,14 +0,0 @@ -version: 1 - -graph: - tumor: - T: [II, III] - lnl: - II: [III] - III: [] - -params: - TtoII_spread: 0.3 - TtoIII_spread: 0.1 - IItoIII_spread: 0.2 - late_p: 0.7 diff --git a/tests/integration/config/modalities.ly.yaml b/tests/integration/config/modalities.ly.yaml deleted file mode 100644 index 2ff7de5..0000000 --- a/tests/integration/config/modalities.ly.yaml +++ /dev/null @@ -1,7 +0,0 @@ -version: 1 - -modalities: - imaging: - spec: 0.85 - sens: 0.85 - kind: clinical diff --git a/tests/integration/config/model.ly.yaml b/tests/integration/config/model.ly.yaml deleted file mode 100644 index 609ae0a..0000000 --- a/tests/integration/config/model.ly.yaml +++ /dev/null @@ -1,11 +0,0 @@ -version: 1 - -model: - class_name: Unilateral - constructor: binary - max_time: 10 - named_params: - - TtoII_spread - - TtoIII_spread - - IItoIII_spread - - late_p diff --git a/tests/integration/config/sampling.ly.yaml b/tests/integration/config/sampling.ly.yaml deleted file mode 100644 index 16c1b31..0000000 --- a/tests/integration/config/sampling.ly.yaml +++ /dev/null @@ -1,10 +0,0 @@ -version: 1 - -sampling: - seed: 42 - walkers_per_dim: 2 - check_interval: 10 - trust_factor: 10 - 
relative_thresh: 0.1 - thin_by: 1 - num_steps: 10 diff --git a/tests/integration/config/scenarios.ly.yaml b/tests/integration/config/scenarios.ly.yaml deleted file mode 100644 index 725c145..0000000 --- a/tests/integration/config/scenarios.ly.yaml +++ /dev/null @@ -1,14 +0,0 @@ -version: 1 - -scenarios: - - t_stages: [early] - mode: "HMM" - diagnosis: - ipsi: - imaging: {II: involved} - - t_stages: [early, late] - t_stages_dist: [0.7, 0.3] - mode: "HMM" - diagnosis: - ipsi: - imaging: {III: 1} diff --git a/tests/integration/integration_test.py b/tests/integration/integration_test.py deleted file mode 100644 index 2441afe..0000000 --- a/tests/integration/integration_test.py +++ /dev/null @@ -1,444 +0,0 @@ -"""Test the ``generate`` CLI.""" - -import shutil -import sys -from pathlib import Path - -import h5py -import numpy as np -import pandas as pd -import pytest -from lydata import C -from lydata.utils import ModalityConfig -from pydantic import TypeAdapter - -from lyscripts.cli import assemble_main -from lyscripts.compute.prevalences import PrevalencesCLI -from lyscripts.compute.priors import PriorsCLI, compute_priors -from lyscripts.compute.utils import get_cached -from lyscripts.configs import ( - DistributionConfig, - GraphConfig, - ModelConfig, - SamplingConfig, - ScenarioConfig, -) -from lyscripts.data.generate import GenerateCLI -from lyscripts.sample import SampleCLI -from lyscripts.utils import load_patient_data, load_yaml_params - - -@pytest.fixture(scope="session") -def monkeymodule(): - """Create a session scoped monkeypatch fixture. - - This can be used to e.g. mock the command line arguments by setting the - ``sys.argv`` variable. - """ - with pytest.MonkeyPatch.context() as mp: - yield mp - - -@pytest.fixture(scope="session") -def data_file() -> Path: - """Provide the path to the generated data. - - Delete any file at the beginning of a session if it exists. 
- """ - res = Path("tests/integration/generated.csv") - res.parent.mkdir(exist_ok=True) - if res.exists(): - res.unlink() - return res - - -@pytest.fixture(scope="session") -def samples_file() -> Path: - """Provide the path to the generated samples. - - Delete any file at the beginning of a session if it exists. - """ - res = Path("tests/integration/samples.hdf5") - res.parent.mkdir(exist_ok=True) - if res.exists(): - res.unlink() - return res - - -def _get_config_file(name: str) -> Path: - return Path(f"tests/integration/config/{name}.ly.yaml") - - -@pytest.fixture(scope="session") -def model_config_file() -> Path: - """Provide the path to the model configuration file.""" - return _get_config_file("model") - - -@pytest.fixture(scope="session") -def graph_config_file() -> Path: - """Provide the path to the graph configuration file.""" - return _get_config_file("graph") - - -@pytest.fixture(scope="session") -def distributions_config_file() -> Path: - """Provide the path to the distributions configuration file.""" - return _get_config_file("distributions") - - -@pytest.fixture(scope="session") -def modalities_config_file() -> Path: - """Provide the path to the modalities configuration file.""" - return _get_config_file("modalities") - - -@pytest.fixture(scope="session") -def scenarios_config_file() -> Path: - """Provide the path to the scenarios configuration file.""" - return _get_config_file("scenarios") - - -@pytest.fixture(scope="session") -def sampling_config_file() -> Path: - """Provide the path to the sampling configuration file.""" - return _get_config_file("sampling") - - -@pytest.fixture(scope="session") -def data_config_file() -> Path: - """Provide the path to the data configuration file.""" - return _get_config_file("data") - - -@pytest.fixture(scope="session") -def model_config(model_config_file: Path) -> ModelConfig: - """Provide the model configuration.""" - yaml_config = load_yaml_params(model_config_file) - return ModelConfig(**yaml_config["model"]) 
- - -@pytest.fixture(scope="session") -def graph_config(graph_config_file: Path) -> GraphConfig: - """Provide the graph configuration.""" - yaml_config = load_yaml_params(graph_config_file) - return GraphConfig(**yaml_config["graph"]) - - -@pytest.fixture(scope="session") -def distributions_config( - distributions_config_file: Path, -) -> dict[str, DistributionConfig]: - """Provide the distributions configuration.""" - yaml_config = load_yaml_params(distributions_config_file) - type_adapter = TypeAdapter(dict[str, DistributionConfig]) - return type_adapter.validate_python(yaml_config["distributions"]) - - -@pytest.fixture(scope="session") -def modalities_config(modalities_config_file: Path) -> dict[str, ModalityConfig]: - """Provide the modalities configuration.""" - yaml_config = load_yaml_params(modalities_config_file) - type_adapter = TypeAdapter(dict[str, ModalityConfig]) - return type_adapter.validate_python(yaml_config["modalities"]) - - -@pytest.fixture(scope="session") -def scenarios_config(scenarios_config_file: Path) -> list[ScenarioConfig]: - """Provide a list of defined scenarios.""" - yaml_config = load_yaml_params(scenarios_config_file) - type_adapter = TypeAdapter(list[ScenarioConfig]) - return type_adapter.validate_python(yaml_config["scenarios"]) - - -@pytest.fixture(scope="session") -def sampling_config(sampling_config_file: Path) -> SamplingConfig: - """Provide the sampling configuration.""" - yaml_config = load_yaml_params(sampling_config_file) - return SamplingConfig(**yaml_config["sampling"]) - - -@pytest.fixture(scope="session") -def generated_data( - monkeymodule, - data_file: Path, - model_config_file: Path, - graph_config_file: Path, - distributions_config_file: Path, - modalities_config_file: Path, -) -> pd.DataFrame: - """Execute the generate CLI and provide the generated data as a fixture.""" - monkeymodule.setattr( - sys, - "argv", - [ - "generate", - "--configs", - str(model_config_file.resolve()), - "--configs", - 
str(graph_config_file.resolve()), - "--configs", - str(distributions_config_file.resolve()), - "--configs", - str(modalities_config_file.resolve()), - "--num-patients", - "200", - "--output-file", - str(data_file), - "--seed", - "42", - ], - ) - main = assemble_main(settings_cls=GenerateCLI, prog_name="generate") - main() - return load_patient_data(data_file) - - -@pytest.fixture(scope="session") -def drawn_samples( - monkeymodule, - generated_data: pd.DataFrame, - data_file: Path, - model_config_file: Path, - graph_config_file: Path, - distributions_config_file: Path, - modalities_config_file: Path, - sampling_config_file: Path, - samples_file: Path, -) -> np.ndarray: - """Execute the sampling CLI and provide the samples as a fixture.""" - monkeymodule.setattr( - sys, - "argv", - [ - "sample", - "--configs", - str(model_config_file.resolve()), - "--configs", - str(graph_config_file.resolve()), - "--configs", - str(distributions_config_file.resolve()), - "--configs", - str(modalities_config_file.resolve()), - "--configs", - str(sampling_config_file.resolve()), - "--sampling.storage-file", - str(samples_file.resolve()), - # mapping because generated data already has the correct T-stage column - '--data.mapping={"early": "early", "late": "late"}', - "--data.source", - str(data_file), - ], - ) - main = assemble_main(settings_cls=SampleCLI, prog_name="sample") - main() - _yaml_params = load_yaml_params(sampling_config_file) - _sampling_config = SamplingConfig( - storage_file=samples_file, **_yaml_params["sampling"] - ) - return _sampling_config.load() - - -@pytest.fixture(scope="session") -def cache_dir() -> Path: - """Provide the path to the cache directory as a fixture. - - Delete any directory at the beginning of a session if it exists. - """ - res = Path("tests/integration/.cache") - if res.exists(): - shutil.rmtree(res) - return res - - -@pytest.fixture(scope="session") -def priors_file() -> Path: - """Provide the path to the computed priors as a fixture. 
- - Delete any file at the beginning of a session if it exists. - """ - res = Path("tests/integration/priors.hdf5") - res.parent.mkdir(exist_ok=True) - if res.exists(): - res.unlink() - return res - - -@pytest.fixture(scope="session") -def prevalences_file() -> Path: - """Provide the path to the computed prevalences as a fixture. - - Delete any file at the beginning of a session if it exists. - """ - res = Path("tests/integration/prevalences.hdf5") - res.parent.mkdir(exist_ok=True) - if res.exists(): - res.unlink() - return res - - -@pytest.fixture(scope="session") -def computed_priors( - monkeymodule, - cache_dir: Path, - model_config_file: Path, - graph_config_file: Path, - distributions_config_file: Path, - scenarios_config_file: Path, - sampling_config_file: Path, - samples_file: Path, - priors_file: Path, - dataset: str = "000", -) -> np.ndarray: - """Execute the ``priors`` CLI and provide the computed arrays as a fixture.""" - monkeymodule.setattr( - sys, - "argv", - [ - "priors", - "--cache-dir", - str(cache_dir.resolve()), - "--configs", - str(model_config_file.resolve()), - "--configs", - str(graph_config_file.resolve()), - "--configs", - str(distributions_config_file.resolve()), - "--configs", - str(scenarios_config_file.resolve()), - "--configs", - str(sampling_config_file.resolve()), - "--sampling.storage-file", - str(samples_file.resolve()), - "--priors.file", - str(priors_file), - ], - ) - main = assemble_main(settings_cls=PriorsCLI, prog_name="priors") - main() - with h5py.File(priors_file, "r") as h5file: - return h5file[dataset][:] - - -@pytest.fixture(scope="session") -def computed_prevalences( - monkeymodule, - cache_dir: Path, - model_config_file: Path, - graph_config_file: Path, - distributions_config_file: Path, - scenarios_config_file: Path, - modalities_config_file: Path, - sampling_config_file: Path, - samples_file: Path, - prevalences_file: Path, - data_file: Path, - # to ensure the correct execution order, also require data and samples - 
generated_data: pd.DataFrame, - drawn_samples: np.ndarray, - dataset: str = "000", -) -> tuple[np.ndarray, int, int]: - """Provide the computed prevalences as a fixture.""" - monkeymodule.setattr( - sys, - "argv", - [ - "prevalences", - "--cache-dir", - str(cache_dir.resolve()), - "--configs", - str(graph_config_file.resolve()), - "--configs", - str(model_config_file.resolve()), - "--configs", - str(distributions_config_file.resolve()), - "--configs", - str(scenarios_config_file.resolve()), - "--configs", - str(modalities_config_file.resolve()), - "--configs", - str(sampling_config_file.resolve()), - "--sampling.storage-file", - str(samples_file.resolve()), - "--prevalences.file", - str(prevalences_file), - "--data.source", - str(data_file.resolve()), - "--data.mapping", - '{"early": "early", "late": "late"}', - ], - ) - main = assemble_main(settings_cls=PrevalencesCLI, prog_name="prevalences") - main() - with h5py.File(prevalences_file, "r") as h5file: - return ( - h5file[dataset][:], - h5file[dataset].attrs["num_match"], - h5file[dataset].attrs["num_total"], - ) - - -def test_generated_data(generated_data: pd.DataFrame) -> None: - """Test the generated data.""" - assert generated_data.shape == (200, 3) - assert ( - generated_data["imaging", "ipsi", "II"].sum() - > generated_data["imaging", "ipsi", "III"].sum() - ) - assert generated_data.ly.t_stage.isin(["early", "late"]).all() - assert all( - generated_data.ly.query(C("t_stage") == "early")["imaging", "ipsi"].mean() - < generated_data.ly.query(C("t_stage") == "late")["imaging", "ipsi"].mean() - ) - - -def test_scenarios(scenarios_config: list[ScenarioConfig]) -> None: - """Check the loaded scenarios.""" - for scenario in scenarios_config: - assert np.isclose(np.sum(scenario.t_stages_dist), 1.0) - - -def test_drawn_samples(drawn_samples: np.ndarray) -> None: - """Test the drawn samples.""" - assert drawn_samples.shape[-1] == 4 - - -def test_computed_priors( - cache_dir: Path, - model_config: ModelConfig, - 
graph_config: GraphConfig, - distributions_config: dict[str, DistributionConfig], - drawn_samples: np.ndarray, - scenarios_config: list[ScenarioConfig], - computed_priors: np.ndarray, -) -> None: - """Test the computed priors.""" - scenario = scenarios_config[0] - kwargs = scenario.model_dump(include={"t_stages", "t_stages_dist", "mode"}) - cached_compute_priors = get_cached(compute_priors, cache_dir) - - kwargs.update( - { - "model_config": model_config, - "graph_config": graph_config, - "dist_configs": distributions_config, - "samples": drawn_samples, - } - ) - assert cached_compute_priors._cached_func.check_call_in_cache(**kwargs) - cached_output = cached_compute_priors(**kwargs) - assert np.allclose(computed_priors, cached_output) - - assert computed_priors.shape[-1] == 4 - assert np.all(computed_priors >= 0.0) - assert np.all(computed_priors <= 1.0) - - -def test_computed_prevalences( - computed_prevalences: tuple[np.ndarray, int, int], -) -> None: - """Test the computed prevalences.""" - prevalences, num_match, num_total = computed_prevalences - num_match, num_total = int(num_match), int(num_total) - assert num_match == 64 - assert num_total == 123 diff --git a/tests/plot/baseline/sine.png b/tests/plot/baseline/sine.png deleted file mode 100644 index 1c12e3f..0000000 Binary files a/tests/plot/baseline/sine.png and /dev/null differ diff --git a/tests/plot/baseline/sine.svg b/tests/plot/baseline/sine.svg deleted file mode 100644 index 302bf38..0000000 --- a/tests/plot/baseline/sine.svg +++ /dev/null @@ -1,515 +0,0 @@ - - - - - - - - 2022-11-03T17:25:58.517193 - image/svg+xml - - - Matplotlib v3.5.2, https://matplotlib.org/ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/plot/baseline/sine_svg.png b/tests/plot/baseline/sine_svg.png deleted file mode 100644 index edf255e..0000000 Binary files a/tests/plot/baseline/sine_svg.png and /dev/null differ diff --git a/tests/plot/baseline/test_draw.png b/tests/plot/baseline/test_draw.png deleted file mode 100644 index 9decc57..0000000 Binary files a/tests/plot/baseline/test_draw.png and /dev/null differ diff --git a/tests/plot/data/beta_samples.hdf5 b/tests/plot/data/beta_samples.hdf5 deleted file mode 100644 index b08fb84..0000000 Binary files a/tests/plot/data/beta_samples.hdf5 and /dev/null differ diff --git a/tests/plot/plot_utils_test.py b/tests/plot/plot_utils_test.py deleted file mode 100644 index 9140cf3..0000000 --- a/tests/plot/plot_utils_test.py +++ /dev/null @@ -1,284 +0,0 @@ -"""Testing of the utilities implemented for the plotting routines.""" - -from pathlib import Path - -import matplotlib.pyplot as plt -import matplotlib.testing.compare as mpl_comp -import numpy as np -import pytest - -from lyscripts.plots import ( - BetaPosterior, - Histogram, - ceil_to_step, - draw, - floor_to_step, - get_size, - save_figure, -) - - -@pytest.fixture -def beta_samples() -> str: - """Name of HDF5 file where some samples from a Beta distribution are stored.""" - return "./tests/plot/data/beta_samples.hdf5" - - -def test_floor_to_step(): - """Check correct rounding down to a given step size.""" - numbers = np.array([0.0, 3.0, 7.4, 2.01, np.pi, 12.7, 12.7, 17.3]) - steps = np.array([2, 2, 5, 2, 3, 3, 5, 0.17]) - exp_res = np.array([0.0, 2.0, 5.0, 2.0, 3.0, 12.0, 10.0, 17.17]) - - comp_res = np.zeros_like(exp_res) - for i, (num, step) in enumerate(zip(numbers, steps, strict=False)): - comp_res[i] = floor_to_step(num, step) - - assert all(np.isclose(comp_res, exp_res)), "Floor to step did not work properly." 
- - -def test_ceil_to_step(): - """Check correct rounding up to a given step size.""" - numbers = np.array([0.0, 3.0, 7.4, 2.01, np.pi, 12.7, 12.7, 17.3]) - steps = np.array([2, 2, 5, 2, 3, 3, 5, 0.17]) - exp_res = np.array([2.0, 4.0, 10.0, 4.0, 6.0, 15.0, 15.0, 17.34]) - - comp_res = np.zeros_like(exp_res) - for i, (num, step) in enumerate(zip(numbers, steps, strict=False)): - comp_res[i] = ceil_to_step(num, step) - - assert all(np.isclose(comp_res, exp_res)), "Ceil to step did not work properly." - - -def test_histogram_cls(beta_samples: str): - """Make sure the histogram data container works as intended.""" - str_filename = beta_samples - path_filename = Path(str_filename) - non_existent_filename = "non_existent.hdf5" - custom_label = "Lorem ipsum" - - hist_from_str = Histogram.from_hdf5(filename=str_filename, dataname="beta") - hist_from_path = Histogram.from_hdf5( - filename=path_filename, - dataname="beta", - scale=10.0, - label=custom_label, - ) - - with pytest.raises(FileNotFoundError): - Histogram.from_hdf5(filename=non_existent_filename, dataname="does_not_matter") - - assert np.all( - np.isclose(hist_from_str.values, 10.0 * hist_from_path.values) - ), "Scaling of data does not work correclty" - assert np.all( - np.isclose( - hist_from_str.left_percentile(50.0), - hist_from_str.right_percentile(50.0), - ) - ), "50% percentiles should be the same from the left and from the right." 
- assert np.all( - np.isclose( - hist_from_path.left_percentile(10.0), - hist_from_path.right_percentile(90.0), - ) - ), "10% from the left is not the same as 90% from the right" - assert ( - hist_from_str.kwargs["label"] == "beta | mega scan | 100 | ext" - ), "Label extraction did not work" - assert ( - hist_from_path.kwargs["label"] == custom_label - ), "Keyword override did not work" - - -def test_inverted_histogram_cls(beta_samples: str): - """Make sure the histogram data container works as intended.""" - str_filename = beta_samples - path_filename = Path(str_filename) - custom_label = "Lorem ipsum" - - hist_from_str = Histogram.from_hdf5(filename=str_filename, dataname="beta") - hist_from_path = Histogram.from_hdf5( - filename=path_filename, - dataname="beta", - scale=-100.0, - offset=100.0, - label=custom_label, - ) - - assert np.all( - np.isclose(100.0 - hist_from_str.values, hist_from_path.values) - ), "Scaling and offsetting of data does not work correclty" - assert np.all( - np.isclose( - hist_from_str.left_percentile(50.0), - hist_from_str.right_percentile(50.0), - ) - ), "50% percentiles should be the same from the left and from the right." 
- assert np.all( - np.isclose( - hist_from_path.left_percentile(10.0), - hist_from_path.right_percentile(90.0), - ) - ), "10% from the left is not the same as 90% from the right" - assert ( - hist_from_str.kwargs["label"] == "beta | mega scan | 100 | ext" - ), "Label extraction did not work" - assert ( - hist_from_path.kwargs["label"] == custom_label - ), "Keyword override did not work" - - -def test_posterior_cls(beta_samples: str): - """Test the container class for Beta posteriors.""" - str_filename = beta_samples - path_filename = Path(str_filename) - non_existent_filename = "non_existent.hdf5" - custom_label = "Lorem ipsum" - x_10 = np.linspace(0.0, 10.0, 100) - x_100 = np.linspace(0.0, 100.0, 100) - - post_from_str = BetaPosterior.from_hdf5(filename=str_filename, dataname="beta") - post_from_path = BetaPosterior.from_hdf5( - filename=path_filename, - dataname="beta", - scale=10.0, - label=custom_label, - ) - - with pytest.raises(FileNotFoundError): - BetaPosterior.from_hdf5( - filename=non_existent_filename, dataname="does_not_matter" - ) - - assert ( - post_from_str.num_success == post_from_path.num_success == 20 - ), "Number of successes not correctly extracted" - assert ( - post_from_str.num_total == post_from_path.num_total == 40 - ), "Total number of trials not correctly extracted" - assert ( - post_from_str.num_fail == post_from_path.num_fail == 20 - ), "Number of failures not correctly computed" - assert np.all( - np.isclose( - 10 * post_from_str.pdf(x_100), - post_from_path.pdf(x_10), - ) - ), "PDFs with different scaling do not match" - assert np.all( - np.isclose( - post_from_str.left_percentile(50.0), - post_from_str.right_percentile(50.0), - ) - ), "50% percentiles should be the same from the left and from the right." 
- assert np.all( - np.isclose( - post_from_path.left_percentile(10.0), - post_from_path.right_percentile(90.0), - ) - ), "10% from the left is not the same as 90% from the right" - - -@pytest.mark.mpl_image_compare -def test_draw(beta_samples: str): - """Check the drawing function.""" - filename = Path(beta_samples) - dataname = "beta" - hist = Histogram.from_hdf5(filename, dataname) - post = BetaPosterior.from_hdf5(filename, dataname) - fig, ax = plt.subplots() - ax = draw(axes=ax, contents=[hist, post], percent_lims=(2.0, 2.0)) - return fig - - -def test_draw_hist_kwargs(beta_samples: str): - """Make sure the `hist_kwargs` can override the defaults.""" - filename = Path(beta_samples) - dataname = "beta" - - hist = Histogram.from_hdf5(filename, dataname) - default_kwargs_path = "./tests/plot/results/default_kwargs" - fig, default_kwargs_ax = plt.subplots() - default_kwargs_ax = draw(default_kwargs_ax, contents=[hist]) - save_figure(default_kwargs_path, fig, ["png"]) - - bins_kwargs_path = "./tests/plot/results/bins_kwargs" - fig, bins_kwargs_ax = plt.subplots() - bins_kwargs_ax = draw(bins_kwargs_ax, contents=[hist], hist_kwargs={"bins": 70}) - save_figure(bins_kwargs_path, fig, ["png"]) - - global_kwargs_path = "./tests/plot/results/global_kwargs" - fig, global_kwargs_ax = plt.subplots() - global_kwargs_ax = draw( - global_kwargs_ax, contents=[hist], hist_kwargs={"alpha": 0.3} - ) - save_figure(global_kwargs_path, fig, ["png"]) - - hist = Histogram.from_hdf5(filename, dataname, alpha=0.3) - local_kwargs_path = "./tests/plot/results/local_kwargs" - fig, local_kwargs_ax = plt.subplots() - local_kwargs_ax = draw(local_kwargs_ax, contents=[hist], hist_kwargs={"alpha": 1.0}) - save_figure(local_kwargs_path, fig, ["png"]) - - assert ( - mpl_comp.compare_images( - expected=default_kwargs_path + ".png", - actual=bins_kwargs_path + ".png", - tol=0.001, - ) - is not None - ), "Changing bin number did not result in different plot" - - assert ( - mpl_comp.compare_images( - 
expected=default_kwargs_path + ".png", - actual=global_kwargs_path + ".png", - tol=0.001, - ) - is not None - ), "Changing global kwargs in `draw` did not result in different plot" - - assert ( - mpl_comp.compare_images( - expected=local_kwargs_path + ".png", - actual=global_kwargs_path + ".png", - tol=0.001, - ) - is None - ), "Overriding global with `Histogram` specific kwargs did not work" - - -def test_save_figure(capsys): - """Check that figures get stored correctly.""" - x = np.linspace(0.0, 2 * np.pi, 200) - y = np.sin(x) - fig, ax = plt.subplots(figsize=get_size()) - ax.plot(x, y) - output_path = "./tests/plot/results/sine" - formats = ["png", "svg"] - - save_figure(output_path, fig, formats) - - assert ( - mpl_comp.compare_images( - expected="./tests/plot/baseline/sine.png", - actual="./tests/plot/results/sine.png", - tol=0.0, - ) - is None - ), "PNG of figure was not stored correctly." - - # Commented out, because I recently got the following message from matplotlib: - # `SKIPPED (Don't know how to convert .svg files to png)` - # So, I am commenting out this test for now. - - # assert mpl_comp.compare_images( - # expected="./tests/plot/baseline/sine.svg", - # actual="./tests/plot/results/sine.svg", - # tol=0., - # ) is None, "SVG of figure was not stored correctly." - - # assert save_figure_capture.out == expected_output, ( - # "The output during the save figure procedure was wrong." 
- # ) diff --git a/tests/run_doctests.py b/tests/run_doctests.py deleted file mode 100644 index 241943e..0000000 --- a/tests/run_doctests.py +++ /dev/null @@ -1,12 +0,0 @@ -"""Script to run doctests in the modules of `lyscripts`.""" - -import doctest - -from lyscripts import plots, utils -from lyscripts.compute import prevalences, risks - -if __name__ == "__main__": - doctest.testmod(utils, verbose=True) - doctest.testmod(plots, verbose=True) - doctest.testmod(prevalences, verbose=True) - doctest.testmod(risks, verbose=True) diff --git a/tests/test.yaml b/tests/test.yaml deleted file mode 100644 index 552e4e2..0000000 --- a/tests/test.yaml +++ /dev/null @@ -1 +0,0 @@ -test: This is just for testing diff --git a/tests/test_backend.hdf5 b/tests/test_backend.hdf5 deleted file mode 100644 index 6312b3d..0000000 Binary files a/tests/test_backend.hdf5 and /dev/null differ diff --git a/tests/test_data.csv b/tests/test_data.csv deleted file mode 100644 index 3fbd2a1..0000000 --- a/tests/test_data.csv +++ /dev/null @@ -1,23 +0,0 @@ -synth_CT,synth_CT,synth_CT,tumor -ipsi,ipsi,ipsi,1 -a,b,c,t_stage -False,True,True,late -False,True,True,early -True,True,False,early -False,False,True,late -True,True,False,early -True,False,True,early -False,True,True,early -False,False,True,early -False,False,True,early -False,True,True,early -False,False,True,early -True,False,True,early -True,False,True,early -False,True,True,early -True,True,False,early -False,False,False,early -True,False,False,early -True,True,True,early -False,False,True,early -False,True,True,early diff --git a/tests/test_params_v0.yaml b/tests/test_params_v0.yaml deleted file mode 100644 index 492e0cb..0000000 --- a/tests/test_params_v0.yaml +++ /dev/null @@ -1,69 +0,0 @@ -######################################################################################## -# This dictionary defines which LNLs are present in the -# graph and how they are connected. 
In this case, the -# primary tumors spreads to the LNLs II, III and IV. -# Subsequently, level II spreads to III and III to IV. -graph: - tumor: - T: [a, b, c] - lnl: - a: [b, c] - b: [c] - c: [] - -######################################################################################## -# Below are some parameters necessary for the model setup: -model: - first_binom_prob: 0.3 # defines shape of time prior of first T-stage - max_t: 10 # max number of time steps to evolve system - t_stages: [early, late] # T-stages to consider in the data - class: MidlineBilateral # model class to use (see `lymph-model` docs) - # keyword arguments to pass to the model constructor - kwargs: - base_symmetric: false - trans_symmetric: true - use_mixing: true - -######################################################################################## -# Choose how to create the synthetic dataset. The currently set values -# reflect what one can see in the data (e.g. on https://lyprox.org) -synthetic: - t_stages_dist: - early: 0.6 # 60% of synethetic patients are early T-stage... - late: 0.4 # ...and 40% late T-stage - midline_ext_prob: 0.3 # create 30% of patients with midline extension - - # Use these made-up specificity & sensitivity values - # to create the fake observations - modalities: - synth_CT: [0.85, 0.85] - -######################################################################################## -# Under this key we define the specficity and sensitivity for a range -# of diagnostic modalities. They are taken from literature and during -# sampling, everything that's not commented our here will be used for -# inference. -modalities: - synth_CT: [0.87, 0.83] - -# this lists the above defined modalities that will be used for -# inference and evaluation. -# The wird nested way of writing them down here is so that DVC can unpack the dict -# directly into the command. 
-inference_modalities: - modalities: - - synth_CT - -######################################################################################## -# This defines the sampler settings and the thermodynamic integration path -sampling: - walkers_per_dim: 5 # num of parallel walkers per parameter space dimension - burnin: 10 # burn-in steps to discard - nsteps: 10 # do this many serious steps per sampling round - thin_by: 1 # draw this many samples for one step in `nsteps` - # only used when sampling until convergence - kwargs: - max_steps: 10 - check_interval: 20 - trust_threshold: 30.0 - rel_acor_threshold: 0.075 diff --git a/tests/test_params_v1.yaml b/tests/test_params_v1.yaml deleted file mode 100644 index 913dd8c..0000000 --- a/tests/test_params_v1.yaml +++ /dev/null @@ -1,50 +0,0 @@ -version: 1 - -######################################################################################## -# This dictionary defines which LNLs are present in the -# graph and how they are connected. In this case, the -# primary tumors spreads to the LNLs II, III and IV. -# Subsequently, level II spreads to III and III to IV. -graph: - tumor: - T: [a, b, c] - lnl: - a: [b, c] - b: [c] - c: [] - -######################################################################################## -# Below are some parameters necessary for the model setup: -model: - class_name: Midline - constructor: binary - max_time: 10 - kwargs: - is_symmetric: - tumor_spread: false - lnl_spread: true - use_mixing: true - use_central: false - use_midext_evo: true - -# marginalize over diagnose times. 
Keys are T-stages -distributions: - early: - kind: frozen - func: binomial - params: {p: 0.3} - late: - kind: parametric - func: binomial - params: {p: 0.3} - -# diagnostic modalities and their sensitivity/specificity values -modalities: - CT: - spec: 0.76 - sens: 0.81 - kind: clinical - FNA: - spec: 0.98 - sens: 0.80 - kind: pathological diff --git a/tests/test_sample_params.yaml b/tests/test_sample_params.yaml deleted file mode 100644 index ff917d3..0000000 --- a/tests/test_sample_params.yaml +++ /dev/null @@ -1,27 +0,0 @@ -version: 1 - -# graph -graph: - tumor: - T: [II, III] - lnl: - II: [III] - III: [] - -# model setup -model: - class: Unilateral - distributions: # marginalize over diagnose times. Keys are T-stages - early: - frozen: binomial - max_time: 10 - kwargs: {p: 0.3} - late: - parametric: binomial - max_time: 10 - modalities: [CT, FNA] # List of modalities, defined further down - -# definition of sensitivities and specificities -modalities: - CT: [0.76, 0.81] - FNA: [0.98, 0.80] diff --git a/tests/utils_test.py b/tests/utils_test.py deleted file mode 100644 index ebeaad1..0000000 --- a/tests/utils_test.py +++ /dev/null @@ -1,90 +0,0 @@ -"""Test the core utility functions of the package.""" - -from pathlib import Path -from typing import Any - -import pytest -import yaml -from pydantic import TypeAdapter - -from lyscripts.configs import DeprecatedModelConfig, DistributionConfig, ModelConfig -from lyscripts.utils import ( - flatten, - get_modalities_subset, -) - - -def test_flatten(): - """Check if the dictionary flattening works.""" - nested = { - "A": {"a": 1, "b": 2, "c": 3}, - "B": {"a": 4, "b": 5, "c": 6}, - "C": {"a": {"i": 7, "ii": 8}}, - } - exp_flattened = { - ("A", "a"): 1, - ("A", "b"): 2, - ("A", "c"): 3, - ("B", "a"): 4, - ("B", "b"): 5, - ("B", "c"): 6, - ("C", "a", "i"): 7, - ("C", "a", "ii"): 8, - } - - actual_flattened = flatten(nested) - assert actual_flattened == exp_flattened, "Dictionary was not flattened properly." 
- - -def test_get_modalities_subset(): - """Test the extraction of a modality subset.""" - modalities = { - "CT": [0.76, 0.81], - "MRI": [0.63, 0.86], - "PET": [0.79, 0.83], - "path": [1.0, 1.0], - } - selected = ["CT", "path"] - exp_subset = { - "CT": [0.76, 0.81], - "path": [1.0, 1.0], - } - - actual_subset = get_modalities_subset(modalities, selected) - assert actual_subset == exp_subset, "Extraction of modalities did not work." - - -@pytest.fixture -def v0_config() -> dict[str, Any]: - """Return a deprecated model configuration.""" - config_path = Path("tests/test_params_v0.yaml") - with open(config_path) as config_file: - return yaml.safe_load(config_file) - - -@pytest.fixture -def v1_config() -> dict[str, Any]: - """Return a deprecated model configuration.""" - config_path = Path("tests/test_params_v1.yaml") - with open(config_path) as config_file: - return yaml.safe_load(config_file) - - -def test_translate_deprecated_model_config( - v0_config: dict[str, Any], - v1_config: dict[str, Any], -): - """Test the translation of the deprecated model configuration.""" - adapter = TypeAdapter(dict[str | int, DistributionConfig]) - - old_model_config = DeprecatedModelConfig(**v0_config["model"]) - exp_model_config = ModelConfig(**v1_config["model"]) - exp_dist_configs = adapter.validate_python(v1_config["distributions"]) - - trans_model_config, trans_dist_configs = old_model_config.translate() - - assert ( # noqa - exp_model_config.model_dump(exclude="kwargs") - == trans_model_config.model_dump(exclude="kwargs") - ) - assert exp_dist_configs == trans_dist_configs