From 305e3b0bc8a91cfe783f1903f57a71a11c79067e Mon Sep 17 00:00:00 2001
From: noemibuehrer <nritzmann@student.ethz.ch>
Date: Tue, 5 Aug 2025 08:28:37 +0200
Subject: [PATCH 01/20] docs: update documentation for `integrate` and
 `evidence` commands

---
 src/lyscripts/compute/evidence.py | 12 ++++++++++--
 src/lyscripts/integrate.py        | 20 ++++++++++++++++++--
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/src/lyscripts/compute/evidence.py b/src/lyscripts/compute/evidence.py
index 8959d46..53aa885 100644
--- a/src/lyscripts/compute/evidence.py
+++ b/src/lyscripts/compute/evidence.py
@@ -1,4 +1,7 @@
-"""Given samples drawn during thermodynamic integration, compute the model log evidence."""
+"""Given the samples drawn during thermodynamic integration and their
+respective log likelihoods, compute the model log evidence and 
+the Bayesian Information Criterion.
+"""
 
 from __future__ import annotations
 import numpy as np
@@ -112,7 +115,12 @@ class EvidenceCLI(BaseCLI):
 
 
     def cli_cmd(self) -> None:
-        """Start the evidence computation process."""
+        """Start the ``evidence`` subcommand.
+        Given the MCMC samples from thermodynamic integration provided by the ``sampling``
+        argument and the corresponding inverse temperature schedule, specified in the
+        ``schedule`` argument, the model evidence is computed using the functions
+        ``compute_ti_results`` and ``compute_evidence``. Further the BIC is evaluated.
+        """
 
         data = self.data.load()
 
diff --git a/src/lyscripts/integrate.py b/src/lyscripts/integrate.py
index d54a9d6..926e547 100644
--- a/src/lyscripts/integrate.py
+++ b/src/lyscripts/integrate.py
@@ -1,5 +1,10 @@
 """
 Perform thermodynamic integration to evaluate the model evidence.
+
+Using the functions provided by the `sample` module, this script implements
+thermodynamic integration (TI) in order to compute the model evidence.
+This is done by sampling the model parameters at different inverse temperatures
+following a specified schedule.
 """
 
 from __future__ import annotations
@@ -42,7 +47,7 @@ def init_ti_sampler(
     inv_temp: float, 
     pool: Any    
 ) -> emcee.EnsembleSampler:
-    """Initialize the ``emcee.EnsembleSampler``with the given ``settings''."""
+    """Initialize the ``emcee.EnsembleSampler`` for TI with the given ``settings''."""
     nwalkers = ndim * settings.sampling.walkers_per_dim
     backend = get_hdf5_backend(
                     file_path=settings.sampling.storage_file,
@@ -88,7 +93,18 @@ class IntegrateCLI(BaseCLI):
 
 
     def cli_cmd(self) -> None:
-        """Start the thermodynamic integration process."""
+        """Start the ``integrate`` subcommand.
+        
+        The model construction and setup is done analogously to the 
+        ``sample`` command. Afterwards, an :py:class:`emcee.EnsembleSampler` 
+        is initialized (see :py:func:`init_sampler`) and :py:func:`run_sampling`, 
+        implemented in the ``sample``module, is executed twice for each TI step:
+        once for the burn-in phase and once for the actual sampling phase. 
+        Thereby, the log likelihood is scaled by the respective inverse 
+        temperature of that step. All necessary settings for the sampling 
+        are passed by the ``sampling``argument, except for the inverse 
+        temperatures, which are provided by the ``schedule`` argument.
+        """
         # as recommended in https://emcee.readthedocs.io/en/stable/tutorials/parallel/#
         os.environ["OMP_NUM_THREADS"] = "1"
 

From e2311bdc842345617a54b0e8c811d22e8d7b3d81 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Wed, 13 Aug 2025 10:15:06 +0200
Subject: [PATCH 02/20] change: centralize inv temp schedule generation

---
 src/lyscripts/compute/evidence.py | 81 +++++++++++++-------------
 src/lyscripts/configs.py          | 91 ++++++++++++++++++++++-------
 src/lyscripts/integrate.py        | 95 ++++++++++++++-----------------
 src/lyscripts/schedule.py         | 58 +------------------
 4 files changed, 158 insertions(+), 167 deletions(-)

diff --git a/src/lyscripts/compute/evidence.py b/src/lyscripts/compute/evidence.py
index 53aa885..1cd51e2 100644
--- a/src/lyscripts/compute/evidence.py
+++ b/src/lyscripts/compute/evidence.py
@@ -1,18 +1,21 @@
-"""Given the samples drawn during thermodynamic integration and their
-respective log likelihoods, compute the model log evidence and 
-the Bayesian Information Criterion.
+"""Compute the model evidence from MCMC samples.
+
+Given the samples drawn during thermodynamic integration and their respective log
+likelihoods, compute the model log evidence and the Bayesian Information Criterion.
 """
 
 from __future__ import annotations
-import numpy as np
+
 import json
+from pathlib import Path
+
+import emcee
+import h5py
+import numpy as np
 import pandas as pd
 from loguru import logger
 from pydantic import Field
-
-import h5py
-import emcee
-from pathlib import Path
+from scipy.integrate import trapezoid
 
 from lyscripts.cli import assemble_main
 from lyscripts.configs import (
@@ -21,12 +24,10 @@
     SamplingConfig,
     ScheduleConfig,
 )
-from lyscripts.schedule import SCHEDULES
-
-from scipy.integrate import trapezoid
 
 RNG = np.random.default_rng()
 
+
 def comp_bic(log_probs: np.ndarray, num_params: int, num_data: int) -> float:
     r"""Compute the negative one half of the Bayesian Information Criterion (BIC).
 
@@ -65,6 +66,7 @@ def compute_evidence(
         integrals[i] = trapezoid(y=drawn_accuracy, x=temp_schedule)
     return np.mean(integrals), np.std(integrals)
 
+
 def compute_ti_results(
     settings: EvidenceCLI,
     temp_schedule: np.ndarray,
@@ -86,8 +88,12 @@ def compute_ti_results(
     ti_log_probs = np.zeros(shape=(num_temps, nsteps * nwalker))
 
     for i, run in enumerate(h5_file["ti"]):
-        reader = emcee.backends.HDFBackend(settings.sampling.storage_file, name=f"ti/{run}", read_only=True)
-        ti_log_probs[i] = reader.get_blobs(flat=True)['log_prob']
+        reader = emcee.backends.HDFBackend(
+            settings.sampling.storage_file,
+            name=f"ti/{run}",
+            read_only=True,
+        )
+        ti_log_probs[i] = reader.get_blobs(flat=True)["log_prob"]
 
     evidence, evidence_std = compute_evidence(temp_schedule, ti_log_probs)
     metrics["evidence"] = evidence
@@ -102,7 +108,7 @@ class EvidenceCLI(BaseCLI):
     data: DataConfig
     sampling: SamplingConfig
     schedule: ScheduleConfig = Field(
-        description="Configuration for generating inverse temperature schedule."
+        description="Configuration for generating inverse temperature schedule.",
     )
     plots: Path = Field(
         default="./plots",
@@ -113,35 +119,31 @@ class EvidenceCLI(BaseCLI):
         description="Path to metrics file.",
     )
 
-
     def cli_cmd(self) -> None:
         """Start the ``evidence`` subcommand.
-        Given the MCMC samples from thermodynamic integration provided by the ``sampling``
-        argument and the corresponding inverse temperature schedule, specified in the
-        ``schedule`` argument, the model evidence is computed using the functions
-        ``compute_ti_results`` and ``compute_evidence``. Further the BIC is evaluated.
-        """
 
+        Given the MCMC samples from thermodynamic integration provided by the
+        ``sampling`` argument and the corresponding inverse temperature schedule,
+        specified in the ``schedule`` argument, the model evidence is computed using
+        the functions :py:func:`compute_ti_results` and :py:func`compute_evidence`.
+        Further the BIC is evaluated.
+        """
         data = self.data.load()
 
         metrics = {}
 
-        # temperature schedule: use direct list or generate
-        if self.schedule.values is not None:
-            temp_schedule = np.array(self.schedule.values)
-            logger.info(f"Using direct temperature values: {temp_schedule}")
-        else:
-            func = SCHEDULES[self.schedule.method]
-            temp_schedule = func(self.schedule.num, self.schedule.power)
-            logger.info(f"Generated {self.schedule.method} schedule: {temp_schedule}")
-        
-
-        with h5py.File(self.sampling.storage_file, mode='r') as h5_file:
+        temp_schedule = self.schedule.get_schedule()
+
+        with h5py.File(self.sampling.storage_file, mode="r") as h5_file:
             # Get ndim from the HDF5 backend
-            backend = emcee.backends.HDFBackend(self.sampling.storage_file, read_only=True, name=self.sampling.dataset)
+            backend = emcee.backends.HDFBackend(
+                self.sampling.storage_file,
+                read_only=True,
+                name=self.sampling.dataset,
+            )
             ndim = backend.shape[1]
             logger.info(f"Inferred {ndim} parameters from stored samples")
-            
+
             # if TI has been performed, compute the evidence
             if "ti" in h5_file:
                 temp_schedule, ti_log_probs = compute_ti_results(
@@ -172,10 +174,12 @@ def cli_cmd(self) -> None:
                 )
                 beta_vs_accuracy.to_csv(self.plots, index=False)
                 logger.info(f"Plotted β vs accuracy at {self.plots}")
-            
+
             # use blobs, because also for TI, this is the unscaled log-prob
-            final_log_probs = backend.get_blobs()['log_prob']
-            logger.info(f"Opened samples from emcee backend from {self.sampling.storage_file}")
+            final_log_probs = backend.get_blobs()["log_prob"]
+            logger.info(
+                f"Opened samples from emcee backend from {self.sampling.storage_file}",
+            )
 
             # store metrics in JSON file
             self.metrics.parent.mkdir(parents=True, exist_ok=True)
@@ -191,11 +195,10 @@ def cli_cmd(self) -> None:
 
             with open(self.metrics, mode="w", encoding="utf-8") as metrics_file:
                 json.dump(metrics, metrics_file)
-            
+
             logger.info(f"Wrote out metrics to {self.metrics}")
 
+
 if __name__ == "__main__":
     main = assemble_main(settings_cls=EvidenceCLI, prog_name="compute evidence")
     main()
-
-
diff --git a/src/lyscripts/configs.py b/src/lyscripts/configs.py
index ed9ffb5..ebae236 100644
--- a/src/lyscripts/configs.py
+++ b/src/lyscripts/configs.py
@@ -18,7 +18,7 @@
 from collections.abc import Callable, Sequence
 from copy import deepcopy
 from pathlib import Path
-from typing import Annotated, Any, Literal, Optional
+from typing import Annotated, Any, Literal
 
 import numpy as np
 import pandas as pd
@@ -436,7 +436,7 @@ class SamplingConfig(BaseModel):
         default=None,
         description=(
             "Number of burn-in steps to take. If None, burn-in runs until convergence."
-        )
+        ),
     )
     num_steps: int | None = Field(
         default=100,
@@ -466,28 +466,75 @@ def load(self, thin: int = 1) -> np.ndarray:
             thin=thin,
         )
 
+
+def geometric_schedule(num: int, *_a) -> np.ndarray:
+    """Create a geometric sequence of ``num`` numbers from 0 to 1."""
+    log_seq = np.logspace(0.0, 1.0, num)
+    shifted_seq = log_seq - 1.0
+    return shifted_seq / 9.0
+
+
+def linear_schedule(num: int, *_a) -> np.ndarray:
+    """Create a linear sequence of ``num`` numbers from 0 to 1.
+
+    Equivalent to the :py:func:`power_schedule` with ``power=1``.
+    """
+    return np.linspace(0.0, 1.0, num)
+
+
+def power_schedule(num: int, power: float, *_a) -> np.ndarray:
+    """Create a power sequence of ``num`` numbers from 0 to 1.
+
+    This is essentially a :py:func:`linear_schedule` of ``num`` numbers from 0 to 1,
+    but each number is raised to the power of ``power``.
+    """
+    lin_seq = np.linspace(0.0, 1.0, num)
+    return lin_seq**power
+
+
+SCHEDULES = {
+    "geometric": geometric_schedule,
+    "linear": linear_schedule,
+    "power": power_schedule,
+}
+
+
 class ScheduleConfig(BaseModel):
-        """Configuration for generating a schedule of inverse temperatures."""
+    """Configuration for generating a schedule of inverse temperatures."""
+
+    method: Literal["geometric", "linear", "power"] = Field(
+        default="power",
+        description="Method to generate the inverse temperature schedule.",
+    )
+    num: int = Field(
+        default=32,
+        description="Number of inverse temperatures in the schedule.",
+    )
+    power: float = Field(
+        default=4.0,
+        description="If a power schedule is chosen, use this as power.",
+    )
+    values: list[float] | None = Field(
+        default=None,
+        description=(
+            "List of inverse temperatures to use instead of generating a schedule. "
+            "If a list is provided, the other parameters are ignored."
+        ),
+    )
+
+    def get_schedule(self) -> np.ndarray:
+        """Get the inverse temperature schedule as a numpy array."""
+        if self.values is not None:
+            logger.debug("Using provided inverse temperature values.")
+            schedule = np.array(self.values)
+        else:
+            logger.debug(f"Generating inverse temperature schedule with {self.method}.")
+            func = SCHEDULES[self.method]
+            schedule = func(self.num, self.power)
+
+        logger.info(f"Generated inverse temperature schedule: {schedule}")
+        return schedule
 
-        method: Literal["geometric", "linear", "power"] = Field(
-            default="power",
-            description="Method to generate the inverse temperature schedule.",
-        )
-        num: int = Field(
-            default=32,
-            description="Number of inverse temperatures in the schedule.",
-        )
-        power: float = Field(
-            default=4.0,
-            description="If a power schedule is chosen, use this as power.",
-        )
-        values: list[float] | None = Field(
-            default=None,
-            description=(
-                "List of inverse temperatures to use instead of generating a schedule. "
-                "If a list is provided, the other parameters are ignored."
-            ),
-        )
 
 def map_to_optional_bool(value: Any) -> Any:
     """Try to convert the options in the `PatternType` to a boolean value."""
diff --git a/src/lyscripts/integrate.py b/src/lyscripts/integrate.py
index 926e547..c213116 100644
--- a/src/lyscripts/integrate.py
+++ b/src/lyscripts/integrate.py
@@ -1,5 +1,4 @@
-"""
-Perform thermodynamic integration to evaluate the model evidence.
+"""Perform thermodynamic integration to evaluate the model evidence.
 
 Using the functions provided by the `sample` module, this script implements
 thermodynamic integration (TI) in order to compute the model evidence.
@@ -11,17 +10,16 @@
 
 import os
 from typing import Any
-from loguru import logger
 
 import emcee
-import numpy as np
 import h5py
-
-from lyscripts.cli import assemble_main
-from pydantic import Field
-
+import numpy as np
+from loguru import logger
 from lydata.utils import ModalityConfig
+from pydantic import Field
 
+import lyscripts.sample as sample_module  # Import the module to set its global MODEL
+from lyscripts.cli import assemble_main
 from lyscripts.configs import (
     BaseCLI,
     DataConfig,
@@ -30,31 +28,28 @@
     ModelConfig,
     SamplingConfig,
     ScheduleConfig,
-    construct_model,
     add_distributions,
     add_modalities,
+    construct_model,
 )
-
-import lyscripts.sample as sample_module  # Import the module to set its global MODEL
-from lyscripts.schedule import SCHEDULES
-
 from lyscripts.utils import get_hdf5_backend
 
+
 def init_ti_sampler(
-    settings: IntegrateCLI, 
-    temp_idx: int, 
-    ndim: int, 
-    inv_temp: float, 
-    pool: Any    
+    settings: IntegrateCLI,
+    temp_idx: int,
+    ndim: int,
+    inv_temp: float,
+    pool: Any,
 ) -> emcee.EnsembleSampler:
     """Initialize the ``emcee.EnsembleSampler`` for TI with the given ``settings''."""
     nwalkers = ndim * settings.sampling.walkers_per_dim
     backend = get_hdf5_backend(
-                    file_path=settings.sampling.storage_file,
-                    dataset=f"ti/{temp_idx+1:0>2d}",
-                    nwalkers=nwalkers,
-                    ndim=ndim,
-                )
+        file_path=settings.sampling.storage_file,
+        dataset=f"ti/{temp_idx + 1:0>2d}",
+        nwalkers=nwalkers,
+        ndim=ndim,
+    )
     return emcee.EnsembleSampler(
         nwalkers=nwalkers,
         ndim=ndim,
@@ -67,6 +62,7 @@ def init_ti_sampler(
         parameter_names=list(MODEL.get_named_params().keys()),
     )
 
+
 class IntegrateCLI(BaseCLI):
     """Perform thermodynamic integration to compute the model evidence."""
 
@@ -88,21 +84,20 @@ class IntegrateCLI(BaseCLI):
     data: DataConfig
     sampling: SamplingConfig
     schedule: ScheduleConfig = Field(
-        description="Configuration for generating inverse temperature schedule."
+        description="Configuration for generating inverse temperature schedule.",
     )
 
-
     def cli_cmd(self) -> None:
         """Start the ``integrate`` subcommand.
-        
-        The model construction and setup is done analogously to the 
-        ``sample`` command. Afterwards, an :py:class:`emcee.EnsembleSampler` 
-        is initialized (see :py:func:`init_sampler`) and :py:func:`run_sampling`, 
+
+        The model construction and setup is done analogously to the
+        ``sample`` command. Afterwards, an :py:class:`emcee.EnsembleSampler`
+        is initialized (see :py:func:`init_sampler`) and :py:func:`run_sampling`,
         implemented in the ``sample``module, is executed twice for each TI step:
-        once for the burn-in phase and once for the actual sampling phase. 
-        Thereby, the log likelihood is scaled by the respective inverse 
-        temperature of that step. All necessary settings for the sampling 
-        are passed by the ``sampling``argument, except for the inverse 
+        once for the burn-in phase and once for the actual sampling phase.
+        Thereby, the log likelihood is scaled by the respective inverse
+        temperature of that step. All necessary settings for the sampling
+        are passed by the ``sampling``argument, except for the inverse
         temperatures, which are provided by the ``schedule`` argument.
         """
         # as recommended in https://emcee.readthedocs.io/en/stable/tutorials/parallel/#
@@ -110,41 +105,34 @@ def cli_cmd(self) -> None:
 
         logger.debug(self.model_dump_json(indent=2))
 
-        # ugly, but necessary for pickling  
+        # ugly, but necessary for pickling
         global MODEL
         MODEL = construct_model(self.model, self.graph)
         MODEL = add_distributions(MODEL, self.distributions)
         MODEL = add_modalities(MODEL, self.modalities)
         MODEL.load_patient_data(**self.data.get_load_kwargs())
         ndim = MODEL.get_num_dims()
-        
+
         # set MODEL in the sample module's namespace so log_prob_fn can access it
         sample_module.MODEL = MODEL
 
-        # temperature schedule: use direct list or generate
-        if self.schedule.values is not None:
-            temp_schedule = np.array(self.schedule.values)
-            logger.info(f"Using direct temperature values: {temp_schedule}")
-        else:
-            func = SCHEDULES[self.schedule.method]
-            temp_schedule = func(self.schedule.num, self.schedule.power)
-            logger.info(f"Generated {self.schedule.method} schedule: {temp_schedule}")
-        
+        schedule = self.schedule.get_schedule()
+
         # emcee does not support numpy's new random number generator yet.
-        np.random.seed(self.sampling.seed) # noqa: NPY002
+        np.random.seed(self.sampling.seed)  # noqa: NPY002
 
         with sample_module.get_pool(self.sampling.cores) as pool:
-            for idx, inv_temp in enumerate(temp_schedule):
+            for idx, inv_temp in enumerate(schedule):
                 sampler = init_ti_sampler(
                     settings=self,
                     temp_idx=idx,
                     ndim=ndim,
                     inv_temp=inv_temp,
-                    pool=pool
+                    pool=pool,
                 )
 
                 sample_module.run_sampling(
-                    description=f"Burn-in phase: TI step {idx+1}/{len(temp_schedule)}",
+                    description=f"Burn-in phase: TI step {idx + 1}/{len(schedule)}",
                     sampler=sampler,
                     num_steps=self.sampling.burnin_steps,
                     check_interval=self.sampling.check_interval,
@@ -154,7 +142,7 @@ def cli_cmd(self) -> None:
                 )
 
                 sample_module.run_sampling(
-                    description=f"Sampling phase: TI step {idx+1}/{len(temp_schedule)}",
+                    description=f"Sampling phase: TI step {idx + 1}/{len(schedule)}",
                     sampler=sampler,
                     num_steps=self.sampling.num_steps,
                     reset_backend=True,
@@ -163,8 +151,13 @@ def cli_cmd(self) -> None:
                 )
             # copy last sampling round over to a group in the HDF5 file called "mcmc"
             with h5py.File(self.sampling.storage_file, mode="r+") as h5_file:
-                h5_file.copy(f"ti/{len(temp_schedule):0>2d}", h5_file, name=self.sampling.dataset)
+                h5_file.copy(
+                    f"ti/{len(schedule):0>2d}",
+                    h5_file,
+                    name=self.sampling.dataset,
+                )
+
 
 if __name__ == "__main__":
     main = assemble_main(settings_cls=IntegrateCLI, prog_name="integrate")
-    main()
\ No newline at end of file
+    main()
diff --git a/src/lyscripts/schedule.py b/src/lyscripts/schedule.py
index 16b367f..99fa334 100644
--- a/src/lyscripts/schedule.py
+++ b/src/lyscripts/schedule.py
@@ -10,72 +10,20 @@
 :math:`\beta_i^k` where :math:`k` could e.g. be 5.
 """
 
-from typing import Literal
-
-import numpy as np
 from loguru import logger
-from pydantic import Field
 
 from lyscripts.cli import assemble_main
-from lyscripts.configs import BaseCLI
-
-
-def geometric_schedule(num: int, *_a) -> np.ndarray:
-    """Create a geometric sequence of ``num`` numbers from 0 to 1."""
-    log_seq = np.logspace(0.0, 1.0, num)
-    shifted_seq = log_seq - 1.0
-    return shifted_seq / 9.0
-
-
-def linear_schedule(num: int, *_a) -> np.ndarray:
-    """Create a linear sequence of ``num`` numbers from 0 to 1.
-
-    Equivalent to the :py:func:`power_schedule` with ``power=1``.
-    """
-    return np.linspace(0.0, 1.0, num)
-
+from lyscripts.configs import BaseCLI, ScheduleConfig
 
-def power_schedule(num: int, power: float, *_a) -> np.ndarray:
-    """Create a power sequence of ``num`` numbers from 0 to 1.
 
-    This is essentially a :py:func:`linear_schedule` of ``num`` numbers from 0 to 1,
-    but each number is raised to the power of ``power``.
-    """
-    lin_seq = np.linspace(0.0, 1.0, num)
-    return lin_seq**power
-
-
-SCHEDULES = {
-    "geometric": geometric_schedule,
-    "linear": linear_schedule,
-    "power": power_schedule,
-}
-
-
-class ScheduleCLI(BaseCLI):
+class ScheduleCLI(ScheduleConfig, BaseCLI):
     """Generate an inverse temperature schedule for thermodynamic integration."""
 
-    method: Literal["geometric", "linear", "power"] = Field(
-        default="geometric",
-        description="Choose the method to distribute the inverse temperatures.",
-    )
-    num: int = Field(
-        default=32,
-        description="Number of inverse temperatures in the schedule.",
-    )
-    power: float = Field(
-        default=4,
-        description="If a power schedule is chosen, use this as power.",
-    )
-
     def cli_cmd(self) -> None:
         """Start the ``schedule`` command."""
         logger.debug(self.model_dump_json(indent=2))
 
-        func = SCHEDULES[self.method]
-        schedule = func(self.num, self.power)
-
-        for inv_temp in schedule:
+        for inv_temp in self.get_schedule():
             # print is necessary to allow piping the output
             print(inv_temp)  # noqa: T201
 

From 663bd69c54dfd050cabad5ea3acdd071a389bf4a Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Wed, 13 Aug 2025 12:13:55 +0200
Subject: [PATCH 03/20] change: store selected loglevel globally

---
 src/lyscripts/cli.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/lyscripts/cli.py b/src/lyscripts/cli.py
index 26870c7..c1baae2 100644
--- a/src/lyscripts/cli.py
+++ b/src/lyscripts/cli.py
@@ -17,6 +17,8 @@
 from rich.logging import RichHandler
 from rich_argparse import ArgumentDefaultsRichHelpFormatter
 
+_current_log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"
+
 
 def assemble_main(
     settings_cls: type[BaseSettings],
@@ -80,11 +82,12 @@ def configure_logging(
     """
     logger.enable("lyscripts")
     logger.enable("lydata")
-    log_level = somewhat_safely_get_loglevel(argv=argv)
+    global _current_log_level
+    _current_log_level = somewhat_safely_get_loglevel(argv=argv)
     logger.remove()
     handler = RichHandler(console=console)
     logger.add(
         sink=handler,
-        level=log_level,
+        level=_current_log_level,
         format="<lvl>{message}</>",
     )

From ef6f51bf7e7cf920dd5db7c7cf72303d913e60ed Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Wed, 13 Aug 2025 13:54:07 +0200
Subject: [PATCH 04/20] feat: add collector utility/command

---
 pyproject.toml                          |  11 +-
 src/lyscripts/__init__.py               |   2 +-
 src/lyscripts/data/__init__.py          |   2 +
 src/lyscripts/data/collect/__init__.py  | 163 ++++++++++++++++++++++++
 src/lyscripts/data/collect/collector.js | 150 ++++++++++++++++++++++
 src/lyscripts/data/collect/index.html   |  26 ++++
 6 files changed, 352 insertions(+), 2 deletions(-)
 create mode 100644 src/lyscripts/data/collect/__init__.py
 create mode 100644 src/lyscripts/data/collect/collector.js
 create mode 100644 src/lyscripts/data/collect/index.html

diff --git a/pyproject.toml b/pyproject.toml
index 5659c16..69ba6a6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,7 +51,7 @@ dependencies = [
     "pydantic-settings >= 2.7.0, != 2.9.1, != 2.9.0",
     "numpydantic",
     "loguru",
-    "lydata @ git+https://github.com/lycosystem/lydata-package@eb99daf6cd65d93879cfa3552923f25c0a9c29bf",
+    "lydata @ git+https://github.com/lycosystem/lydata-package@79c5d4996b37a833afca457629d8d854acc05a33",
 ]
 dynamic = ["version"]
 
@@ -81,9 +81,18 @@ dev = [
 [project.scripts]
 lyscripts = "lyscripts:main"
 
+[tool.setuptools]
+include-package-data = true
+
 [tool.setuptools.packages.find]
 where = ["src"]
 
+[tool.setuptools.package-data]
+"lyscripts" = [
+    "src/lyscripts/data/collect/collector.js",
+    "src/lyscripts/data/collect/index.html",
+]
+
 [tool.setuptools_scm]
 write_to = "src/lyscripts/_version.py"
 local_scheme = "no-local-version"
diff --git a/src/lyscripts/__init__.py b/src/lyscripts/__init__.py
index 6727547..e9dbcb1 100644
--- a/src/lyscripts/__init__.py
+++ b/src/lyscripts/__init__.py
@@ -17,7 +17,7 @@
     CliSubCommand,
 )
 
-from lyscripts import compute, data, sample, schedule, integrate  # noqa: F401
+from lyscripts import compute, data, integrate, sample, schedule  # noqa: F401
 from lyscripts._version import version
 from lyscripts.cli import assemble_main, configure_logging
 from lyscripts.utils import console
diff --git a/src/lyscripts/data/__init__.py b/src/lyscripts/data/__init__.py
index c9863ec..eb86dbd 100644
--- a/src/lyscripts/data/__init__.py
+++ b/src/lyscripts/data/__init__.py
@@ -17,6 +17,7 @@
 from pydantic_settings import BaseSettings, CliApp, CliSubCommand
 
 from lyscripts.data import (  # noqa: F401
+    collect,
     enhance,
     fetch,
     generate,
@@ -32,6 +33,7 @@
 class DataCLI(BaseSettings):
     """Work with lymphatic progression data through this CLI."""
 
+    collect: CliSubCommand[collect.CollectorCLI]
     lyproxify: CliSubCommand[lyproxify.LyproxifyCLI]
     join: CliSubCommand[join.JoinCLI]
     split: CliSubCommand[split.SplitCLI]
diff --git a/src/lyscripts/data/collect/__init__.py b/src/lyscripts/data/collect/__init__.py
new file mode 100644
index 0000000..44e1f19
--- /dev/null
+++ b/src/lyscripts/data/collect/__init__.py
@@ -0,0 +1,163 @@
+"""Submodule to collect data interactively using a simple web interface.
+
+With the simply command
+
+.. code-block:: bash
+
+    lyscripts data collect
+
+One can start a very basic web server that serves an interactive UI at
+``http://localhost:8000/``. There, one can enter patient, tumor, and lymphatic
+involvement data one by one. When completed, the "submit" button will parse, validate,
+and convert the data to serve a downloadable CSV file.
+
+This resulting CSV file is in the correct format to be used in `LyProX`_ and for
+inference using our `lymph-model`_ library.
+
+.. _LyProX: https://lyprox.org
+.. _lymph-model: https://lymph-model.readthedocs.io
+"""
+
+import inspect
+import io
+import logging
+from pathlib import Path
+from typing import Any
+
+import lydata
+import lydata.validator
+import pandas as pd
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import StreamingResponse
+from loguru import logger
+from pydantic import Field, RootModel
+from starlette.responses import FileResponse, HTMLResponse
+
+from lyscripts.cli import _current_log_level
+from lyscripts.configs import BaseCLI
+
+app = FastAPI(
+    title="lyDATA Collector",
+    description=(
+        "A simple web interface to collect data for the lyDATA datasets. "
+        "This is a prototype and not intended for production use."
+    ),
+    version=lydata.__version__,
+)
+
+BASE_DIR = Path(__file__).parent
+modalities = lydata.schema.get_default_modalities()
+RecordModel = lydata.schema.create_full_record_model(modalities, title="Record")
+ROOT_MODEL = RootModel[list[RecordModel]]
+
+
+@app.get("/")
+def serve_index() -> HTMLResponse:
+    """Serve the index.html file."""
+    with open(BASE_DIR / "index.html") as file:
+        content = file.read()
+    return HTMLResponse(content=content)
+
+
+@app.get("/schema")
+def serve_schema() -> dict[str, Any]:
+    """Serve the JSON schema for the patient and tumor records."""
+    return ROOT_MODEL.model_json_schema()
+
+
+@app.get("/collector.js")
+def serve_collector_js() -> FileResponse:
+    """Serve the collector.js file."""
+    return FileResponse(BASE_DIR / "collector.js")
+
+
+@app.post("/submit")
+async def process(data: RootModel) -> StreamingResponse:
+    """Convert the submitted data to a DataFrame."""
+    logger.info(f"Received data: {data.root}")
+
+    if len(data.root) == 0:
+        logger.warning("No records provided in the data.")
+        raise HTTPException(
+            status_code=400,
+            detail="No records provided in the data.",
+        )
+
+    flattened_records = []
+
+    for record in data.root:
+        flattened_record = lydata.validator.flatten(record)
+        logger.debug(f"Flattened record: {flattened_record}")
+        flattened_records.append(flattened_record)
+
+    df = pd.DataFrame(flattened_records)
+    df.columns = pd.MultiIndex.from_tuples(flattened_record.keys())
+    logger.info(df.patient.core.head())
+
+    buffer = io.StringIO()
+    df.to_csv(buffer, index=False)
+    buffer.seek(0)
+    logger.success("Data prepared for download")
+    return StreamingResponse(
+        buffer,
+        media_type="text/csv",
+        headers={"Content-Disposition": "attachment; filename=lydata_records.csv"},
+    )
+
+
+class InterceptHandler(logging.Handler):
+    """Intercept logging messages and redirect them to Loguru."""
+
+    def emit(self, record: logging.LogRecord) -> None:
+        """Intercept the log record and redirect it to Loguru."""
+        # Get corresponding Loguru level if it exists.
+        try:
+            level: str | int = logger.level(record.levelname).name
+        except ValueError:
+            level = record.levelno
+
+        # Find caller from where originated the logged message.
+        frame, depth = inspect.currentframe(), 0
+        while frame:
+            filename = frame.f_code.co_filename
+            is_logging = filename == logging.__file__
+            is_frozen = "importlib" in filename and "_bootstrap" in filename
+            if depth > 0 and not (is_logging or is_frozen):
+                break
+            frame = frame.f_back
+            depth += 1
+
+        logger.opt(depth=depth, exception=record.exc_info).log(
+            level,
+            record.getMessage(),
+        )
+
+
+class CollectorCLI(BaseCLI):
+    """Command-line interface for the lyDATA collector."""
+
+    hostname: str = Field(
+        default="localhost",
+        description="Hostname to run the FastAPI app on.",
+    )
+    port: int = Field(
+        default=8000,
+        description="Port to run the FastAPI app on.",
+    )
+
+    def cli_cmd(self) -> None:
+        """Run the FastAPI app."""
+        logger.debug(self.model_dump_json(indent=2))
+        import uvicorn
+
+        # Intercept standard logging and redirect it to Loguru
+        logging.basicConfig(handlers=[InterceptHandler()], level=0, force=True)
+        logger.enable("lydata")
+
+        uvicorn.run(
+            app,
+            host=self.hostname,
+            port=self.port,
+            log_level=_current_log_level.lower(),
+            log_config=None,
+        )
diff --git a/src/lyscripts/data/collect/collector.js b/src/lyscripts/data/collect/collector.js
new file mode 100644
index 0000000..d5e4a59
--- /dev/null
+++ b/src/lyscripts/data/collect/collector.js
@@ -0,0 +1,150 @@
+/**
+ * Client-side helper functions for collecting user input through JSONEditor,
+ * validating it against a fetched JSON Schema, submitting the validated data
+ * to the backend, and presenting a downloadable CSV returned by the server.
+ *
+ * NOTE: Functionality is intentionally unchanged; only readability and
+ * documentation have been improved.
+ */
+
+/**
+ * Ensure an alert element (used to display validation errors) exists.
+ * Creates and appends it if missing.
+ *
+ * @returns {HTMLDivElement} The existing or newly created alert element.
+ */
+function ensureAlertExists() {
+  let alertElement = document.querySelector('.alert');
+  if (!alertElement) {
+    alertElement = document.createElement('div');
+  }
+  alertElement.className = 'alert alert-danger';
+  const editorHolder = document.getElementById('editor_holder');
+  editorHolder.appendChild(alertElement);
+  return alertElement;
+}
+
+/**
+ * Remove an existing validation alert if present.
+ */
+function ensureAlertRemoved() {
+  const existingAlert = document.querySelector('.alert');
+  if (existingAlert) {
+    console.log('Clearing existing alert');
+    existingAlert.remove();
+  }
+}
+
+/**
+ * Remove an existing download button (if it exists) to avoid duplicates.
+ */
+function ensureDownloadButtonRemoved() {
+  const existingButton = document.getElementById('download_link');
+  if (existingButton) {
+    console.log('Clearing existing download button');
+    existingButton.remove();
+  }
+}
+
+/**
+ * Create (or replace) a download button for a CSV blob returned by the server.
+ *
+ * @param {Blob} blob - The CSV data blob to make downloadable.
+ */
+function createDownloadButton(blob) {
+  ensureDownloadButtonRemoved();
+
+  const url = window.URL.createObjectURL(blob);
+  const downloadLink = document.createElement('a');
+  downloadLink.id = 'download_link';
+  downloadLink.href = url;
+  downloadLink.textContent = 'Download CSV';
+  downloadLink.className = 'btn btn-success';
+  downloadLink.download = 'lydata_records.csv';
+
+  document.getElementById('editor_holder').appendChild(downloadLink);
+  console.log('Download button created:', downloadLink);
+}
+
+/**
+ * Send validated editor data to the backend for processing. Expects a CSV blob
+ * in response which is then exposed via a generated download button.
+ *
+ * @param {JSONEditor} editor - The JSONEditor instance from which to read data.
+ */
+async function sendEditorData(editor) {
+  const data = editor.getValue();
+  console.log('Sending data:', data);
+
+  try {
+    const response = await fetch('/submit', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(data)
+    });
+
+    if (!response.ok) {
+      // Try to parse error details from JSON, fallback to text
+      let errorMsg = 'Unknown error';
+      try {
+        const err = await response.json();
+        errorMsg = err.detail || err.message || errorMsg;
+      } catch {
+        errorMsg = await response.text();
+      }
+      throw new Error(errorMsg);
+    }
+
+    const blob = await response.blob();
+    console.log('Received processed data as blob:', blob);
+    createDownloadButton(blob);
+  } catch (error) {
+    ensureDownloadButtonRemoved();
+    console.error('Error submitting data:', error);
+    const alert = ensureAlertExists();
+    alert.textContent = 'Error submitting data: ' + error.message;
+    alert.classList.add('alert-danger');
+  }
+}
+
+/**
+ * Validate the editor content. If there are validation errors they are
+ * displayed in an alert; otherwise the data is submitted to the backend.
+ *
+ * @param {JSONEditor} editor - The JSONEditor instance to validate & submit.
+ */
+function processEditor(editor) {
+  const errors = editor.validate();
+
+  if (errors.length) {
+    console.error('Validation errors:', errors);
+    const alert = ensureAlertExists();
+    alert.textContent = 'Validation errors: ' + errors.map(e => e.message).join(', ');
+  } else {
+    console.log('Data successfully validated');
+    ensureAlertRemoved();
+    sendEditorData(editor);
+  }
+}
+
+// Fetch the JSON Schema to initialize the editor
+fetch('/schema')
+  .then(response => response.json())
+  .then(schema => {
+    const element = document.getElementById('editor_holder');
+    const options = {
+      disable_edit_json: true,
+      theme: 'bootstrap5',
+      iconlib: 'bootstrap',
+      object_layout: 'grid',
+      schema: schema
+  };
+  const editor = new JSONEditor(element, options);
+
+    // Bind the submit button to validation + submission flow
+    document.getElementById('submit').addEventListener('click', () => {
+      console.log('Submit button clicked');
+      processEditor(editor);
+    });
+  })
+  .catch(error => console.error('Error loading schema:', error));
diff --git a/src/lyscripts/data/collect/index.html b/src/lyscripts/data/collect/index.html
new file mode 100644
index 0000000..931dacf
--- /dev/null
+++ b/src/lyscripts/data/collect/index.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html>
+<html>
+
+<head>
+  <meta charset="utf-8" />
+  <title>Basic JSON Editor Example</title>
+
+  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
+    integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
+  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.13.1/font/bootstrap-icons.min.css">
+
+  <script src="https://cdn.jsdelivr.net/npm/@json-editor/json-editor@latest/dist/jsoneditor.min.js"></script>
+</head>
+
+<body>
+  <main class="container">
+    <h1>LyDATA Collector</h1>
+
+    <div id='editor_holder'></div>
+    <button class="btn btn-primary" id='submit'>Submit (console.log)</button>
+
+    <script src="/collector.js"></script>
+  </main>
+</body>
+
+</html>

From 0bc2a64801acecab04a9711151e3cbd6654a1268 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Wed, 13 Aug 2025 15:02:00 +0200
Subject: [PATCH 05/20] docs: improve collect description

---
 README.md                              |  6 ++-
 docs/source/data/collect.rst           | 13 ++++++
 docs/source/data/init.rst              |  1 +
 docs/source/index.rst                  |  2 +-
 src/lyscripts/cli.py                   | 30 +++++++++++++
 src/lyscripts/data/__init__.py         |  6 +++
 src/lyscripts/data/collect/__init__.py | 61 ++++++++++----------------
 7 files changed, 80 insertions(+), 39 deletions(-)
 create mode 100644 docs/source/data/collect.rst

diff --git a/README.md b/README.md
index 9621e28..e64323b 100644
--- a/README.md
+++ b/README.md
@@ -41,7 +41,11 @@ Simply run
 lyscripts --help
 ```
 
-in your terminal and let the output guide you through the functions of the program.
+in your terminal to display the help text for the main command. It will list all subcommands that are avialable, which you can then also call with `lyscripts <subcommand> --help` to get more information on its use and the available arguments.
+
+For example, one subcommand is `lyscripts data collect`, which will launch a small web server that allows a user to enter patient records on lymphatic involvement in head and neck cancer one row at a time and construct a standardized CSV file from it.
+
+<!--- ### --->
 
 You can also refer to the [documentation] for a written-down version of all these help texts and even more context on how and why to use the provided commands.
 
diff --git a/docs/source/data/collect.rst b/docs/source/data/collect.rst
new file mode 100644
index 0000000..8c8b08b
--- /dev/null
+++ b/docs/source/data/collect.rst
@@ -0,0 +1,13 @@
+.. currentmodule:: lyscripts.data.collect
+
+Collect lyDATA Tables Interactively
+===================================
+
+.. automodule:: lyscripts.data.collect
+    :members:
+    :show-inheritance:
+
+Command Help
+------------
+
+.. program-output:: lyscripts data collect --help
diff --git a/docs/source/data/init.rst b/docs/source/data/init.rst
index e53ee60..014bb85 100644
--- a/docs/source/data/init.rst
+++ b/docs/source/data/init.rst
@@ -19,6 +19,7 @@ Submodules
 .. toctree::
     :maxdepth: 1
 
+    collect
     lyproxify
     join
     split
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 1f36252..647e4e1 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -8,7 +8,7 @@ Introduction
 
 
 .. include:: ../../README.md
-    :end-line: 30
+    :end-before: <!--- ### --->
     :parser: myst_parser.sphinx_
 
 
diff --git a/src/lyscripts/cli.py b/src/lyscripts/cli.py
index c1baae2..b6448d0 100644
--- a/src/lyscripts/cli.py
+++ b/src/lyscripts/cli.py
@@ -8,6 +8,8 @@
 .. _loguru: https://loguru.readthedocs.io/en/stable
 """
 
+import inspect
+import logging
 from collections.abc import Callable
 from typing import Literal
 
@@ -91,3 +93,31 @@ def configure_logging(
         level=_current_log_level,
         format="<lvl>{message}</>",
     )
+
+
+class InterceptHandler(logging.Handler):
+    """Intercept logging messages and redirect them to Loguru."""
+
+    def emit(self, record: logging.LogRecord) -> None:
+        """Intercept the log record and redirect it to Loguru."""
+        # Get corresponding Loguru level if it exists.
+        try:
+            level: str | int = logger.level(record.levelname).name
+        except ValueError:
+            level = record.levelno
+
+        # Find caller from where originated the logged message.
+        frame, depth = inspect.currentframe(), 0
+        while frame:
+            filename = frame.f_code.co_filename
+            is_logging = filename == logging.__file__
+            is_frozen = "importlib" in filename and "_bootstrap" in filename
+            if depth > 0 and not (is_logging or is_frozen):
+                break
+            frame = frame.f_back
+            depth += 1
+
+        logger.opt(depth=depth, exception=record.exc_info).log(
+            level,
+            record.getMessage(),
+        )
diff --git a/src/lyscripts/data/__init__.py b/src/lyscripts/data/__init__.py
index eb86dbd..29ffe69 100644
--- a/src/lyscripts/data/__init__.py
+++ b/src/lyscripts/data/__init__.py
@@ -7,6 +7,12 @@
 the installed datasets provided by the `lydata`_ package and directly from the
 associated `GitHub repository`_.
 
+Another cool feature is the built-in mini web application that allows collecting nodal
+involvement data interactively and in the same standardized format as we have published
+in the past, both on `LyProX`_ and in our `GitHub repository`_. It can be launched by
+running `lyscripts data collect` in the terminal. See the docs for the
+:py:mod:`lyscripts.data.collect` submodule on more information.
+
 .. _Make: https://www.gnu.org/software/make/
 .. _DVC: https://dvc.org
 .. _LyProX: https://lyprox.org
diff --git a/src/lyscripts/data/collect/__init__.py b/src/lyscripts/data/collect/__init__.py
index 44e1f19..cd681c8 100644
--- a/src/lyscripts/data/collect/__init__.py
+++ b/src/lyscripts/data/collect/__init__.py
@@ -1,6 +1,6 @@
 """Submodule to collect data interactively using a simple web interface.
 
-With the simply command
+With the simple command
 
 .. code-block:: bash
 
@@ -11,14 +11,13 @@
 involvement data one by one. When completed, the "submit" button will parse, validate,
 and convert the data to serve a downloadable CSV file.
 
-This resulting CSV file is in the correct format to be used in `LyProX`_ and for
+The resulting CSV file is in the correct format to be used in `LyProX`_ and for
 inference using our `lymph-model`_ library.
 
 .. _LyProX: https://lyprox.org
 .. _lymph-model: https://lymph-model.readthedocs.io
 """
 
-import inspect
 import io
 import logging
 from pathlib import Path
@@ -33,7 +32,7 @@
 from pydantic import Field, RootModel
 from starlette.responses import FileResponse, HTMLResponse
 
-from lyscripts.cli import _current_log_level
+from lyscripts.cli import InterceptHandler, _current_log_level
 from lyscripts.configs import BaseCLI
 
 app = FastAPI(
@@ -52,8 +51,8 @@
 
 
 @app.get("/")
-def serve_index() -> HTMLResponse:
-    """Serve the index.html file."""
+def serve_index_html() -> HTMLResponse:
+    """Serve the ``index.html`` file at the URL's root."""
     with open(BASE_DIR / "index.html") as file:
         content = file.read()
     return HTMLResponse(content=content)
@@ -67,13 +66,29 @@ def serve_schema() -> dict[str, Any]:
 
 @app.get("/collector.js")
 def serve_collector_js() -> FileResponse:
-    """Serve the collector.js file."""
+    """Serve the ``collector.js`` file under ``"http://{host}:{port}/collector.js"``.
+
+    This frontend JavaScript file loads the `JSON-Editor`_ library and initializes it
+    using the schema returned by the :py:func:`serve_schema` function.
+
+    .. _JSON-Editor: https://github.com/json-editor/json-editor/
+    """
     return FileResponse(BASE_DIR / "collector.js")
 
 
 @app.post("/submit")
 async def process(data: RootModel) -> StreamingResponse:
-    """Convert the submitted data to a DataFrame."""
+    """Process the submitted data to a DataFrame.
+
+    `FastAPI`_ will automatically parse the received JSON data into the list of
+    instances of he pydantic type defined by the
+    :py:func:`lydata.schema.create_full_record_model` function.
+
+    From this list, we create a pandas DataFrame and return it as a downloadable CSV
+    file.
+
+    .. _FastAPI: https://fastapi.tiangolo.com/
+    """
     logger.info(f"Received data: {data.root}")
 
     if len(data.root) == 0:
@@ -105,36 +120,8 @@ async def process(data: RootModel) -> StreamingResponse:
     )
 
 
-class InterceptHandler(logging.Handler):
-    """Intercept logging messages and redirect them to Loguru."""
-
-    def emit(self, record: logging.LogRecord) -> None:
-        """Intercept the log record and redirect it to Loguru."""
-        # Get corresponding Loguru level if it exists.
-        try:
-            level: str | int = logger.level(record.levelname).name
-        except ValueError:
-            level = record.levelno
-
-        # Find caller from where originated the logged message.
-        frame, depth = inspect.currentframe(), 0
-        while frame:
-            filename = frame.f_code.co_filename
-            is_logging = filename == logging.__file__
-            is_frozen = "importlib" in filename and "_bootstrap" in filename
-            if depth > 0 and not (is_logging or is_frozen):
-                break
-            frame = frame.f_back
-            depth += 1
-
-        logger.opt(depth=depth, exception=record.exc_info).log(
-            level,
-            record.getMessage(),
-        )
-
-
 class CollectorCLI(BaseCLI):
-    """Command-line interface for the lyDATA collector."""
+    """Serve a FastAPI web app for collecting involvement patterns as CSV files."""
 
     hostname: str = Field(
         default="localhost",

From 1bb0cbfa515250138ca23f779458d7b70950915a Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Fri, 15 Aug 2025 11:33:46 +0200
Subject: [PATCH 06/20] chore: add CITATION.cff

---
 CITATION.cff | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100755 CITATION.cff

diff --git a/CITATION.cff b/CITATION.cff
new file mode 100755
index 0000000..70404a2
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,25 @@
+# This CITATION.cff file was generated with cffinit.
+# Visit https://bit.ly/cffinit to generate yours today!
+
+cff-version: 1.2.0
+title: lyscripts
+message: >-
+  If you use this software, please cite it using the
+  metadata from this file.
+type: software
+authors:
+  - given-names: Roman
+    family-names: Ludwig
+    orcid: 'https://orcid.org/0000-0001-9434-328X'
+    affiliation: University Hospital Zurich
+repository-code: 'https://github.com/lycosystem/lyscripts'
+url: 'https://lyscripts.readthedocs.io'
+abstract: >-
+  Scripts for reproducible research on lymphatic tumor
+  progression in head and neck cancer.
+keywords:
+  - cancer
+  - metastasis
+  - lymphatic system
+  - head and neck
+license: MIT

From 73e60688bd7d12b66fbccb4132377b23a06f734f Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Mon, 18 Aug 2025 13:48:29 +0200
Subject: [PATCH 07/20] build: add uvicorn, fastapi to deps

---
 pyproject.toml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 69ba6a6..112ecf2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,6 +51,8 @@ dependencies = [
     "pydantic-settings >= 2.7.0, != 2.9.1, != 2.9.0",
     "numpydantic",
     "loguru",
+    "fastapi",
+    "uvicorn",
     "lydata @ git+https://github.com/lycosystem/lydata-package@79c5d4996b37a833afca457629d8d854acc05a33",
 ]
 dynamic = ["version"]

From fbff10240ab7e7fced48bea4880c13a133b7a252 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Mon, 18 Aug 2025 13:48:42 +0200
Subject: [PATCH 08/20] change: disable properties in collector

---
 src/lyscripts/data/collect/collector.js | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/lyscripts/data/collect/collector.js b/src/lyscripts/data/collect/collector.js
index d5e4a59..3d8d5d9 100644
--- a/src/lyscripts/data/collect/collector.js
+++ b/src/lyscripts/data/collect/collector.js
@@ -137,6 +137,7 @@ fetch('/schema')
       theme: 'bootstrap5',
       iconlib: 'bootstrap',
       object_layout: 'grid',
+      disable_properties: true,
       schema: schema
   };
   const editor = new JSONEditor(element, options);

From 4d49a9f6db3e3017826968755267e2d21c608866 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Tue, 26 Aug 2025 11:17:01 +0200
Subject: [PATCH 09/20] fix: use fixed lydata `.enhance()` method

---
 pyproject.toml                         | 2 +-
 src/lyscripts/data/collect/__init__.py | 2 +-
 src/lyscripts/data/enhance.py          | 5 +----
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 112ecf2..af6b90d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,7 +53,7 @@ dependencies = [
     "loguru",
     "fastapi",
     "uvicorn",
-    "lydata @ git+https://github.com/lycosystem/lydata-package@79c5d4996b37a833afca457629d8d854acc05a33",
+    "lydata @ git+https://github.com/lycosystem/lydata-package@8469a26bea1d275b7ced43fbe3a269c8b30b60f4",
 ]
 dynamic = ["version"]
 
diff --git a/src/lyscripts/data/collect/__init__.py b/src/lyscripts/data/collect/__init__.py
index cd681c8..425c728 100644
--- a/src/lyscripts/data/collect/__init__.py
+++ b/src/lyscripts/data/collect/__init__.py
@@ -46,7 +46,7 @@
 
 BASE_DIR = Path(__file__).parent
 modalities = lydata.schema.get_default_modalities()
-RecordModel = lydata.schema.create_full_record_model(modalities, title="Record")
+RecordModel = lydata.schema.create_full_record_model(modalities, model_name="Record")
 ROOT_MODEL = RootModel[list[RecordModel]]
 
 
diff --git a/src/lyscripts/data/enhance.py b/src/lyscripts/data/enhance.py
index a8a795a..87b1e23 100644
--- a/src/lyscripts/data/enhance.py
+++ b/src/lyscripts/data/enhance.py
@@ -40,12 +40,9 @@ def cli_cmd(self) -> None:
         logger.debug(self.model_dump_json(indent=2))
 
         data: LyDataFrame = self.input.load()
-        data[self.method] = data.ly.combine(
+        data = data.ly.enhance(
             modalities=self.modalities,
             method=self.method,
-        )
-        data[self.method] = data.ly.augment(
-            modality=self.method,
             subdivisions=self.lnl_subdivisions,
         )
         save_table_to_csv(file_path=self.output_file, table=data)

From 7cddc7a9964e5848cfec8952845b3cf38b6510c5 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Tue, 26 Aug 2025 12:06:33 +0200
Subject: [PATCH 10/20] test: update tests for new lydata

---
 tests/compute/prevalences_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/compute/prevalences_test.py b/tests/compute/prevalences_test.py
index 471b78d..3fe2151 100644
--- a/tests/compute/prevalences_test.py
+++ b/tests/compute/prevalences_test.py
@@ -2,7 +2,7 @@
 
 import pandas as pd
 import pytest
-from lydata import infer_and_combine_levels, load_datasets
+from lydata import load_datasets
 
 from lyscripts.compute.prevalences import observe_prevalence
 from lyscripts.configs import DiagnosisConfig, ScenarioConfig
@@ -24,7 +24,7 @@ def scenario_config() -> ScenarioConfig:
 def data() -> pd.DataFrame:
     """Load one of the lyDATA datasets."""
     data = next(load_datasets(year=2021, institution="usz"))
-    return infer_and_combine_levels(data)
+    return data.ly.enhance()
 
 
 def test_observe_prevalence(
@@ -37,5 +37,5 @@ def test_observe_prevalence(
         scenario_config=scenario_config,
     )
 
-    assert portion.match == 67
+    assert portion.match == 66
     assert portion.total == 150

From 255af68809ea9af8691d05ccc9ba6e105f77e3be Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Thu, 4 Sep 2025 11:28:53 +0200
Subject: [PATCH 11/20] build: require at least lydata 0.4.0

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index af6b90d..1f095be 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,7 +53,7 @@ dependencies = [
     "loguru",
     "fastapi",
     "uvicorn",
-    "lydata @ git+https://github.com/lycosystem/lydata-package@8469a26bea1d275b7ced43fbe3a269c8b30b60f4",
+    "lydata >= 0.4.0",
 ]
 dynamic = ["version"]
 

From e4f38fa791cc0b981e60e53ab18b84982ba98f46 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Thu, 4 Sep 2025 11:34:40 +0200
Subject: [PATCH 12/20] chore: update changelog

---
 CHANGELOG.md | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cfdd3f0..4b0fcda 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,44 @@
 
 All notable changes to this project will be documented in this file.
 
+## [1.0.0] - 2025-09-04
+
+### Bug Fixes
+
+- Change `info` to `core` for mid-level lydata columns
+- Use bug-fixed lydata `.ly.enhance()` method.
+
+### Documentation
+
+- Update documentation for `integrate` and `evidence` commands.
+- Improve `data collect` description.
+
+### Features
+
+- Add `integrate` command for thermodynamic integration. Thanks [@noemibuehrer]!
+- Add command spawning websever for interactive data collection.
+
+### Miscellaneous Tasks
+
+- Add missing links to changelog.
+- Add CITATION.cff.
+
+### Testing
+
+- Update tests for new lydata.
+
+### Build
+
+- Add uvicorn, fastapi to deps.
+- Require at least lydata 0.4.0.
+
+### Change
+
+- Make compatible with new lyDATA version.
+- Centralize inverse temperature schedule generation.
+- Store selected log-level globally.
+- Disable properties in collector.
+
 ## [1.0.0rc3] - 2025-07-22
 
 ### Documentation
@@ -871,6 +909,7 @@ returns `None` instead. Fixes [#11]
 ## [0.5.3] - 2022-08-22
 
 <!-- markdownlint-disable-file MD024 -->
+[1.0.0]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc3...1.0.0
 [1.0.0rc3]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc2...1.0.0rc3
 [1.0.0rc2]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc1...1.0.0rc2
 [1.0.0rc1]: https://github.com/lycosystem/lyscripts/compare/1.0.0.a7...1.0.0rc1
@@ -938,3 +977,4 @@ returns `None` instead. Fixes [#11]
 [`rich`]: https://rich.readthedocs.io/en/latest/
 [`rich_argparse`]: https://github.com/hamdanal/rich_argparse
 [LyProX]: https://lyprox.org
+[@noemibuehrer]: https://github.com/noemibuehrer

From 8e3f0324547073422eebe6ef5a0faebe640e0897 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Thu, 4 Sep 2025 11:38:38 +0200
Subject: [PATCH 13/20] change: update JSON schema for YAML files

---
 schemas/ly.json         | 103 +++++++++++++++++++++++++++++++++++++---
 src/lyscripts/schema.py |   1 +
 2 files changed, 98 insertions(+), 6 deletions(-)

diff --git a/schemas/ly.json b/schemas/ly.json
index 6bb3ca1..561127c 100644
--- a/schemas/ly.json
+++ b/schemas/ly.json
@@ -308,18 +308,46 @@
           "type": "string"
         },
         "repo_name": {
+          "anyOf": [
+            {
+              "minLength": 1,
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
           "default": "lycosystem/lydata",
           "description": "GitHub `repository/owner`.",
-          "minLength": 1,
-          "title": "Repo Name",
-          "type": "string"
+          "title": "Repo Name"
         },
         "ref": {
+          "anyOf": [
+            {
+              "minLength": 1,
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
           "default": "main",
           "description": "Branch/tag/commit of the repo.",
-          "minLength": 1,
-          "title": "Ref",
-          "type": "string"
+          "title": "Ref"
+        },
+        "local_dataset_dir": {
+          "anyOf": [
+            {
+              "format": "directory-path",
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "description": "Path to directory containing all the dataset subdirectories. So, e.g. if `path_on_disk` is `~/datasets` and the dataset is `2023-clb-multisite`, then the CSV file is expected to be at `~/datasets/2023-clb-multisite/data.csv`.",
+          "title": "Local Dataset Dir"
         }
       },
       "required": [
@@ -501,6 +529,19 @@
           "title": "Relative Thresh",
           "type": "number"
         },
+        "burnin_steps": {
+          "anyOf": [
+            {
+              "type": "integer"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "description": "Number of burn-in steps to take. If None, burn-in runs until convergence.",
+          "title": "Burnin Steps"
+        },
         "num_steps": {
           "anyOf": [
             {
@@ -623,6 +664,52 @@
       ],
       "title": "ScenarioConfig",
       "type": "object"
+    },
+    "ScheduleConfig": {
+      "description": "Configuration for generating a schedule of inverse temperatures.",
+      "properties": {
+        "method": {
+          "default": "power",
+          "description": "Method to generate the inverse temperature schedule.",
+          "enum": [
+            "geometric",
+            "linear",
+            "power"
+          ],
+          "title": "Method",
+          "type": "string"
+        },
+        "num": {
+          "default": 32,
+          "description": "Number of inverse temperatures in the schedule.",
+          "title": "Num",
+          "type": "integer"
+        },
+        "power": {
+          "default": 4.0,
+          "description": "If a power schedule is chosen, use this as power.",
+          "title": "Power",
+          "type": "number"
+        },
+        "values": {
+          "anyOf": [
+            {
+              "items": {
+                "type": "number"
+              },
+              "type": "array"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "description": "List of inverse temperatures to use instead of generating a schedule. If a list is provided, the other parameters are ignored.",
+          "title": "Values"
+        }
+      },
+      "title": "ScheduleConfig",
+      "type": "object"
     }
   },
   "description": "Settings for generating a JSON schema for lyscripts configuration files.",
@@ -685,6 +772,10 @@
       },
       "title": "Scenarios",
       "type": "array"
+    },
+    "schedule": {
+      "$ref": "#/$defs/ScheduleConfig",
+      "default": null
     }
   },
   "required": [
diff --git a/src/lyscripts/schema.py b/src/lyscripts/schema.py
index cab7c68..7470af3 100644
--- a/src/lyscripts/schema.py
+++ b/src/lyscripts/schema.py
@@ -53,6 +53,7 @@ class SchemaSettings(BaseModel):
     model: configs.ModelConfig = None
     sampling: configs.SamplingConfig = None
     scenarios: list[configs.ScenarioConfig] = []
+    schedule: configs.ScheduleConfig = None
 
 
 def main() -> None:

From 349507f215a168c5eb4d2d00a129f8ae234a6e07 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Wed, 10 Sep 2025 16:49:08 +0200
Subject: [PATCH 14/20] fix: convert dtypes during joining

---
 src/lyscripts/data/join.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/lyscripts/data/join.py b/src/lyscripts/data/join.py
index d7eb62e..a48db9a 100644
--- a/src/lyscripts/data/join.py
+++ b/src/lyscripts/data/join.py
@@ -60,7 +60,9 @@ def cli_cmd(self) -> None:
         joined = None
 
         for data_config in self.inputs:
-            data = data_config.load()
+            # `.convert_dtypes()` ensures that e.g. boolean values are not suddenly
+            # converted to strings when a dataset with missing values is concatenated.
+            data = data_config.load().convert_dtypes()
             if joined is None:
                 joined = data
             else:

From e017b881403763850692059a04a161649e180865 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Wed, 10 Sep 2025 16:55:43 +0200
Subject: [PATCH 15/20] chore: update changelog

---
 CHANGELOG.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4b0fcda..76e866e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,12 @@
 
 All notable changes to this project will be documented in this file.
 
+## [1.0.1] - 2025-09-10
+
+### Bug Fixes
+
+- `.convert_dtypes()` during joining of tables. Not doing this caused pandas to interpret e.g. booleans with missing values as strings.
+
 ## [1.0.0] - 2025-09-04
 
 ### Bug Fixes
@@ -909,6 +915,7 @@ returns `None` instead. Fixes [#11]
 ## [0.5.3] - 2022-08-22
 
 <!-- markdownlint-disable-file MD024 -->
+[1.0.1]: https://github.com/lycosystem/lyscripts/compare/1.0.0...1.0.1
 [1.0.0]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc3...1.0.0
 [1.0.0rc3]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc2...1.0.0rc3
 [1.0.0rc2]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc1...1.0.0rc2

From 8adbf4a4a6b351be7d422c1c031d2d1b123192bc Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Wed, 10 Sep 2025 17:00:04 +0200
Subject: [PATCH 16/20] change: use lydata's `cast_dtypes()`

---
 src/lyscripts/data/join.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/lyscripts/data/join.py b/src/lyscripts/data/join.py
index a48db9a..8b0547e 100644
--- a/src/lyscripts/data/join.py
+++ b/src/lyscripts/data/join.py
@@ -3,6 +3,7 @@
 from pathlib import Path
 
 import pandas as pd
+from lydata.validator import cast_dtypes
 from pydantic import Field
 
 from lyscripts.cli import assemble_main
@@ -60,9 +61,10 @@ def cli_cmd(self) -> None:
         joined = None
 
         for data_config in self.inputs:
-            # `.convert_dtypes()` ensures that e.g. boolean values are not suddenly
+            data = data_config.load()
+            # `cast_dtypes()` ensures that e.g. boolean values are not suddenly
             # converted to strings when a dataset with missing values is concatenated.
-            data = data_config.load().convert_dtypes()
+            data = cast_dtypes(data)
             if joined is None:
                 joined = data
             else:

From 9848cb5def0ec56218b189ccb6b254879dc4c0cd Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Wed, 10 Sep 2025 17:00:47 +0200
Subject: [PATCH 17/20] chore: update changlog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 76e866e..c5e5ef2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,7 +6,7 @@ All notable changes to this project will be documented in this file.
 
 ### Bug Fixes
 
-- `.convert_dtypes()` during joining of tables. Not doing this caused pandas to interpret e.g. booleans with missing values as strings.
+- Convert dtypes during joining using lydata's `cast_dtypes()`.
 
 ## [1.0.0] - 2025-09-04
 

From deccc8972e9d271286ef1a564df8be0eeec2a1e4 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Mon, 29 Sep 2025 15:50:18 +0200
Subject: [PATCH 18/20] chore: change email addresses

---
 pyproject.toml            | 4 +++-
 src/lyscripts/__init__.py | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1f095be..75401c0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,7 +10,9 @@ build-backend = "setuptools.build_meta"
 name = "lyscripts"
 description = "Package to interact with lymphatic progression data and models."
 authors = [
-	{name = "Roman Ludwig", email = "roman.ludwig@usz.ch"}
+    {name = "Roman Ludwig", email = "gygqdstu3@mozmail.com"},
+    {name = "Yoel Pérez Haas", email = "yoel.perezhaas@usz.ch"},
+    {name = "Noemi Bührer", email = "noemi.buehrer@usz.ch"},
 ]
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/src/lyscripts/__init__.py b/src/lyscripts/__init__.py
index e9dbcb1..32808f2 100644
--- a/src/lyscripts/__init__.py
+++ b/src/lyscripts/__init__.py
@@ -25,7 +25,7 @@
 __version__ = version
 __description__ = "Package to interact with lymphatic progression data and models."
 __author__ = "Roman Ludwig"
-__email__ = "roman.ludwig@usz.ch"
+__email__ = "gygqdstu3@mozmail.com"
 __uri__ = "https://github.com/lycosystem/lyscripts"
 
 # activate copy on write in pandas.

From c96615b8f18b6da5f8d8e2c92f6819f04fdea77a Mon Sep 17 00:00:00 2001
From: noemibuehrer <noeminaijia.buehrer@uzh.ch>
Date: Mon, 2 Mar 2026 11:15:43 +0100
Subject: [PATCH 19/20] fix: correct TI evaluation and leave out uncertainty
 reporting

---
 src/lyscripts/compute/evidence.py | 25 +++++++++----------------
 src/lyscripts/evaluate.py         | 25 +++++++++----------------
 2 files changed, 18 insertions(+), 32 deletions(-)

diff --git a/src/lyscripts/compute/evidence.py b/src/lyscripts/compute/evidence.py
index 1cd51e2..ffce481 100644
--- a/src/lyscripts/compute/evidence.py
+++ b/src/lyscripts/compute/evidence.py
@@ -48,23 +48,17 @@ def comp_bic(log_probs: np.ndarray, num_params: int, num_data: int) -> float:
 def compute_evidence(
     temp_schedule: np.ndarray,
     log_probs: np.ndarray,
-    num: int = 1000,
-) -> tuple[float, float]:
-    """Compute the evidence and its standard deviation.
+) -> float:
+    """Compute the evidence.
 
     Given a ``temp_schedule`` of inverse temperatures and corresponding sets of
-    ``log_probs``, draw ``num`` "paths" of log-probabilities and compute the evidence
-    for each using trapezoidal integration.
-
-    The evidence is then the mean of those ``num`` integrations, while the error is
-    their standard deviation.
+    ``log_probs``, we calculate the mean ``log_prob`` over all samples to approximate
+    the expectation value under the corresponding power posterior for each step in the
+    ``temp_schedule``. The evidence is evaluated using trapezoidal integration of the
+    expectation values over the ``temp_schedule``.
     """
-    integrals = np.zeros(shape=num)
-    for i in range(num):
-        rand_idx = RNG.choice(log_probs.shape[1], size=log_probs.shape[0])
-        drawn_accuracy = log_probs[np.arange(log_probs.shape[0]), rand_idx].copy()
-        integrals[i] = trapezoid(y=drawn_accuracy, x=temp_schedule)
-    return np.mean(integrals), np.std(integrals)
+    a_mc = np.mean(log_probs, axis=1)
+    return trapezoid(y=a_mc, x=temp_schedule)
 
 
 def compute_ti_results(
@@ -95,9 +89,8 @@ def compute_ti_results(
         )
         ti_log_probs[i] = reader.get_blobs(flat=True)["log_prob"]
 
-    evidence, evidence_std = compute_evidence(temp_schedule, ti_log_probs)
+    evidence = compute_evidence(temp_schedule, ti_log_probs)
     metrics["evidence"] = evidence
-    metrics["evidence_std"] = evidence_std
 
     return temp_schedule, ti_log_probs
 
diff --git a/src/lyscripts/evaluate.py b/src/lyscripts/evaluate.py
index d09c5dd..5f9dbd0 100644
--- a/src/lyscripts/evaluate.py
+++ b/src/lyscripts/evaluate.py
@@ -90,23 +90,17 @@ def comp_bic(log_probs: np.ndarray, num_params: int, num_data: int) -> float:
 def compute_evidence(
     temp_schedule: np.ndarray,
     log_probs: np.ndarray,
-    num: int = 1000,
-) -> tuple[float, float]:
-    """Compute the evidence and its standard deviation.
+) -> float:
+    """Compute the evidence.
 
     Given a ``temp_schedule`` of inverse temperatures and corresponding sets of
-    ``log_probs``, draw ``num`` "paths" of log-probabilities and compute the evidence
-    for each using trapezoidal integration.
-
-    The evidence is then the mean of those ``num`` integrations, while the error is
-    their standard deviation.
+    ``log_probs``, we calculate the mean ``log_prob`` over all samples to approximate
+    the expectation value under the corresponding power posterior for each step in the
+    ``temp_schedule``. The evidence is evaluated using trapezoidal integration of the
+    expectation values over the ``temp_schedule``.
     """
-    integrals = np.zeros(shape=num)
-    for i in range(num):
-        rand_idx = RNG.choice(log_probs.shape[1], size=log_probs.shape[0])
-        drawn_accuracy = log_probs[np.arange(log_probs.shape[0]), rand_idx].copy()
-        integrals[i] = trapezoid(y=drawn_accuracy, x=temp_schedule)
-    return np.mean(integrals), np.std(integrals)
+    a_mc = np.mean(log_probs, axis=1)
+    return trapezoid(y=a_mc, x=temp_schedule)
 
 
 def compute_ti_results(
@@ -134,9 +128,8 @@ def compute_ti_results(
         reader = emcee.backends.HDFBackend(model, name=f"ti/{run}", read_only=True)
         ti_log_probs[i] = reader.get_blobs(flat=True)
 
-    evidence, evidence_std = compute_evidence(temp_schedule, ti_log_probs)
+    evidence = compute_evidence(temp_schedule, ti_log_probs)
     metrics["evidence"] = evidence
-    metrics["evidence_std"] = evidence_std
 
     return temp_schedule, ti_log_probs
 

From d38f77c2c6fbc242b30330e5143ecdfd79f84e5c Mon Sep 17 00:00:00 2001
From: noemibuehrer <noeminaijia.buehrer@uzh.ch>
Date: Wed, 8 Apr 2026 16:13:49 +0200
Subject: [PATCH 20/20] chore: update changelog

---
 CHANGELOG.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c5e5ef2..84abe31 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,16 @@
 
 All notable changes to this project will be documented in this file.
 
+## [1.0.2] - 2026-04-08
+
+### Bug Fixes
+
+- Correct TI evaluation and leave out uncertainty reporting. Fixes [#81].
+
+### Miscellaneous Tasks
+
+- Change email addresses.
+
 ## [1.0.1] - 2025-09-10
 
 ### Bug Fixes
@@ -915,6 +925,7 @@ returns `None` instead. Fixes [#11]
 ## [0.5.3] - 2022-08-22
 
 <!-- markdownlint-disable-file MD024 -->
+[1.0.2]: https://github.com/lycosystem/lyscripts/compare/1.0.1...1.0.2
 [1.0.1]: https://github.com/lycosystem/lyscripts/compare/1.0.0...1.0.1
 [1.0.0]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc3...1.0.0
 [1.0.0rc3]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc2...1.0.0rc3
@@ -979,6 +990,7 @@ returns `None` instead. Fixes [#11]
 [#72]: https://github.com/lycosystem/lyscripts/issues/72
 [#74]: https://github.com/lycosystem/lyscripts/issues/74
 [#75]: https://github.com/lycosystem/lyscripts/issues/75
+[#81]: https://github.com/lycosystem/lyscripts/issues/81
 
 [`emcee`]: https://emcee.readthedocs.io/en/stable/
 [`rich`]: https://rich.readthedocs.io/en/latest/