diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 8d0bcb93..d8e32a7c 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -10,12 +10,12 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [windows-latest, macos-latest, macos-12, ubuntu-22.04] + os: [windows-latest, macos-latest, macos-13, ubuntu-22.04] python-version: ["3.10", "3.11", "3.12"] include: - os: macos-latest architecture: aarch64-apple-darwin - - os: macos-12 + - os: macos-13 architecture: x86_64-apple-darwin steps: - uses: actions/checkout@v3 diff --git a/build.py b/build.py index 2884f5c9..5c56bfa3 100644 --- a/build.py +++ b/build.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023-2025 Ramil Nugmanov +# Copyright 2023-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -62,7 +62,8 @@ extra_compile_args=extra_compile_args) ] -ext_modules = cythonize(extensions, language_level=3) +ext_modules = cythonize(extensions, language_level=3, + compiler_directives={'freethreading_compatible': True}) cmd = build_ext(Distribution({'ext_modules': ext_modules})) cmd.ensure_finalized() cmd.run() diff --git a/chython/__init__.py b/chython/__init__.py index 803ae753..2b4fc171 100644 --- a/chython/__init__.py +++ b/chython/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2025 Ramil Nugmanov +# Copyright 2014-2026 Ramil Nugmanov # Copyright 2014-2019 Timur Madzhidov tmadzhidov@gmail.com features and API discussion # Copyright 2014-2019 Alexandre Varnek base idea of CGR approach # This file is part of chython. @@ -27,7 +27,6 @@ from .utils import * -torch_device = 'cpu' # AAM model device. Change before first `reset_mapping` call! 
clean2d_engine: Literal['rdkit', 'smilesdrawer', 'cdk', 'obabel', 'indigo'] = 'smilesdrawer' conformer_engine: Literal['rdkit', 'cdpkit'] = 'rdkit' class_paths = [getenv('CDK_PATH', 'cdk.jar'), getenv('OPSIN_PATH', 'opsin.jar')] diff --git a/chython/_functions.py b/chython/_functions.py index d71c3a78..1aed4fd8 100644 --- a/chython/_functions.py +++ b/chython/_functions.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020, 2021 Ramil Nugmanov +# Copyright 2020-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,9 +16,35 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # +from functools import wraps from itertools import product +_SENTINEL = object() + + +def cached_method(func): + """Cache no-argument method result in instance __dict__. Cleared by flush_cache(). + + Thread-safe for concurrent reads without locking: + - dict.get/setitem are atomic in CPython 3.14 free-threaded mode + - Wrapped functions are pure (deterministic, read-only on self) + - Duplicate computation on cold cache is benign (same result) + - Mutations must be sequential (caller's responsibility) + """ + key = f'__cached_method_{func.__name__}' + + @wraps(func) + def wrapper(self): + val = self.__dict__.get(key, _SENTINEL) + if val is not _SENTINEL: + return val + val = func(self) + self.__dict__[key] = val + return val + return wrapper + + # lazy itertools.product with diagonal combination precedence def lazy_product(*args): if len(args) == 1: @@ -66,4 +92,4 @@ def lazy_product(*args): yield tuple(p[x] for x, p in zip(ind, pools)) -__all__ = ['lazy_product'] +__all__ = ['cached_method', 'lazy_product'] diff --git a/chython/_java.py b/chython/_java.py new file mode 100644 index 00000000..61f215de --- /dev/null +++ b/chython/_java.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2026 Ramil Nugmanov +# This file is part of chython. 
+# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +from functools import cache + + +@cache +def _start_jvm(): + """Start JVM once with all Java dependencies on classpath. Thread-safe via @cache.""" + from jpype import isJVMStarted, startJVM + + if not isJVMStarted(): + from chython import class_paths + + startJVM('--enable-native-access=ALL-UNNAMED', classpath=class_paths) + + +@cache +def get_cdk(): + """Get CDK Java package. Starts JVM if needed.""" + try: + from jpype import JPackage + + _start_jvm() + return JPackage('org').openscience.cdk + except (ImportError, AttributeError): + raise ImportError('Java/JPype/CDK.jar is not installed or broken. make sure CDK_PATH env variable is set') + + +@cache +def get_opsin(): + """Get OPSIN NameToStructure instance. Starts JVM if needed.""" + try: + from jpype import JPackage + + _start_jvm() + return JPackage('uk').ac.cam.ch.wwmm.opsin.NameToStructure.getInstance() + except (ImportError, AttributeError): + raise ImportError('Java/JPype/OPSIN.jar is not installed or broken. 
make sure OPSIN_PATH env variable is set') + + +__all__ = ['get_cdk', 'get_opsin'] diff --git a/chython/algorithms/__init__.py b/chython/algorithms/__init__.py index bdecf99b..51bc5e02 100644 --- a/chython/algorithms/__init__.py +++ b/chython/algorithms/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2021 Ramil Nugmanov +# Copyright 2017-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/_isomorphism.pyx b/chython/algorithms/_isomorphism.pyx index 3d39f200..f2277892 100644 --- a/chython/algorithms/_isomorphism.pyx +++ b/chython/algorithms/_isomorphism.pyx @@ -1,6 +1,6 @@ -# -*- coding: utf-8 -*- +# cython: freethreading_compatible=True # -# Copyright 2021-2025 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # Copyright 2021 Aleksandr Sizov # This file is part of chython. # diff --git a/chython/algorithms/_rings.pyx b/chython/algorithms/_rings.pyx index d6e09241..13b8937b 100644 --- a/chython/algorithms/_rings.pyx +++ b/chython/algorithms/_rings.pyx @@ -1,3 +1,4 @@ +# cython: freethreading_compatible=True # cython: undeclared_check_usage=error # cython: warn.undeclared=True # cython: warn.unused=True @@ -7,7 +8,7 @@ # cython: wraparound=False # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/aromatics/__init__.py b/chython/algorithms/aromatics/__init__.py index 078a0b34..95f090cf 100644 --- a/chython/algorithms/aromatics/__init__.py +++ b/chython/algorithms/aromatics/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2021 Ramil Nugmanov +# Copyright 2018-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/aromatics/_rules.py b/chython/algorithms/aromatics/_rules.py index 49b69cd6..67953b5b 100644 --- a/chython/algorithms/aromatics/_rules.py +++ b/chython/algorithms/aromatics/_rules.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2024 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index 840452f9..b24e0705 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2025 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -17,16 +17,12 @@ # along with this program; if not, see . # from collections import defaultdict, deque -from typing import List, Optional, Tuple, TYPE_CHECKING, Union +from collections.abc import Iterator from ._rules import rules from ..._functions import lazy_product from ...exceptions import InvalidAromaticRing -if TYPE_CHECKING: - from chython import MoleculeContainer - - # atomic number constants B = 5 C = 6 @@ -42,7 +38,7 @@ class Kekule: __slots__ = () - def kekule(self: Union['Kekule', 'MoleculeContainer'], *, buffer_size=7, ignore_pyrrole_hydrogen=False) -> bool: + def kekule(self, *, buffer_size=7, ignore_pyrrole_hydrogen=False) -> bool: """ Convert structure to kekule form. Return True if found any aromatic ring. Set implicit hydrogen count and hybridization marks on atoms. 
@@ -69,7 +65,7 @@ def kekule(self: Union['Kekule', 'MoleculeContainer'], *, buffer_size=7, ignore_ return True return fixed - def enumerate_kekule(self: Union['Kekule', 'MoleculeContainer'], ignore_pyrrole_hydrogen=False): + def enumerate_kekule(self, ignore_pyrrole_hydrogen=False) -> Iterator['MoleculeContainer']: """ Enumerate all possible kekule forms of molecule. """ @@ -87,7 +83,7 @@ def enumerate_kekule(self: Union['Kekule', 'MoleculeContainer'], ignore_pyrrole_ copy.calc_labels() yield copy - def __fix_rings(self: 'MoleculeContainer'): + def __fix_rings(self): atoms = self._atoms bonds = self._bonds seen = set() @@ -117,7 +113,7 @@ def __fix_rings(self: 'MoleculeContainer'): return True return False - def __prepare_rings(self: 'MoleculeContainer', ignore_pyrrole_hydrogen): + def __prepare_rings(self, ignore_pyrrole_hydrogen): atoms = self._atoms bonds = self._bonds @@ -344,7 +340,7 @@ def __kekule_full(self, buffer_size, ignore_pyrrole_hydrogen): def _kekule_component(rings, double_bonded, pyrroles, buffer_size): # (current atom, previous atom, bond between cp atoms, path deep for cutting [None if cut impossible]) - stack: List[List[Tuple[int, int, int, Optional[int]]]] + stack: list[list[tuple[int, int, int, int | None]]] if double_bonded: # start from double bonded if exists start = next(iter(double_bonded)) stack = [[(next(iter(rings[start])), start, 1, 0)]] diff --git a/chython/algorithms/aromatics/test/__init__.py b/chython/algorithms/aromatics/test/__init__.py index 031c963a..aafc6f13 100644 --- a/chython/algorithms/aromatics/test/__init__.py +++ b/chython/algorithms/aromatics/test/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/aromatics/test/test_kekule.py b/chython/algorithms/aromatics/test/test_kekule.py index be33e701..bc84bab6 100644 --- a/chython/algorithms/aromatics/test/test_kekule.py +++ b/chython/algorithms/aromatics/test/test_kekule.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # Copyright 2025 Tagir Akhmetshin # This file is part of chython. # diff --git a/chython/algorithms/aromatics/test/test_thiele.py b/chython/algorithms/aromatics/test/test_thiele.py index a8b7d087..699d464e 100644 --- a/chython/algorithms/aromatics/test/test_thiele.py +++ b/chython/algorithms/aromatics/test/test_thiele.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # Copyright 2025 Tagir Akhmetshin # This file is part of chython. # diff --git a/chython/algorithms/aromatics/thiele.py b/chython/algorithms/aromatics/thiele.py index 90c1e907..7efc9ccb 100644 --- a/chython/algorithms/aromatics/thiele.py +++ b/chython/algorithms/aromatics/thiele.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2025 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -17,16 +17,11 @@ # along with this program; if not, see . # from collections import defaultdict -from typing import TYPE_CHECKING from ._rules import freak_rules from .._rings import sssr from ..rings import _connected_components -if TYPE_CHECKING: - from chython import MoleculeContainer - - # atomic number constants B = 5 C = 6 @@ -40,7 +35,7 @@ class Thiele: __slots__ = () - def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: + def thiele(self, *, fix_tautomers=True) -> bool: """ Convert structure to aromatic form (Huckel rule ignored). Return True if found any kekule ring. 
Also marks atoms as aromatic. diff --git a/chython/algorithms/calculate2d/__init__.py b/chython/algorithms/calculate2d/__init__.py index f0c35a13..3ede6256 100644 --- a/chython/algorithms/calculate2d/__init__.py +++ b/chython/algorithms/calculate2d/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2025 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/calculate2d/molecule.py b/chython/algorithms/calculate2d/molecule.py index 99717424..f8e56c14 100644 --- a/chython/algorithms/calculate2d/molecule.py +++ b/chython/algorithms/calculate2d/molecule.py @@ -17,21 +17,14 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # +from importlib.resources import files from random import random -from typing import TYPE_CHECKING, Union, Dict, Literal +from typing import Literal from ...exceptions import ImplementationError from ...periodictable.base.vector import Vector - -if TYPE_CHECKING: - from chython import MoleculeContainer - try: from py_mini_racer import MiniRacer - try: - from importlib.resources import files - except ImportError: # python3.8 - from importlib_resources import files ctx = MiniRacer() ctx.eval('const self = this') @@ -42,11 +35,8 @@ class Calculate2DMolecule: __slots__ = () - _atoms: Dict[int, 'Element'] - _bonds: Dict[int, Dict[int, 'Bond']] - def clean2d(self: Union['MoleculeContainer', 'Calculate2DMolecule'], - *, engine: Literal['rdkit', 'smilesdrawer', 'cdk', 'obabel', 'indigo'] = None): + def clean2d(self, *, engine: Literal['rdkit', 'smilesdrawer', 'cdk', 'obabel', 'indigo'] = None): """ Calculate 2d layout of graph. 
@@ -85,7 +75,9 @@ def clean2d(self: Union['MoleculeContainer', 'Calculate2DMolecule'], for n, (x, y) in zip(order, xy): plane[n] = (x - shift_x, shift_y - y) elif engine == 'cdk': - sdg = self._cdk_engine.layout.StructureDiagramGenerator() + from ..._java import get_cdk + + sdg = get_cdk().layout.StructureDiagramGenerator() sdg.setUseTemplates(False) sdg.setMolecule(self.to_cdk()) sdg.generateCoordinates() @@ -95,8 +87,10 @@ def clean2d(self: Union['MoleculeContainer', 'Calculate2DMolecule'], xy = mol.getAtom(i).getPoint2d() plane[n] = (xy.x, xy.y) elif engine == 'obabel': + from openbabel import openbabel + mol = self.to_openbabel() - assert self._obgen2d(mol), 'OpenBabel failed to generate 2d layout' + assert openbabel.OBOp.FindType('gen2D').Do(mol), 'OpenBabel failed to generate 2d layout' assert mol.NumAtoms() == len(self), 'OpenBabel modified molecule' for i, n in enumerate(self.smiles_atoms_order, 1): @@ -122,7 +116,7 @@ def clean2d(self: Union['MoleculeContainer', 'Calculate2DMolecule'], shift_x = self._fix_plane_mean(shift_x, component=c) + .9 self.__dict__.pop('__cached_method__repr_svg_', None) - def rescale2d(self: 'MoleculeContainer'): + def rescale2d(self): """ Rescale coordinates to average bond length 0.825. 
""" @@ -187,7 +181,7 @@ def _fix_plane_min(self, shift_x: float, shift_y=0., component=None) -> float: max_x += .25 return max_x - def __clean2d_prepare(self: 'MoleculeContainer', entry): + def __clean2d_prepare(self, entry): w = {n: random() for n in self._atoms} w[entry] = -1 smiles, order = self._smiles(w.__getitem__, random=True, charges=False, stereo=False, _return_order=True) diff --git a/chython/algorithms/calculate2d/reaction.py b/chython/algorithms/calculate2d/reaction.py index 536643aa..8165aa8d 100644 --- a/chython/algorithms/calculate2d/reaction.py +++ b/chython/algorithms/calculate2d/reaction.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2025 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,17 +16,12 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from typing import TYPE_CHECKING - - -if TYPE_CHECKING: - from chython import ReactionContainer class Calculate2DReaction: __slots__ = () - def clean2d(self: 'ReactionContainer'): + def clean2d(self): """ Recalculate 2d coordinates """ @@ -34,7 +29,7 @@ def clean2d(self: 'ReactionContainer'): m.clean2d() self.fix_positions() - def fix_positions(self: 'ReactionContainer'): + def fix_positions(self): """ Fix coordinates of molecules in reaction """ diff --git a/chython/algorithms/conformers.py b/chython/algorithms/conformers.py index 863529d8..849ad116 100644 --- a/chython/algorithms/conformers.py +++ b/chython/algorithms/conformers.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -17,17 +17,13 @@ # along with this program; if not, see . 
# from io import StringIO -from typing import Literal, TYPE_CHECKING - - -if TYPE_CHECKING: - from chython import MoleculeContainer +from typing import Literal class Conformers: __slots__ = () - def generate_conformers(self: 'MoleculeContainer', limit: int = 10, *, optimize: bool = False, + def generate_conformers(self, limit: int = 10, *, optimize: bool = False, engine: Literal['rdkit', 'cdpkit'] = None, **kwargs) -> int: """ Generate conformers for the molecule ignoring implicit hydrogens. Set them manually to have a full 3D structure. diff --git a/chython/algorithms/depict.py b/chython/algorithms/depict.py index 6e45fce0..08517399 100644 --- a/chython/algorithms/depict.py +++ b/chython/algorithms/depict.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2025 Ramil Nugmanov +# Copyright 2018-2026 Ramil Nugmanov # Copyright 2019-2020 Dinar Batyrshin # This file is part of chython. # @@ -18,18 +18,14 @@ # along with this program; if not, see . # from asyncio import new_event_loop -from CachedMethods import cached_method from collections import defaultdict from math import atan2, sin, cos, hypot from os.path import join from tempfile import TemporaryDirectory -from typing import Tuple, TYPE_CHECKING, Union, Literal +from typing import Literal from uuid import uuid4 from zlib import compress - - -if TYPE_CHECKING: - from chython import ReactionContainer, MoleculeContainer +from .._functions import cached_method cpk = tuple(''' #909090 #D9FFFF @@ -162,8 +158,8 @@ def _render_aromatic_bond(n_x, n_y, m_x, m_y, c_x, c_y): def depict_settings(*, carbon: bool = False, aam: bool = True, monochrome: bool = False, bond_color: str = 'black', aam_color: str = '#0305A7', atoms_colors: tuple = cpk, - bond_width: float = .04, wedge_space: float = .08, dashes: Tuple[float, float] = (.2, .1), - aromatic_dashes: Tuple[float, float] = (.15, .05), dx_ci: float = .05, dy_ci: float = .2, + bond_width: float = .04, wedge_space: float = .08, dashes: tuple[float, float] = (.2, 
.1), + aromatic_dashes: tuple[float, float] = (.15, .05), dx_ci: float = .05, dy_ci: float = .2, dx_m: float = .05, dy_m: float = .2, dx_s: float = .05, dy_s: float = .1, span_dy: float = .15, double_space: float = .06, triple_space: float = .13, aromatic_space: float = .14, atom_radius: float = .2, bond_radius=.02, font_size: float = .5, other_size: float = .3, @@ -238,9 +234,9 @@ def depict_settings(*, carbon: bool = False, aam: bool = True, monochrome: bool class DepictMolecule: __slots__ = () - def depict(self: Union['MoleculeContainer', 'DepictMolecule'], *, width=None, height=None, clean2d: bool = True, + def depict(self, *, width=None, height=None, clean2d: bool = True, format: Literal['svg', 'png', 'svgz'] = 'svg', png_width=1000, png_heigh=1000, png_scale=1., - _embedding=False) -> Union[str, bytes]: + _embedding=False) -> str | bytes: """ Depict molecule in SVG or PNG format. @@ -298,7 +294,7 @@ def depict(self: Union['MoleculeContainer', 'DepictMolecule'], *, width=None, he def _repr_svg_(self): return self.depict() - def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): + def __render_bonds(self): atoms = self._atoms svg = [] double_space = _render_config['double_space'] @@ -359,7 +355,7 @@ def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): svg.append(aromatic) return svg - def __render_atoms(self: 'MoleculeContainer', uid): + def __render_atoms(self, uid): bonds = self._bonds carbon = _render_config['carbon'] @@ -505,9 +501,9 @@ def __render_atoms(self: 'MoleculeContainer', uid): class DepictReaction: __slots__ = () - def depict(self: 'ReactionContainer', *, width=None, height=None, clean2d: bool = True, + def depict(self, *, width=None, height=None, clean2d: bool = True, format: Literal['svg', 'png', 'svgz'] = 'svg', - png_width=1000, png_heigh=1000, png_scale=1.) -> Union[str, bytes]: + png_width=1000, png_heigh=1000, png_scale=1.) -> str | bytes: """ Depict reaction in SVG format. 
diff --git a/chython/algorithms/fingerprints/__init__.py b/chython/algorithms/fingerprints/__init__.py index ec2121fa..d16320e5 100644 --- a/chython/algorithms/fingerprints/__init__.py +++ b/chython/algorithms/fingerprints/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # Copyright 2021 Aleksandr Sizov # This file is part of chython. # @@ -17,20 +17,15 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from typing import TYPE_CHECKING from .linear import * from .morgan import * -if TYPE_CHECKING: - from chython import MoleculeContainer, CGRContainer - - class Fingerprints(LinearFingerprint, MorganFingerprint): __slots__ = () @property - def _atom_identifiers(self: 'MoleculeContainer'): + def _atom_identifiers(self): return {idx: hash((atom.isotope or 0, atom.atomic_number, atom.charge, atom.is_radical)) for idx, atom in self.atoms()} @@ -39,7 +34,7 @@ class FingerprintsCGR(LinearFingerprint, MorganFingerprint): __slots__ = () @property - def _atom_identifiers(self: 'CGRContainer'): + def _atom_identifiers(self): return {idx: hash((atom.isotope or 0, atom.atomic_number, atom.charge, atom.p_charge, atom.is_radical, atom.p_is_radical)) for idx, atom in self._atoms.items()} diff --git a/chython/algorithms/fingerprints/linear.py b/chython/algorithms/fingerprints/linear.py index da196614..bc02fd8d 100644 --- a/chython/algorithms/fingerprints/linear.py +++ b/chython/algorithms/fingerprints/linear.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # Copyright 2021 Aleksandr Sizov # Copyright 2023 Timur Gimadiev # This file is part of chython. 
@@ -20,12 +20,7 @@ # from collections import defaultdict, deque from math import log2 -from numpy import uint8, zeros -from typing import Deque, Dict, List, Set, Tuple, TYPE_CHECKING - - -if TYPE_CHECKING: - from chython import MoleculeContainer +from numpy import ndarray, uint8, zeros class LinearFingerprint: @@ -44,7 +39,7 @@ class LinearFingerprint: def linear_fingerprint(self, min_radius: int = 1, max_radius: int = 4, length: int = 1024, number_active_bits: int = 2, - number_bit_pairs: int = 4): + number_bit_pairs: int = 4) -> ndarray: """ Transform structures into array of binary features. @@ -65,7 +60,7 @@ def linear_fingerprint(self, min_radius: int = 1, max_radius: int = 4, return fingerprints def linear_bit_set(self, min_radius: int = 1, max_radius: int = 4, length: int = 1024, number_active_bits: int = 2, - number_bit_pairs: int = 4) -> Set[int]: + number_bit_pairs: int = 4) -> set[int]: """ Transform structure into set of indexes of True-valued features. @@ -92,7 +87,7 @@ def linear_bit_set(self, min_radius: int = 1, max_radius: int = 4, length: int = active_bits.add(tpl & mask) return active_bits - def linear_hash_set(self, min_radius: int = 1, max_radius: int = 4, number_bit_pairs: int = 4) -> Set[int]: + def linear_hash_set(self, min_radius: int = 1, max_radius: int = 4, number_bit_pairs: int = 4) -> set[int]: """ Transform structure into set of integer hashes of fragments with count information. 
@@ -109,8 +104,8 @@ def linear_hash_set(self, min_radius: int = 1, max_radius: int = 4, number_bit_p self._fragments(min_radius, max_radius).items() for cnt in range(min(len(count), number_bit_pairs))} - def linear_hash_smiles(self: 'MoleculeContainer', min_radius: int = 1, max_radius: int = 4, - number_bit_pairs: int = 4) -> Dict[int, List[str]]: + def linear_hash_smiles(self, min_radius: int = 1, max_radius: int = 4, + number_bit_pairs: int = 4) -> dict[int, list[str]]: """ Transform structure into dict of integer hashes of fragments with count information and corresponding fragment SMILES. @@ -138,7 +133,7 @@ def linear_hash_smiles(self: 'MoleculeContainer', min_radius: int = 1, max_radiu return {k: list(v) for k, v in out.items()} def linear_smiles_hash(self, min_radius: int = 1, max_radius: int = 4, - number_bit_pairs: int = 4) -> Dict[str, List[int]]: + number_bit_pairs: int = 4) -> dict[str, list[int]]: """ Transform structure into dict of fragment SMILES and list of corresponding integer hashes of fragments. 
@@ -154,8 +149,8 @@ def linear_smiles_hash(self, min_radius: int = 1, max_radius: int = 4, out[s].append(k) return dict(out) - def _chains(self: 'MoleculeContainer', min_radius: int = 1, max_radius: int = 4) -> Set[Tuple[int, ...]]: - queue: Deque[Tuple[int, ...]] # typing + def _chains(self, min_radius: int = 1, max_radius: int = 4) -> set[tuple[int, ...]]: + queue: deque[tuple[int, ...]] # typing atoms = self._atoms bonds = self._bonds @@ -181,8 +176,7 @@ def _chains(self: 'MoleculeContainer', min_radius: int = 1, max_radius: int = 4) arr.add(frag if frag > rev else rev) return arr - def _fragments(self: 'MoleculeContainer', min_radius: int = 1, - max_radius: int = 4) -> Dict[Tuple[int, ...], List[Tuple[int, ...]]]: + def _fragments(self, min_radius: int = 1, max_radius: int = 4) -> dict[tuple[int, ...], list[tuple[int, ...]]]: atoms = self._atom_identifiers bonds = self._bonds out = defaultdict(list) @@ -201,7 +195,7 @@ def _fragments(self: 'MoleculeContainer', min_radius: int = 1, return dict(out) @property - def _atom_identifiers(self) -> Dict[int, int]: + def _atom_identifiers(self) -> dict[int, int]: raise NotImplementedError diff --git a/chython/algorithms/fingerprints/morgan.py b/chython/algorithms/fingerprints/morgan.py index 9f8d0ab5..36a1ec2b 100644 --- a/chython/algorithms/fingerprints/morgan.py +++ b/chython/algorithms/fingerprints/morgan.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # Copyright 2023 Timur Gimadiev # Copyright 2021 Aleksandr Sizov # This file is part of chython. 
@@ -20,19 +20,14 @@ # from collections import defaultdict from math import log2 -from numpy import uint8, zeros -from typing import Dict, List, Set, TYPE_CHECKING - - -if TYPE_CHECKING: - from chython import MoleculeContainer +from numpy import ndarray, uint8, zeros class MorganFingerprint: __slots__ = () def morgan_fingerprint(self, min_radius: int = 1, max_radius: int = 4, - length: int = 1024, number_active_bits: int = 2): + length: int = 1024, number_active_bits: int = 2) -> ndarray: """ Transform structures into array of binary features. Morgan fingerprints. Similar to RDkit implementation. @@ -50,7 +45,7 @@ def morgan_fingerprint(self, min_radius: int = 1, max_radius: int = 4, return fingerprints def morgan_bit_set(self, min_radius: int = 1, max_radius: int = 4, - length: int = 1024, number_active_bits: int = 2) -> Set[int]: + length: int = 1024, number_active_bits: int = 2) -> set[int]: """ Transform structures into set of indexes of True-valued features. @@ -73,7 +68,7 @@ def morgan_bit_set(self, min_radius: int = 1, max_radius: int = 4, active_bits.add(tpl & mask) return active_bits - def morgan_hash_set(self: 'MoleculeContainer', min_radius: int = 1, max_radius: int = 4) -> Set[int]: + def morgan_hash_set(self, min_radius: int = 1, max_radius: int = 4) -> set[int]: """ Transform structures into integer hashes of atoms with EC. @@ -82,7 +77,7 @@ def morgan_hash_set(self: 'MoleculeContainer', min_radius: int = 1, max_radius: """ return {x for x in self._morgan_hash_dict(min_radius, max_radius) for x in x.values()} - def morgan_hash_smiles(self: 'MoleculeContainer', min_radius: int = 1, max_radius: int = 4) -> Dict[int, List[str]]: + def morgan_hash_smiles(self, min_radius: int = 1, max_radius: int = 4) -> dict[int, list[str]]: """ Transform structures into dictionary of hashes of atoms with EC and corresponding SMILES. 
@@ -95,7 +90,7 @@ def morgan_hash_smiles(self: 'MoleculeContainer', min_radius: int = 1, max_radiu smiles_dict[morgan_hash].add(format(self.augmented_substructure((atom,), deep=radius), 'A')) return {k: list(v) for k, v in smiles_dict.items()} - def morgan_smiles_hash(self: 'MoleculeContainer', min_radius: int = 1, max_radius: int = 4) -> Dict[str, List[int]]: + def morgan_smiles_hash(self, min_radius: int = 1, max_radius: int = 4) -> dict[str, list[int]]: """ Transform structures into dictionary of smiles and corresponding hashes of atoms with EC. @@ -108,7 +103,7 @@ def morgan_smiles_hash(self: 'MoleculeContainer', min_radius: int = 1, max_radiu out[s].append(k) return dict(out) - def _morgan_hash_dict(self: 'MoleculeContainer', min_radius: int = 1, max_radius: int = 4) -> List[Dict[int, int]]: + def _morgan_hash_dict(self, min_radius: int = 1, max_radius: int = 4) -> list[dict[int, int]]: """ Transform structures into integer hashes of atoms with EC. Returns list of atom-hash pairs for different radii. @@ -130,7 +125,7 @@ def _morgan_hash_dict(self: 'MoleculeContainer', min_radius: int = 1, max_radius return out[-(max_radius - min_radius + 1):] # slice [min, max] radii range @property - def _atom_identifiers(self) -> Dict[int, int]: + def _atom_identifiers(self) -> dict[int, int]: raise NotImplementedError diff --git a/chython/algorithms/fingerprints/test/__init__.py b/chython/algorithms/fingerprints/test/__init__.py index 031c963a..aafc6f13 100644 --- a/chython/algorithms/fingerprints/test/__init__.py +++ b/chython/algorithms/fingerprints/test/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/fingerprints/test/test_linear.py b/chython/algorithms/fingerprints/test/test_linear.py index d0accfc8..a20db7e9 100644 --- a/chython/algorithms/fingerprints/test/test_linear.py +++ b/chython/algorithms/fingerprints/test/test_linear.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # Copyright 2025 Tagir Akhmetshin # This file is part of chython. # diff --git a/chython/algorithms/fingerprints/test/test_morgan.py b/chython/algorithms/fingerprints/test/test_morgan.py index 5b32e21f..b837d25b 100644 --- a/chython/algorithms/fingerprints/test/test_morgan.py +++ b/chython/algorithms/fingerprints/test/test_morgan.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # Copyright 2025 Tagir Akhmetshin # This file is part of chython. # diff --git a/chython/algorithms/groups/__init__.py b/chython/algorithms/groups/__init__.py new file mode 100644 index 00000000..e230c420 --- /dev/null +++ b/chython/algorithms/groups/__init__.py @@ -0,0 +1,202 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2026 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
def _single_molecule_reactions(mol, rules, reaction=None):
    """
    Shared driver for `FunctionalGroups.oxidize`, `reduce` and `transform`.

    :param mol: molecule exposing `functional_groups`.
    :param rules: iterable of (reaction_name, required_group, output_group, reactor) tuples.
    :param reaction: optional reaction name; when given, rules with other names are skipped.
    :return: generator of (reaction_name, reactor_result) pairs.
    """
    fgs = mol.functional_groups
    for name, fg_name, _, reactor in rules:
        if reaction is not None and name != reaction:
            continue
        # a rule is applied only if the molecule carries the functional group it starts from
        if fg_name in fgs:
            for rxn in reactor(mol):
                yield name, rxn


class FunctionalGroups:
    __slots__ = ()

    @cached_property
    def functional_groups(self) -> dict[str, int]:
        """
        Dict of functional group names to their count in the molecule.

        Groups without any match are omitted. Keys follow the order of the functional rules table.
        """
        found = {}
        for name, q in functional_rules.items():
            c = sum(1 for _ in q.get_mapping(self))
            if c:
                found[name] = c
        return found

    @cached_property
    def protective_groups(self) -> dict[str, int]:
        """
        Dict of protective group names to their count in the molecule.

        A match is counted only if none of its removable atoms (mapped atoms not listed in the rule's `keep`)
        was already claimed by an earlier counted match.
        """
        found = {}
        seen = set()  # removable atoms already assigned to a counted protective group
        for name, (q, keep, *_) in protective_rules.items():
            c = 0
            for mp in q.get_mapping(self, automorphism_filter=False):
                atoms = {m for n, m in mp.items() if n not in keep}
                if seen.isdisjoint(atoms):
                    seen.update(atoms)
                    c += 1
            if c:
                found[name] = c
        return found

    def remove_protection(self, name=None) -> bool:
        """
        Remove protective groups from the given molecule if applicable.

        :param name: protective group name to remove selectively. All known groups are tried if None.
        :return: True if the molecule was modified, False if nothing matched.
        :raises ValueError: if `name` is given but is not a known protective group.
        """
        to_delete = set()
        to_add = []
        if name is None:
            rules = protective_rules.values()
        elif name in protective_rules:
            rules = [protective_rules[name]]
        else:
            raise ValueError(f'Unknown protective group: {name}')

        kept_atoms = set()
        for q, keep, add, *_ in rules:
            for mp in q.get_mapping(self, automorphism_filter=False):
                delete = {m for n, m in mp.items() if n not in keep}
                # skip matches which share removable atoms with an already accepted match
                if not to_delete.isdisjoint(delete):
                    continue
                to_delete.update(delete)
                kept_atoms.update(mp[n] for n in keep)
                for n, a, b in add:
                    to_add.append((mp[n], a, b))

        # all edits are collected first and applied afterwards, new atoms before deletions
        for n, a, b in to_add:
            m = self.add_atom(a, _skip_calculation=True)
            self.add_bond(m, n, b, _skip_calculation=True)
        for n in to_delete:
            self.delete_atom(n, _skip_calculation=True)
        if to_delete or to_add:
            self.fix_structure()
            # fix implicit H on aromatic N freed from PG.
            # only delete-sets are checked for overlap above, so an atom kept by one match
            # may have been removed by another one: such atoms no longer exist and are skipped.
            for n in kept_atoms - to_delete:
                a = self.atom(n)
                if a.atomic_symbol == 'N' and a.hybridization == 4 and a.implicit_hydrogens is None:
                    a._implicit_hydrogens = 1
            self.fix_stereo()
            return True
        return False

    def react(self, *others, reaction=None) -> Iterator[tuple[str, 'ReactionContainer']]:
        """
        Enumerate possible reaction products between molecules.

        mol1.react(mol2) -> [(reaction_name, ReactionContainer), ...]
        mol1.react(mol2, mol3) -> [(reaction_name, ReactionContainer), ...]  # multi-component
        mol1.react(mol2, reaction='suzuki') -> only suzuki coupling

        :param others: the other reactants. Rules with a different number of components are skipped.
        :param reaction: optional reaction name to apply selectively.
        """
        mols = [self, *others]

        for name, fg_names, reactor in reaction_rules:
            if reaction is not None and name != reaction:
                continue
            if len(fg_names) != len(mols):
                continue
            # molecules can be passed in any order: use the first ordering in which
            # every molecule carries the functional group required at its position.
            for perm in permutations(mols):
                if all(fg in mol.functional_groups for mol, fg in zip(perm, fg_names)):
                    for rxn in reactor(*perm):
                        yield name, rxn
                    break

    def oxidize(self, reaction=None) -> Iterator[tuple[str, 'ReactionContainer']]:
        """
        Enumerate possible single-step oxidation products.

        mol.oxidize() -> [(reaction_name, ReactionContainer), ...]
        mol.oxidize(reaction='alcohol_to_aldehyde') -> only this oxidation

        :param reaction: optional reaction name to apply selectively.
        """
        yield from _single_molecule_reactions(self, oxidation_rules, reaction)

    def reduce(self, reaction=None) -> Iterator[tuple[str, 'ReactionContainer']]:
        """
        Enumerate possible single-step reduction products.

        mol.reduce() -> [(reaction_name, ReactionContainer), ...]
        mol.reduce(reaction='ketone_to_alcohol') -> only this reduction

        :param reaction: optional reaction name to apply selectively.
        """
        yield from _single_molecule_reactions(self, reduction_rules, reaction)

    def transform(self, reaction=None) -> Iterator[tuple[str, 'ReactionContainer']]:
        """
        Enumerate possible single-molecule functional group interconversions
        (ring formations from open-chain precursors with implicit reagents).

        mol.transform() -> [(reaction_name, ReactionContainer), ...]
        mol.transform(reaction='appel') -> only Appel reaction

        :param reaction: optional reaction name to apply selectively.
        """
        yield from _single_molecule_reactions(self, transformation_rules, reaction)

    def __invert__(self) -> Iterator[tuple[str, 'ReactionContainer']]:
        """
        Enumerate all possible single-step molecular transformations
        (oxidations, reductions, and functional group interconversions).

        ~mol -> [(reaction_name, ReactionContainer), ...]
        """
        yield from self.oxidize()
        yield from self.reduce()
        yield from self.transform()

    def __matmul__(self, other) -> Iterator[tuple[str, 'ReactionContainer']]:
        """
        Enumerate possible reaction products between molecules.

        mol1 @ mol2 -> [(reaction_name, ReactionContainer), ...]
        mol1 @ [mol2, mol3] -> [(reaction_name, ReactionContainer), ...]  # multi-component
        """
        # only list and tuple are unpacked as several reactants; anything else is one reactant
        if isinstance(other, (list, tuple)):
            return self.react(*other)
        return self.react(other)


__all__ = ['FunctionalGroups']
def _rules():
    """
    Build the table of functional group queries.

    `smarts` is imported from the package root inside the function rather than at module import time.
    Every value is the object returned by `smarts(...)` for the given pattern.

    NOTE(review): atom-map number 100 looks like a tag for the leaving/cleaved atom of a group -
    confirm against the reaction templates which consume these queries.

    :return: dict of functional group name to parsed query, in definition order.
    """
    from ... import smarts

    rules = {}

    # hydrocarbons
    rules['terminal_alkene'] = smarts('[C;z2;x0;D1:1]=[C;z2;x0;D2,D3:2]')
    rules['alkene'] = smarts('[C;z2;x0;D2,D3:1]=[C;z2;x0;D2,D3:2]')
    rules['terminal_alkyne'] = smarts('[C;z3;x0;D1:1]#[C;x0;D2:2]')
    rules['alkyne'] = smarts('[C;z3;x0;D2:1]#[C;x0;D2:2]')

    rules['vicinal_diol'] = smarts('[O;D1;z1;x0:1]-[C;z1;x1]-[C;z1;x1]-[O;D1;z1;x0:2]')

    rules['benzylic_ch'] = smarts('[C;z1;h1,h2;D2,D3:1]-[C;a:2]')

    # halides
    rules['aryl_fluoride'] = smarts('[F;D1:100]-[C;a:1]')
    rules['aryl_chloride'] = smarts('[Cl;D1:100]-[C;a:1]')
    rules['aryl_bromide'] = smarts('[Br;D1:100]-[C;a:1]')
    rules['aryl_iodide'] = smarts('[I;D1:100]-[C;a:1]')

    rules['alkyl_fluoride'] = smarts('[F;D1:100][C;z1;x1:1]')
    rules['alkyl_chloride'] = smarts('[Cl;D1:100][C;z1;x1:1]')
    rules['alkyl_bromide'] = smarts('[Br;D1:100][C;z1;x1:1]')
    rules['alkyl_iodide'] = smarts('[I;D1:100][C;z1;x1:1]')

    rules['alkenyl_fluoride'] = smarts('[F;D1:100][C;z2;x1:1]=[C:2]')
    rules['alkenyl_chloride'] = smarts('[Cl;D1:100][C;z2;x1:1]=[C:2]')
    rules['alkenyl_bromide'] = smarts('[Br;D1:100][C;z2;x1:1]=[C:2]')
    rules['alkenyl_iodide'] = smarts('[I;D1:100][C;z2;x1:1]=[C:2]')

    rules['alkynyl_fluoride'] = smarts('[F;D1:100][C;z3;x1:1]')
    rules['alkynyl_chloride'] = smarts('[Cl;D1:100][C;z3;x1:1]')
    rules['alkynyl_bromide'] = smarts('[Br;D1:100][C;z3;x1:1]')
    rules['alkynyl_iodide'] = smarts('[I;D1:100][C;z3;x1:1]')

    # pseudohalides
    rules['aryl_triflate'] = smarts('[S;D4](=O)(=O)(-[O:100]-;!@[C;a:1])-[C;D4](F)(F)F')
    rules['aryl_mesylate'] = smarts('[S;D4](=O)(=O)(-[O:100]-;!@[C;a:1])-[C;D1]')
    rules['aryl_tosylate'] = smarts('[S;D4](=O)(=O)(-[O:100]-;!@[C;a:1])-[C;a]:1:[C;D2]:[C;D2]:[C](-[C;D1]):[C;D2]:[C;D2]:1')

    rules['alkyl_triflate'] = smarts('[S;D4](=O)(=O)(-[O:100]-;!@[C;z1;x1:1])-[C;D4](F)(F)F')
    rules['alkyl_mesylate'] = smarts('[S;D4](=O)(=O)(-[O:100]-;!@[C;z1;x1:1])-[C;D1]')
    rules['alkyl_tosylate'] = smarts('[S;D4](=O)(=O)(-[O:100]-;!@[C;z1;x1:1])-[C;a]:1:[C;D2]:[C;D2]:[C](-[C;D1]):[C;D2]:[C;D2]:1')

    # boronic acids and esters
    rules['aryl_boronic_acid'] = smarts('[B;D3;z1;x2:100](-[O;D1])(-[O;D1])-;!@[C;a:1]')
    rules['aryl_boronic_ester'] = smarts('[B;D3;z1;x2:100](-[O;D2;x1])(-[O;D2;x1])-;!@[C;a:1]')

    rules['alkyl_boronic_acid'] = smarts('[B;D3;z1;x2:100](-[O;D1])(-[O;D1])-;!@[C;z1;x1:1]')
    rules['alkyl_boronic_ester'] = smarts('[B;D3;z1;x2:100](-[O;D2;x1])(-[O;D2;x1])-;!@[C;z1;x1:1]')

    rules['alkenyl_boronic_acid'] = smarts('[B;D3;z1;x2:100](-[O;D1])(-[O;D1])-;!@[C;z2;x1:1]=[C:2]')
    rules['alkenyl_boronic_ester'] = smarts('[B;D3;z1;x2:100](-[O;D2;x1])(-[O;D2;x1])-;!@[C;z2;x1:1]=[C:2]')

    rules['alkynyl_boronic_acid'] = smarts('[B;D3;z1;x2:100](-[O;D1])(-[O;D1])-;!@[C;z3;x1:1]')
    rules['alkynyl_boronic_ester'] = smarts('[B;D3;z1;x2:100](-[O;D2;x1])(-[O;D2;x1])-;!@[C;z3;x1:1]')

    # molander salts (trifluoroborates)
    rules['aryl_molander_salt'] = smarts('[B;D4;z1;x3;-:100](F)(F)(F)-;!@[C;a:1]')
    rules['alkyl_molander_salt'] = smarts('[B;D4;z1;x3;-:100](F)(F)(F)-;!@[C;z1:1]')
    rules['alkenyl_molander_salt'] = smarts('[B;D4;z1;x3;-:100](F)(F)(F)-;!@[C;z2:1]=[C:2]')
    rules['alkynyl_molander_salt'] = smarts('[B;D4;z1;x3;-:100](F)(F)(F)-;!@[C;z3;x1:1]')

    # alcohols and phenols
    rules['primary_alcohol'] = smarts('[O;D1;z1;x0:1][C;D2;x1;z1:2]')
    rules['secondary_alcohol'] = smarts('[O;D1;z1;x0:1][C;D3;x1;z1:2]')
    rules['tertiary_alcohol'] = smarts('[O;D1;z1;x0:1][C;D4;x1;z1:2]')
    # tertiary alcohol with adjacent sp3 CH (eliminable): for dehydration
    rules['tertiary_alcohol_with_alpha_h'] = smarts('[O;D1;z1;x0:1]-[C;D4;x1;z1:2]-[C;z1;h1,h2:3]')
    rules['phenol'] = smarts('[O;D1;z1;x0:1]-[C;a:2]')

    # aldehydes and ketones
    rules['aldehyde'] = smarts('[O;z2;x0:2]=[C;D2;x1;z2:1]')
    rules['ketone'] = smarts('[O;z2;x0:2]=[C;D3;x1;z2:1]')
    # enal: alpha,beta-unsaturated aldehyde (for Doebner-Miller)
    rules['enal'] = smarts('[O;z2;x0:2]=[C;D2;x1;z2:1]-[C;z2;x0:3]=[C;x0:4]')
    # ketone with alpha C-H (for Fischer indole, Friedlander)
    rules['alpha_ketone'] = smarts('[O;z2;x0:2]=[C;D3;x1:1]-[C;z1;D1,D2;x0:3]')
    # hantzsch thiazole, imidazo[1,2-a]pyridine
    rules['alpha_haloketone'] = smarts('[O;z2;x0:2]=[C;D3;x1:1]-[C;z1;D2,D3;x1:3]([Cl,Br,I;D1:100])')
    # imidazo[1,2-a]pyridine from alpha-haloester (ester O masked, carbonyl O deleted)
    rules['alpha_haloester'] = smarts('[O;D2;x0;M]-[C;D3;x2;z2:1](=[O:2])-[C;z1;D2,D3;x1:3]([Cl,Br,I;D1:100])')

    rules['1_2_diketone'] = smarts('[O;z2;x0:2]=[C;D3;x1:1]-[C;z2;x1;D3:3]=[O:4]')
    rules['1_3_diketone'] = smarts('[O;z2;x0:2]=[C;D3;x1:1]-[C;z1;D2,D3:5]-[C;z2;x1;D3:3]=[O:4]')
    rules['1_4_diketone'] = smarts('[O;z2;x0:2]=[C;D3;x1:1]-[C;z1;D2,D3:5]-[C;z1;D2,D3:6]-[C;z2;x1;D3:3]=[O:4]')
    rules['beta_ketoester'] = smarts('[O;z2;x0:2]=[C;D3;x1:1]-[C;z1;D2;x0:5]-[C;z2;x2;D3:3](=[O:4])[O;D2:100]')

    # acids
    rules['alkyl_carboxylic_acid'] = smarts('[O;D1;z1;x0:100][C;z2;x2;D3:1](=[O:2])[C;z1:3]')
    rules['aryl_carboxylic_acid'] = smarts('[O;D1;z1;x0:100][C;z2;x2;D3:1](=[O:2])[C;a:3]')
    rules['carboxylic_acid'] = smarts('[O;D1;z1;x0:100][C;z2;x2;D3:1]=[O:2]')
    rules['acyl_chloride'] = smarts('[Cl:100][C;z2;x2;D3:1]=[O:2]')
    rules['acyl_fluoride'] = smarts('[F:100][C;z2;x2;D3:1]=[O:2]')
    rules['chloroformate'] = smarts('[Cl:100][C;z2;x3;D3:1](=[O:2])-[O;D2:3]')
    rules['fluoroformate'] = smarts('[F:100][C;z2;x3;D3:1](=[O:2])-[O;D2:3]')
    rules['carbamoyl_chloride'] = smarts('[Cl:100][C;z2;x3;D3:1](=[O:2])-[N;D2,D3:3]')
    rules['carbamoyl_fluoride'] = smarts('[F:100][C;z2;x3;D3:1](=[O:2])-[N;D2,D3:3]')

    # amines
    rules['primary_amine'] = smarts('[N;D1;z1;x0:1][C;z1:2]')
    rules['primary_aniline'] = smarts('[N;D1;z1;x0:1][C;a:2]')
    # NH2 on sp2 C=N (e.g. pyrazoline, amidine-like)
    rules['primary_amidine_amine'] = smarts('[N;D1;z1;x0:1]-[C;z2:2]=[N;M]')
    rules['secondary_amine'] = smarts('[N;D2;z1;x0:1]([C;z1:2])[C;z1:3]')
    rules['secondary_aniline'] = smarts('[N;D2;z1;x0:1]([C;a:2])[C;z1:3]')
    rules['biaryl_aniline'] = smarts('[N;D2;z1;x0:1]([C;a:2])[C;a:3]')

    # esters and amides
    rules['ester'] = smarts('[O;z2;x0:2]=[C;D3;x2;z2:1]-[O;D2;x0:100]')
    rules['primary_amide'] = smarts('[N;D1;z1;x0:1][C;z2;x2;D3:2]=[O:3]')
    rules['secondary_amide'] = smarts('[N;D2;z1;x0:1][C;z2;x2;D3:2]=[O:3]')

    # sulfonyl
    rules['sulfonyl_chloride'] = smarts('[S;D4:1](=[O:2])(=[O:3])[Cl;D1:100]')
    rules['sulfonyl_fluoride'] = smarts('[S;D4:1](=[O:2])(=[O:3])[F;D1:100]')
    rules['sulfonamide'] = smarts('[S;x3;D4:1](=[O:2])(=[O:3])-[N;z1:100]')
    rules['sulfonyl_anhydride'] = smarts('[S;x3;D4:1](=[O:2])(=[O:3])-[O:100]-[S;x3;D4](=O)(=O)')

    # nitrogen functional groups
    rules['nitrile'] = smarts('[N;D1;z3;x0:2]#[C;D2;x1:1]')
    rules['azide'] = smarts('[N;x1;D2:1]=[N+:2]=[N-:3]')
    rules['isocyanate'] = smarts('[N;z2;x0;D2:1]=[C:2]=[O:3]')
    rules['isocyano'] = smarts('[N;D2;x0;+:1]#[C;-:2]')
    rules['guanidine'] = smarts('[N;z1;x0:1][C;!R:2]([N;z1;x0:3])=[N;x0:4]')
    rules['nitro'] = smarts('[N;D3;x2;+:1]([O;-:2])=[O:3]')

    # grignard reagents (RMgX)
    rules['alkyl_grignard'] = smarts('[Mg;D2:100](-[F,Cl,Br,I])-[C;z1:1]')
    rules['aryl_grignard'] = smarts('[Mg;D2:100](-[F,Cl,Br,I])-[C;a:1]')
    rules['alkenyl_grignard'] = smarts('[Mg;D2:100](-[F,Cl,Br,I])-[C;z2:1]=[C:2]')

    # organozinc reagents (RZnX)
    rules['alkyl_zinc'] = smarts('[Zn;D2:100](-[F,Cl,Br,I])-[C;z1:1]')
    rules['aryl_zinc'] = smarts('[Zn;D2:100](-[F,Cl,Br,I])-[C;a:1]')
    rules['alkenyl_zinc'] = smarts('[Zn;D2:100](-[F,Cl,Br,I])-[C;z2:1]=[C:2]')

    # boronate alkyl halides: B-CH2-X (one-carbon, for SN2)
    rules['boronate_alkyl_chloride'] = smarts('[Cl;D1:100]-[C;z1;D2;x2:1]-[B;M]')
    rules['boronate_alkyl_bromide'] = smarts('[Br;D1:100]-[C;z1;D2;x2:1]-[B;M]')
    rules['boronate_alkyl_iodide'] = smarts('[I;D1:100]-[C;z1;D2;x2:1]-[B;M]')

    # stannanes (R-SnR3)
    rules['aryl_stannane'] = smarts('[Sn;D4;z1:100]-;!@[C;a:1]')
    rules['alkenyl_stannane'] = smarts('[Sn;D4;z1:100]-;!@[C;z2:1]=[C:2]')
    rules['alkyl_stannane'] = smarts('[Sn;D4;z1:100]-;!@[C;z1;D2,D3,D4:1]')

    # silanes (R-SiR3, for Hiyama coupling)
    rules['aryl_silane'] = smarts('[Si;D4:100]-;!@[C;a:1]')
    rules['alkenyl_silane'] = smarts('[Si;D4:100]-;!@[C;z2:1]=[C:2]')

    # phosphorus ylides and phosphonates
    rules['phosphonium_ylide'] = smarts('[P;D4;z2;x0:100]=[C:1]')
    rules['phosphonate'] = smarts('[P;D4;x3:100](=O)([O;D2;x1])([O;D2;x1])-[C:1]')

    # weinreb amide
    rules['weinreb_amide'] = smarts('[O:2]=[C;D3;x2:1]-[N;D3;x1:100][O;D2;x1]')

    # arene C-H (for electrophilic aromatic substitution)
    rules['arene_ch'] = smarts('[C;a;D2:1]')

    # sulfur
    rules['thiol'] = smarts('[S;x0;D1;z1:1][C;z1:2]')
    rules['thioether'] = smarts('[S;D2;z1;x0:1]([C:2])[C:3]')
    rules['sulfoxide'] = smarts('[S;D3;z2:1](=[O:2])([C:3])[C:4]')
    rules['sulfone'] = smarts('[S;D4:1](=[O:2])(=[O:3])([C:4])[C:5]')

    # pyridone/lactam halides (for extended ullmann/BH)
    # four positional patterns are generated per halogen, named pyridone_<position>_<halide>
    for _x, _name in (('F', 'fluoride'), ('Cl', 'chloride'), ('Br', 'bromide'), ('I', 'iodide')):
        rules[f'pyridone_4_{_name}'] = smarts(f'[{_x};D1:100]-[C;z2;r6:1]-[N;D3;M]-;@[C;z2;r6;M]=[O;M]')
        rules[f'pyridone_3_{_name}'] = smarts(f'[{_x};D1:100]-[C;z2;r6:1]=[C,N;M]-;@[N;D3;r6;M]-;@[C;z2;r6;M]=[O;M]')
        rules[f'pyridone_2_{_name}'] = smarts(f'[{_x};D1:100]-[C;z2;r6:1]=[C,N;M]-;@[C;z2;r6;M](=[O;M])-[N;D3;M]')
        rules[f'pyridone_1_{_name}'] = smarts(f'[{_x};D1:100]-[C;z2;r6:1]-[C;z2;x2;M](=[O;M])-[N;D3;M]')

    # pyridol (hydroxypyridine tautomer of pyridone; chython stores NH-pyridones as pyridols)
    rules['pyridol'] = smarts('[N;r5,r6;D2;a:1]:[C:2]-[O;D1:3]')

    # pyrrole. for tautomerism handling H not in template.
    rules['pyrrole'] = smarts('[N;h1;D2;a;r5:1]')
    rules['pyrazole'] = smarts('[N;h1;D2;a;r5:1]:[N;h0;D2;r5:2]')
    rules['imidazole'] = smarts('[N;h1;D2;a;r5:1]:[A:2]:[N;h0;D2;r5:3]')

    # heterocycles (for screening)
    rules['isoxazole'] = smarts('[O;a;D2;r5:1]:[N;a;D2;r5:2]')
    rules['pyridazine'] = smarts('[N;a;D2;r6:1]:[N;a;D2;r6:2]')

    # hydrazines
    rules['alkyl_hydrazine'] = smarts('[N;D1;z1;x1:2]-[N;D2;z1;x1:1]-[C;z1:3]')
    rules['aryl_hydrazine'] = smarts('[N;D1;z1;x1:2]-[N;D2;z1;x1:1]-[C;a:3]:[C;a;D2:4]')

    # hydrazone (C=N-NH-R, product of carbonyl + hydrazine condensation)
    rules['hydrazone'] = smarts('[C;z2:1]=[N;D2;z2;x1:2]-[N;D2;z1;x1:3]')

    # thioamide (for Hantzsch thiazole)
    rules['thioamide'] = smarts('[S;z2;x0;D1:2]=[C;D3;x2:1]-[N;D1:3]')

    # ortho-bifunctional arenes
    rules['o_diaminoarene'] = smarts('[N;D1;z1;x0:1]-[C;a:3]:[C;a:4]-[N;D1,D2;z1;x0:2]')
    rules['o_aminophenol'] = smarts('[N;D1;z1;x0:1]-[C;a:3]:[C;a:4]-[O;D1:2]')
    rules['o_aminothiophenol'] = smarts('[N;D1;z1;x0:1]-[C;a:3]:[C;a:4]-[S;D1:2]')
    rules['o_aminobenzaldehyde'] = smarts('[N;D1;z1;x0:1]-[C;a:3]:[C;a:4]-[C;D2;z2;x1:5]=[O:6]')
    rules['anthranilic_acid'] = smarts('[N;D1;z1;x0:1]-[C;a:3]:[C;a:4]-[C;z2;x2;D3:5](=[O:6])[O;D1:100]')

    # amidoxime: RC(=NH)NHOH canonical form (for 1,2,4-oxadiazole)
    rules['amidoxime'] = smarts('[N;D1;z2;x0:3]=[C;D3;x2:1]-[N;D2;z1;x1:2]-[O;D1:4]')

    # amidine: RC(=NH)NH2 (for pyrimidine)
    rules['amidine'] = smarts('[N;D1;z1;x0:3]-[C;D3;z2;x2:1]=[N;D1:2]')
    # urea/thiourea (for Biginelli)
    rules['urea'] = smarts('[N;D1;z1;x0:1]-[C;D3;z2;x3:2](=[O:3])-[N;D1:4]')
    rules['thiourea'] = smarts('[N;D1;z1;x0:1]-[C;D3;z2;x3:2](=[S;D1:3])-[N;D1:4]')

    # beta-arylethylamine (for Pictet-Spengler)
    rules['beta_arylethylamine'] = smarts('[N;D1;z1;x0:1]-[C;z1:2]-[C;z1:3]-[C;a:4]:[C;a;D2:5]')
    # 2-aminopyridine / 2-aminoazine (for GBB, imidazo[1,2-a]pyridine)
    rules['aminopyridine'] = smarts('[N;D1;z1;x0:1]-[C;a:2]:[N;a;h0;D2:3]')

    # amino alcohol (for oxazoline formation): H2N-C-C-OH
    rules['amino_alcohol'] = smarts('[N;D1;z1;x0:1]-[C;z1:2]-[C;z1:3]-[O;D1:4]')

    # hydroxamic acid: R-C(=O)-NH-OH
    rules['hydroxamic_acid'] = smarts('[O;D1;z1;x1:1]-[N;D2;z1;x1:2]-[C;z2;x2:3]=[O:4]')

    # oxime: C=N-OH
    rules['oxime'] = smarts('[O;D1;z1;x1:1]-[N;D2;z2;x1:2]=[C:3]')

    # O-alkylhydroxylamine: R-O-NH2 (for oxime ether formation)
    rules['O_alkylhydroxylamine'] = smarts('[N;D1;z1;x1:1]-[O;D2;z1;x1:2]-[C:3]')

    # alpha-isocyano (for Van Leusen oxazole)
    rules['tosyl_isocyanide'] = smarts('[C;-;D1:2]#[N;+;D2:1]-[C;D2,D3;z1;x2:3]-[S;D4;x2:100](=O)=O')

    # thioester (for Liebeskind-Srogl): R-C(=O)-S-R'
    rules['thioester'] = smarts('[O;z2;x0:2]=[C;D3;x2;z2:1]-[S;D2;z1;x0:100]')

    # active methylene (for Knoevenagel): CH flanked by 2 EWGs
    rules['active_methylene'] = smarts('[C;z1;D2,D3;x0:1](-[C;z2,z3;x1,x2:2])-[C;z2,z3;x1,x2:3]')

    # aniline with ortho C-H (for Doebner-Miller)
    rules['aniline_ortho_ch'] = smarts('[N;D1;z1;x0:1]-[C;a:2]:[C;a;D2:3]')

    # ortho-haloaniline (for Larock indole)
    rules['o_haloaniline'] = smarts('[N;D1;z1;x0:1]-[C;a:2]:[C;a:3]-[Cl,Br,I;D1:100]')

    return rules


# `_rules` is passed uncalled to the lazy proxy: the table is not built at import time
rules = Proxy(_rules)


__all__ = ['rules']
def _make_reactor(rxn_name, fg_name, output_fg, product_smarts):
    """
    Assemble a single oxidation rule entry.

    The reactant query is taken from the functional groups table by `fg_name`,
    the product pattern is parsed from `product_smarts`.

    :param rxn_name: reaction name reported to the caller.
    :param fg_name: key of the functional group the reaction starts from.
    :param output_fg: name of the functional group formed. Passed through unchanged.
    :param product_smarts: product side pattern.
    :return: tuple of (rxn_name, fg_name, output_fg, Reactor).
    """
    from ._functional import rules
    from ...reactor import Reactor
    from ... import smarts

    reactant_query = rules[fg_name]
    product_query = smarts(product_smarts)
    reactor = Reactor((reactant_query,), (product_query,),
                      delete_atoms=True, one_shot=True, fix_broken_pyrroles=True)
    return rxn_name, fg_name, output_fg, reactor


def _rules():
    """
    Build the ordered list of oxidation rules.

    :return: list of (reaction_name, source_group, output_group, Reactor) tuples.
    """
    specs = (
        # primary_alcohol → aldehyde (Swern, Dess-Martin, PCC)
        ('alcohol_to_aldehyde', 'primary_alcohol', 'aldehyde', '[A:1]=[A:2]'),
        # secondary_alcohol → ketone (Dess-Martin, Jones, PCC)
        ('alcohol_to_ketone', 'secondary_alcohol', 'ketone', '[A:1]=[A:2]'),
        # aldehyde → carboxylic_acid (Pinnick, Jones, KMnO4)
        ('aldehyde_to_acid', 'aldehyde', 'carboxylic_acid', '[A:2]=[A:1]-[O:20]'),
        # alkene → 1,2-diol (Sharpless dihydroxylation, OsO4/KMnO4)
        # the same reaction name is registered for terminal and internal alkenes
        ('dihydroxylation', 'terminal_alkene', 'vicinal_diol', '[A:1](-[O:20])-[A:2](-[O:21])'),
        ('dihydroxylation', 'alkene', 'vicinal_diol', '[A:1](-[O:20])-[A:2](-[O:21])'),
        # thioether → sulfoxide (mCPBA, H2O2, NaIO4)
        ('thioether_to_sulfoxide', 'thioether', 'sulfoxide', '[A:1](=[O:20])(-[A:2])-[A:3]'),
        # thioether → sulfone (excess mCPBA, H2O2/AcOH, Oxone)
        ('thioether_to_sulfone', 'thioether', 'sulfone', '[A:1](=[O:20])(=[O:21])(-[A:2])-[A:3]'),
        # sulfoxide → sulfone
        ('sulfoxide_to_sulfone', 'sulfoxide', 'sulfone', '[A:1](=[A:2])(=[O:20])(-[A:3])-[A:4]'),
    )
    return [_make_reactor(*spec) for spec in specs]


rules = Proxy(_rules)


__all__ = ['rules']
import smarts + + rules = {} + + rules['hydroxyl_thiocarbamate'] = ( + smarts('[O;D2:1]-;!@[C;x3;z2](=[S;D1])[N;D3;x0]([C;D1])[C;D1]'), + [1], # atoms to keep + [], # atoms to add (atom_number, atom_type, bond_type) + 'CC(C)OC(=S)N(C)C', # protected + 'CC(C)O', # cleaved + [] # optional decoys + ) + + rules['hydroxyl_fmoc'] = ( + smarts('[O;D2:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D2;x1;z1][C;D3;z1;x0;r5]1[C;a;r6]:2:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D3]:2-[C;a;r6]:3:[C;D2]:[C;D2]:[C;D2]:[C;D2]:C1:3'), + [1], + [], + 'CC(C)OC(=O)OCC1C2=CC=CC=C2C2=C1C=CC=C2', + 'CC(C)O', + [] + ) + + rules['hydroxyl_troc'] = ( + smarts('[O;D2:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D2][C;D4;x3]([Cl;D1])([Cl;D1])[Cl;D1]'), + [1], + [], + 'CC(C)OC(=O)OCC(Cl)(Cl)Cl', + 'CC(C)O', + [] + ) + + rules['hydroxyl_teoc'] = ( + smarts('[O;D2:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D2;z1;x1][C;D2;x1;z1][Si;D4;z1;x0]([C;D1])([C;D1])[C;D1]'), + [1], + [], + 'CC(C)OC(=O)OCC[Si](C)(C)C', + 'CC(C)O', + [] + ) + + rules['hydroxyl_alloc'] = ( + smarts('[O;D2:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D2;z1;x1][C;D2;x0;z2]=[C;D1]'), + [1], + [], + 'CC(C)OC(=O)OCC=C', + 'CC(C)O', + [] + ) + + rules['hydroxyl_tms'] = ( + smarts('[O;D2:1]-;!@[Si;D4;z1;x1]([C;D1])([C;D1])[C;D1]'), + [1], + [], + 'CC(C)O[Si](C)(C)C', + 'CC(C)O', + ['CC(C)O[SiH](C)C', 'CC(C)O[Si](C)(C)OC', 'CC(C)O[Si](C)(C)CC'] + ) + + rules['hydroxyl_tes'] = ( + smarts('[O;D2:1]-;!@[Si;D4;z1;x1]([C;D2;x1;z1][C;D1])([C;D2;x1;z1][C;D1])[C;D2;x1;z1][C;D1]'), + [1], + [], + 'CC(C)O[Si](CC)(CC)CC', + 'CC(C)O', + ['CC(C)O[SiH](C)C', 'CC(C)O[Si](C)(C)OC'] + ) + + rules['hydroxyl_tbs'] = ( + smarts('[O;D2:1]-;!@[Si;D4;z1;x1]([C;D1])([C;D1])[C;D4;x1;z1]([C;D1])([C;D1])[C;D1]'), + [1], + [], + 'CC(C)O[Si](C)(C)C(C)(C)C', + 'CC(C)O', + ['CC(C)O[SiH](C)C', 'CC(C)O[Si](C)(C)OC'] + ) + + rules['hydroxyl_tips'] = ( + smarts('[O;D2:1]-;!@[Si;D4;z1;x1]([C;D3;z1;x1]([C;D1])[C;D1])([C;D3;z1;x1]([C;D1])[C;D1])[C;D3;z1;x1]([C;D1])[C;D1]'), + [1], + [], + 'CC(C)O[Si](C(C)C)(C(C)C)C(C)C', + 
'CC(C)O', + ['CC(C)O[SiH](C)C', 'CC(C)O[Si](C)(C)OC'] + ) + + rules['hydroxyl_tbdps'] = ( + smarts('[O;D2:1]-;!@[Si;D4;z1;x1]([C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)([C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)[C;D4;x1;z1]([C;D1])([C;D1])[C;D1]'), + [1], + [], + 'CC(C)O[Si](c1ccccc1)(c1ccccc1)C(C)(C)C', + 'CC(C)O', + ['CC(C)O[SiH](C)C', 'CC(C)O[Si](c1ccc(C)cc1)(c1ccccc1)C(C)(C)C'] + ) + + rules['hydroxyl_o_nitrobenzyl'] = ( + smarts('[O;D2:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D3;x1]([N+](=O)[O-]):[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'CC(C)OCc1c(N(=O)=O)cccc1', + 'CC(C)O', + ['CC(C)OC(OC)c1c(N(=O)=O)cccc1'] + ) + + rules['hydroxyl_methoxy_benzyl'] = ( + smarts('[O;D2:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1'), + [1], + [], + 'CC(C)OCc1ccc(OC)cc1', + 'CC(C)O', + ['CC(C)OCc1ccc(OCC)cc1', 'CC(C)OCc1cc(OC)ccc1'] + ) + + rules['hydroxyl_dimethoxybenzyl'] = ( + smarts('[O;D2:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1'), + [1], + [], + 'CC(C)OCc1c(OC)cc(OC)cc1', + 'CC(C)O', + [] + ) + + rules['hydroxyl_naphthyl'] = ( + smarts('[O;D2:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D2]:[C;D3]:2:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D3]:2:[C;D2]:[C;D2]:1'), + [1], + [], + 'CC(C)OCC1=CC2=C(C=CC=C2)C=C1', + 'CC(C)O', + ['CC(C)OCc1ccccc1'] + ) + + rules['hydroxyl_bom'] = ( + smarts('[O;D2:1]-;!@[C;D2;x2;z1][O;D2;x0][C;D2;z1;x1]-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'CC(C)OCOCc1ccccc1', + 'CC(C)O', + [] + ) + + rules['hydroxyl_piv'] = ( + smarts('[O;D2:1]-;!@[C;z2;x2](=O)-[C;D4;x0;z1]([C;D1])([C;D1])[C;D1]'), + [1], + [], + 'CC(C)OC(=O)C(C)(C)C', + 'CC(C)O', + [] + ) + + rules['hydroxyl_methoxy_benzoate'] = ( + smarts('[O;D2:1]-;!@[C;z2;x2](=O)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1'), + [1], + [], + 'COC1=CC=C(C=C1)C(=O)OC(C)C', + 'CC(C)O', + ['C1=CC=C(C=C1)C(=O)OC(C)C'] + ) + + rules['hydroxyl_benzoate'] = ( + 
smarts('[O;D2:1]-;!@[C;z2;x2](=O)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'C1=CC=C(C=C1)C(=O)OC(C)C', + 'CC(C)O', + ['COC1=CC=C(C=C1)C(=O)OC(C)C'] + ) + + rules['hydroxyl_tfa'] = ( + smarts('[O;D2:1]-;!@[C;z2;x2](=O)-[C;D4;z1;x3](F)(F)F'), + [1], + [], + 'CC(C)OC(=O)C(F)(F)F', + 'CC(C)O', + [] + ) + + rules['hydroxyl_mom'] = ( + smarts('[O;D2:1]-;!@[C;D2;x2;z1][O;D2;x0][C;D1]'), + [1], + [], + 'CC(C)OCOC', + 'CC(C)O', + ['CC(C)OC(C)OC'] + ) + + rules['hydroxyl_mem'] = ( + smarts('[O;D2:1]-;!@[C;D2;x2;z1][O;D2;x0][C;D2;z1;x1][C;D2;z1;x1][O;D2;x0][C;D1]'), + [1], + [], + 'COCCOCOC(C)C', + 'CC(C)O', + [] + ) + + rules['hydroxyl_thp'] = ( + smarts('[O;D2:1]-;!@[C;D3;x2;z1;r6]1[O;D2][C;D2][C;D2][C;D2][C;D2]1'), + [1], + [], + 'CC(C)OC1CCCCO1', + 'CC(C)O', + [] + ) + + rules['hydroxyl_ee'] = ( + smarts('[O;D2:1]-;!@[C;D3;x2;z1]([O;D2;x0][C;D2;x1;z1][C;D1])[C;D1]'), + [1], + [], + 'CC(C)OC(C)OCC', + 'CC(C)O', + ['CC(C)OC(CC)OCC'] + ) + + rules['hydroxyl_mop'] = ( + smarts('[O;D2:1]-;!@[C;D4;x2;z1]([O;D2;x0][C;D1])([C;D1])[C;D1]'), + [1], + [], + 'CC(C)OC(C)(C)OC', + 'CC(C)O', + [] + ) + + rules['hydroxyl_sem'] = ( + smarts('[O;D2:1]-;!@[C;D2;x2;z1][O;D2;x0][C;D2;z1;x1][C;D2;z1;x1][Si;D4;z1;x0]([C;D1])([C;D1])[C;D1]'), + [1], + [], + 'CC(C)OCOCC[Si](C)(C)C', + 'CC(C)O', + [] + ) + + rules['hydroxyl_tritil'] = ( + smarts('[O;D2:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'CC(C)OC(c1ccccc1)(c1ccccc1)c1ccccc1', + 'CC(C)O', + ['COc1ccc(cc1)C(OC(C)C)(c1ccccc1)c1ccc(OC)cc1'] + ) + + rules['hydroxyl_dimetoxy_tritil'] = ( + smarts('[O;D2:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'COc1ccc(cc1)C(OC(C)C)(c1ccccc1)c1ccc(OC)cc1', + 'CC(C)O', 
+ ['CC(C)OC(c1ccccc1)(c1ccccc1)c1ccccc1'] + ) + + rules['hydroxyl_chloro_tritil'] = ( + smarts('[O;D2:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D3;x1]([Cl;D1]):[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'CC(C)OC(c1c(Cl)cccc1)(c1ccccc1)c1ccccc1', + 'CC(C)O', + [] + ) + + rules['hydroxyl_mmt'] = ( + smarts('[O;D2:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'c1ccc(cc1)C(OC(C)C)(c1ccccc1)c1ccc(OC)cc1', + 'CC(C)O', + ['CC(C)OC(c1ccccc1)(c1ccccc1)c1ccccc1'] + ) + + rules['hydroxyl_mpe'] = ( + smarts('[O;D2:1]-;!@[C;D4;x1;z1]([C;D1])([C;D2;x0;z1][C;D1])[C;D2;x0;z1][C;D1]'), + [1], + [], + 'CC(C)OC(CC)(CC)C', + 'CC(C)O', + [] + ) + + rules['hydroxyl_trifluoroethyl'] = ( + smarts('[O;D2:1]-;!@[C;D2;x1;z1][C;D4;x3;z1](F)(F)F'), + [1], + [], + 'CC(C)OCC(F)(F)F', + 'CC(C)O', + [] + ) + + rules['hydroxyl_dmab_enamine'] = ( + smarts('[O;D2:1]-;!@[C;D2;x1;z1]-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1](:[C;D2]:[C;D2]:1)-[N;D2;x0;z1]-[C;z2;x1;D3]([C;D2;x0;z1][C;D3;x0;z1]([C;D1])[C;D1])=[C;D3;r6;x0;z2]1[C;x1;z2;D3](=O)[C;D2][C;D4;x0;z1]([C;D1])([C;D1])[C;D2][C;D3;x1;z2]1=O'), + [1], + [], + 'CC(C)CC(NC1=CC=C(COC(C)=O)C=C1)=C1C(=O)CC(C)(C)CC1=O', + 'CC(O)=O', + [] + ) + + rules['hydroxyl_dmab_imine'] = ( + smarts('[O;D2:1]-;!@[C;D2;x1;z1]-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1](:[C;D2]:[C;D2]:1)-[N;D2;x0;z2]=[C;x1;D3]([C;D2;x0;z1][C;D3;x0;z1]([C;D1])[C;D1])-[C;D3;r6;x0;z1]1[C;x1;z2;D3](=O)[C;D2][C;D4;x0;z1]([C;D1])([C;D1])[C;D2][C;D3;x1;z2]1=O'), + [1], + [], + 'CC(C)CC(=NC1=CC=C(COC(C)=O)C=C1)C1C(=O)CC(C)(C)CC1=O', + 'CC(O)=O', + [] + ) + + rules['diol_12_acetone'] = ( + smarts('[O;D2;x0;r5:1]1-;@[C;D4;x2;z1]([C;D1])([C;D1])-[O;D2;x0:2][C:3]!#[C:4]1'), + [1, 2, 3, 4], + [], + 'CC1COC(C)(C)O1', + 'CC(O)CO', + [] + ) + + rules['diol_13_acetone'] 
= ( + smarts('[O;D2;x0;r6:1]1-;@[C;D4;x2;z1]([C;D1])([C;D1])-[O;D2;x0:2][C:3][C:4]!#[C:5]1'), + [1, 2, 3, 4, 5], + [], + 'CC1CCOC(C)(C)O1', + 'CC(O)CCO', + [] + ) + + rules['hydroxyl_amine_acetone'] = ( + smarts('[O;D2;x0;r5:1]1-;@[C;D4;x2;z1]([C;D1])([C;D1])-[N;z1:2][C:3]!#[C:4]1'), + [1, 2, 3, 4], + [], + 'CC1CN(C(C)=O)C(C)(C)O1', + 'CC(O)CNC(C)=O', + [] + ) + + rules['diol_12_formalin'] = ( + smarts('[O;D2;x0;r5:1]1-;@[C;D2;x2;z1]-[O;D2;x0:2][C:3]!#[C:4]1'), + [1, 2, 3, 4], + [], + 'CC1COCO1', + 'CC(O)CO', + [] + ) + + rules['diol_13_formalin'] = ( + smarts('[O;D2;x0;r6:1]1-;@[C;D2;x2;z1]-[O;D2;x0:2][C:3][C:4]!#[C:5]1'), + [1, 2, 3, 4, 5], + [], + 'CC1CCOCO1', + 'CC(O)CCO', + [] + ) + + rules['diol_12_cyclopentanone'] = ( + smarts('[O;D2;x0;r5:1]1-;@[C;D4;x2;z1]2([C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1]2)-[O;D2;x0:2][C:3]!#[C:4]1'), + [1, 2, 3, 4], + [], + 'CC1COC2(CCCC2)O1', + 'CC(O)CO', + [] + ) + + rules['diol_13_cyclopentanone'] = ( + smarts('[O;D2;x0;r6:1]1-;@[C;D4;x2;z1]2([C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1]2)-[O;D2;x0:2][C:3][C:4]!#[C:5]1'), + [1, 2, 3, 4, 5], + [], + 'CC1CCOC2(CCCC2)O1', + 'CC(O)CCO', + [] + ) + + rules['diol_12_cyclohexanone'] = ( + smarts('[O;D2;x0;r5:1]1-;@[C;D4;x2;z1]2([C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1]2)-[O;D2;x0:2][C:3]!#[C:4]1'), + [1, 2, 3, 4], + [], + 'CC1COC2(CCCCC2)O1', + 'CC(O)CO', + [] + ) + + rules['diol_13_cyclohexanone'] = ( + smarts('[O;D2;x0;r6:1]1-;@[C;D4;x2;z1]2([C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1]2)-[O;D2;x0:2][C:3][C:4]!#[C:5]1'), + [1, 2, 3, 4, 5], + [], + 'CC1CCOC2(CCCCC2)O1', + 'CC(O)CCO', + [] + ) + + rules['diol_12_diacetal'] = ( + smarts('[O;D2;x0;r6:1]1-;@[C;D4;x2;z1]([O;D2;x0][C;D1])([C;D1])[C;D4;x2;z1]([O;D2;x0][C;D1])([C;D1])-[O;D2;x0:2][C:3]!#[C:4]1'), + [1, 2, 3, 4], + [], + 'COC1(C)OCC(C)OC1(C)OC', + 'CC(O)CO', + [] + ) + + rules['diol_13_diacetal'] = ( + 
smarts('[O;D2;x0;r7:1]1-;@[C;D4;x2;z1]([O;D2;x0][C;D1])([C;D1])[C;D4;x2;z1]([O;D2;x0][C;D1])([C;D1])-[O;D2;x0:2][C:3][C:4]!#[C:5]1'), + [1, 2, 3, 4, 5], + [], + 'COC1(C)OCCC(C)OC1(C)OC', + 'CC(O)CCO', + [] + ) + + rules['diol_12_benzylidene'] = ( + smarts('[O;D2;x0;r5:1]1-;@[C;D3;x2;z1]([C;a;r6]:2:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:2)-[O;D2;x0:2][C:3]!#[C:4]1'), + [1, 2, 3, 4], + [], + 'CC1COC(O1)c1ccccc1', + 'CC(O)CO', + [] + ) + + rules['diol_13_benzylidene'] = ( + smarts('[O;D2;x0;r6:1]1-;@[C;D3;x2;z1]([C;a;r6]:2:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:2)-[O;D2;x0:2][C:3][C:4]!#[C:5]1'), + [1, 2, 3, 4, 5], + [], + 'CC1CCOC(O1)c1ccccc1', + 'CC(O)CCO', + [] + ) + + rules['carbonyl_dithiolane'] = ( + smarts('[C;D3,D4;z1;x2;r5:1]1[S;D2;x0;z1][C;D2;x1;z1][C;D2;x1;z1][S;D2;x0;z1]1'), + [1], + [(1, 'O', 2)], + 'CC1SCCS1', + 'CC=O', + [] + ) + + rules['carbonyl_dithiane'] = ( + smarts('[C;D3,D4;z1;x2;r6:1]1[S;D2;x0;z1][C;D2;x1;z1][C;D2;x0;z1][C;D2;x1;z1][S;D2;x0;z1]1'), + [1], + [(1, 'O', 2)], + 'CC1SCCCS1', + 'CC=O', + [] + ) + + rules['carbonyl_dimethylsulfide'] = ( + smarts('[C;D3,D4;z1;x2:1](-;!@[S;D2][C;D1])-;!@[S;D2][C;D1]'), + [1], + [(1, 'O', 2)], + 'CSC(CC)SC', + 'CCC=O', + [] + ) + + rules['carbonyl_dioxolane'] = ( + smarts('[C;D3,D4;z1;x2;r5:1]1[O;D2;x0][C;D2;x1;z1][C;D2;x1;z1][O;D2;x0]1'), + [1], + [(1, 'O', 2)], + 'CC1OCCO1', + 'CC=O', + [] + ) + + rules['carbonyl_dioxane'] = ( + smarts('[C;D3,D4;z1;x2;r6:1]1[O;D2;x0][C;D2;x1;z1][C;D2;x0;z1][C;D2;x1;z1][O;D2;x0]1'), + [1], + [(1, 'O', 2)], + 'CC1OCCCO1', + 'CC=O', + [] + ) + + rules['carbonyl_dimethoxy'] = ( + smarts('[C;D3,D4;z1;x2:1](-;!@[O;D2;x0][C;D1])-;!@[O;D2;x0][C;D1]'), + [1], + [(1, 'O', 2)], + 'COC(C)OC', + 'CC=O', + [] + ) + + rules['carboxyl_trioxabicyclooctane'] = ( + smarts('[C;D4;x3;r6:1]12-;@[O;D2][C;D2;x1;z1][C;D4;x0;z1]([C;D1])([C;D2;x1;z1][O;D2]1)[C;D2;x1;z1][O;D2]2'), + [1], + [(1, 'O', 2), (1, 'O', 1)], + 'CC(C)C12OCC(C)(CO1)CO2', + 'CC(C)C(O)=O', + ['CC(C)C12OCC(CC)(CO1)CO2', 
'CC(C)C12OC(C)C(C)(CO1)CO2'] + ) + + rules['amine_methylcarbamate'] = ( + smarts('[N;D2,D3:1]-;!@[C;z2;x3](=O)[O;D2;x0][C;D1]'), + [1], + [], + 'c1ccccc1NC(=O)OC', + 'c1ccccc1N', + ['c1ccccc1NC(=O)OCC'] + ) + + rules['amine_ethylcarbamate'] = ( + smarts('[N;D2,D3:1]-;!@[C;z2;x3](=O)[O;D2;x0][C;D2;x1;z1][C;D1]'), + [1], + [], + 'c1ccccc1NC(=O)OCC', + 'c1ccccc1N', + [] + ) + + rules['amine_alloc'] = ( + smarts('[N;D2,D3:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D2;z1;x1][C;D2;x0;z2]=[C;D1]'), + [1], + [], + 'c1ccccc1NC(=O)OCC=C', + 'c1ccccc1N', + ['c1ccccc1NC(=O)OCC=CC'] + ) + + rules['amine_teoc'] = ( + smarts('[N;D2,D3:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D2;z1;x1][C;D2;x1;z1][Si;D4;z1;x0]([C;D1])([C;D1])[C;D1]'), + [1], + [], + 'c1ccccc1NC(=O)OCC[Si](C)(C)C', + 'c1ccccc1N', + [] + ) + + rules['amine_sem'] = ( + smarts('[N;D2,D3:1]-;!@[C;D2;x2;z1][O;D2;x0]-[C;D2;z1;x1][C;D2;x1;z1][Si;D4;z1;x0]([C;D1])([C;D1])[C;D1]'), + [1], + [], + 'CN(C)COCC[Si](C)(C)C', + 'CNC', + [] + ) + + rules['amine_troc'] = ( + smarts('[N;D2,D3:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D2][C;D4;x3]([Cl;D1])([Cl;D1])[Cl;D1]'), + [1], + [], + 'c1ccccc1NC(=O)OCC(Cl)(Cl)Cl', + 'c1ccccc1N', + ['c1ccccc1NC(=O)OC(C)C(Cl)(Cl)Cl'] + ) + + rules['amine_cbz'] = ( + smarts('[N;D2,D3:1]-;!@[C;z2;x3](=O)-[O;D2;x0][C;D2;x1;z1][C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'c1ccccc1NC(=O)OCc2ccccc2', + 'c1ccccc1N', + ['c1ccccc1NC(=O)OC(C)c2ccccc2'] + ) + + rules['amine_chloro_cbz'] = ( + smarts('[N;D2,D3:1]-;!@[C;z2;x3](=O)-[O;D2;x0][C;D2;x1;z1][C;a;r6]:1:[C;D3;x1]([Cl;D1]):[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'Clc1ccccc1COC(=O)Nc1ccccc1', + 'c1ccccc1N', + ['c1ccccc1NC(=O)OC(C)c2ccccc2'] + ) + + rules['amine_phenylsulfonyl'] = ( + smarts('[N;D2,D3:1]-;!@[S;D4;x3](=O)(=O)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'c1ccccc1S(=O)(=O)Nc1ccccc1', + 'c1ccccc1N', + [] + ) + + rules['amine_tosyl'] = ( + 
smarts('[N;D2,D3:1]-;!@[S;D4;x3](=O)(=O)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x0]([C;D1]):[C;D2]:[C;D2]:1'), + [1], + [], + 'Cc1ccc(cc1)S(=O)(=O)Nc1ccccc1', + 'c1ccccc1N', + [] + ) + + rules['amine_nosyl'] = ( + smarts('[N;D2,D3:1]-;!@[S;D4;x3](=O)(=O)-[C;a;r6]:1:[C;D3;x1]([N+](=O)[O-]):[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + '[O-][N+](=O)c1ccccc1S(=O)(=O)Nc1ccccc1', + 'c1ccccc1N', + [] + ) + + rules['amine_boc'] = ( + smarts('[N;D2,D3:1]-;!@[C;z2;x3](=O)-[O;D2;x0]-[C;D4;x1;z1]([C;D1])([C;D1])[C;D1]'), + [1], + [], + 'c1ccccc1NC(=O)OC(C)(C)C', + 'c1ccccc1N', + [] + ) + + rules['amine_tfa'] = ( + smarts('[N;D2,D3:1]-;!@[C;z2;x2](=O)-[C;D4;z1;x3](F)(F)F'), + [1], + [], + 'CNC(=O)C(F)(F)F', + 'CN', + [] + ) + + rules['amine_fmoc'] = ( + smarts('[N;D2,D3:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D2;x1;z1][C;D3;z1;x0;r5]1[C;a;r6]:2:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D3]:2-[C;a;r6]:3:[C;D2]:[C;D2]:[C;D2]:[C;D2]:C1:3'), + [1], + [], + 'O=C(Nc1ccccc1)OCC1c2ccccc2-c2ccccc12', + 'c1ccccc1N', + [] + ) + + rules['amine_pbf'] = ( + smarts('[N;D2,D3:1]-;!@[S;D4;x3](=O)(=O)-[C;a;r6]:1:[C;D3;x0]([C;D1]):[C;D3;x0]([C;D1]):[C;D3;x1]:2-[O;D2;x0;r5][C;D4;x1]([C;D1])([C;D1])[C;D2;x0;z1][C;D3]:2:[C;D3;x0]([C;D1]):1'), + [1], + [], + 'CN(C)S(=O)(=O)c1c(C)c2CC(C)(C)Oc2c(C)c1C', + 'CNC', + [] + ) + + rules['amine_mtr'] = ( + smarts('[N;D2,D3:1]-;!@[S;D4;x3](=O)(=O)-[C;a;r6]:1:[C;D3;x0]([C;D1]):[C;D3;x0]([C;D1]):[C;D3;x1](-;!@[O;D2;x0][C;D1]):[C;D2]:[C;D3;x0]([C;D1]):1'), + [1], + [], + 'COC1=C(C)C(C)=C(C(C)=C1)S(=O)(=O)N(C)C', + 'CNC', + [] + ) + + rules['amine_dde_enamine'] = ( + smarts('[N;D2,D3:1]-;!@[C;z2;x1;D3]([C;D1])=[C;D3;r6;x0;z2]1[C;x1;z2;D3](=O)[C;D2][C;D4;x0;z1]([C;D1])([C;D1])[C;D2][C;D3;x1;z2]1=O'), + [1], + [], + 'CC(C)NC(C)=C1C(=O)CC(C)(C)CC1=O', + 'CC(C)N', + [] + ) + + rules['amine_dde_imine'] = ( + smarts('[N;D2:1]=;!@[C;x1;D3]([C;D1])-[C;D3;r6;x0;z1]1[C;x1;z2;D3](=O)[C;D2][C;D4;x0;z1]([C;D1])([C;D1])[C;D2][C;D3;x1;z2]1=O'), + [1], + [], + 'CC(C)N=C(C)C1C(=O)CC(C)(C)CC1=O', + 
'CC(C)N', + [] + ) + + rules['amine_ivdde_enamine'] = ( + smarts('[N;D2,D3:1]-;!@[C;z2;x1;D3]([C;D2;x0;z1][C;D3;x0;z1]([C;D1])[C;D1])=[C;D3;r6;x0;z2]1[C;x1;z2;D3](=O)[C;D2][C;D4;x0;z1]([C;D1])([C;D1])[C;D2][C;D3;x1;z2]1=O'), + [1], + [], + 'CC(C)CC(NC(C)C)=C1C(=O)CC(C)(C)CC1=O', + 'CC(C)N', + [] + ) + + rules['amine_ivdde_imine'] = ( + smarts('[N;D2:1]=;!@[C;x1;D3]([C;D2;x0;z1][C;D3;x0;z1]([C;D1])[C;D1])-[C;D3;r6;x0;z1]1[C;x1;z2;D3](=O)[C;D2][C;D4;x0;z1]([C;D1])([C;D1])[C;D2][C;D3;x1;z2]1=O'), + [1], + [], + 'CC(C)CC(=NC(C)C)C1C(=O)CC(C)(C)CC1=O', + 'CC(C)N', + [] + ) + + rules['amine_benzyl'] = ( + smarts('[N;D2,D3:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'CC(C)NCc1ccccc1', + 'CC(C)N', + [] + ) + + rules['amine_methoxy_benzyl'] = ( + smarts('[N;D2,D3:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1'), + [1], + [], + 'CC(C)NCc1ccc(OC)cc1', + 'CC(C)N', + [] + ) + + rules['amine_dimethoxybenzyl'] = ( + smarts('[N;D2,D3:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1'), + [1], + [], + 'CC(C)NCc1c(OC)cc(OC)cc1', + 'CC(C)N', + [] + ) + + rules['amine_mtt'] = ( + smarts('[N;D2,D3:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x0]([C;D1]):[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'CC(C)NC(c1ccccc1)(c1ccccc1)c1ccc(C)cc1', + 'CC(C)N', + [] + ) + + rules['amine_bhoc'] = ( + smarts('[N;D2,D3:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D3;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'CNC(=O)OC(c1ccccc1)c1ccccc1', + 'CN', + [] + ) + + rules['amine_tritil'] = ( + smarts('[N;D2,D3:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 
'CC(C)NC(c1ccccc1)(c1ccccc1)c1ccccc1', + 'CC(C)N', + [] + ) + + rules['amine_chloro_tritil'] = ( + smarts('[N;D2,D3:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D3;x1]([Cl;D1]):[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'CC(C)NC(c1c(Cl)cccc1)(c1ccccc1)c1ccccc1', + 'CC(C)N', + [] + ) + + rules['thiol_tritil'] = ( + smarts('[S;D2;x0;z1:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'CC(C)SC(c1ccccc1)(c1ccccc1)c1ccccc1', + 'CC(C)S', + [] + ) + + rules['thiol_mmt'] = ( + smarts('[S;D2;x0;z1:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'c1ccc(cc1)C(SC(C)C)(c1ccccc1)c1ccc(OC)cc1', + 'CC(C)S', + [] + ) + + rules['thiol_dimetoxy_tritil'] = ( + smarts('[S;D2;x0;z1:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'COc1ccc(cc1)C(SC(C)C)(c1ccccc1)c1ccc(OC)cc1', + 'CC(C)S', + [] + ) + + rules['thiol_chloro_tritil'] = ( + smarts('[S;D2;x0;z1:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D3;x1]([Cl;D1]):[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'CC(C)SC(c1c(Cl)cccc1)(c1ccccc1)c1ccccc1', + 'CC(C)S', + [] + ) + + rules['thiol_benzyl'] = ( + smarts('[S;D2;x0;z1:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'CC(C)SCc1ccccc1', + 'CC(C)S', + [] + ) + + rules['thiol_tbu'] = ( + smarts('[S;D2;x0;z1:1]-;!@[C;D4;x1;z1]([C;D1])([C;D1])[C;D1]'), + [1], + [], + 'CC(C)SC(C)(C)C', + 'CC(C)S', + [] + ) + + 
rules['thiol_stbu'] = ( + smarts('[S;D2;x1;z1:1]-;!@[S;D2;z1;x1]-[C;D4;x1;z1]([C;D1])([C;D1])[C;D1]'), + [1], + [], + 'CC(C)SSC(C)(C)C', + 'CC(C)S', + [] + ) + + rules['thiol_strimethoxyphenyl'] = ( + smarts('[S;D2;x1;z1:1]-;!@[S;D2;z1;x1]-[C;a;r6]:1:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D3;x1]:1[O;D2;x0][C;D1]'), + [1], + [], + 'COc1cc(OC)c(SSC(C)C)c(OC)c1', + 'CC(C)S', + [] + ) + + rules['thiol_amine_dimethoxybenzyl'] = ( + smarts('[S;D2;r5;x0;z1:1]1[C:3][C:4][N;z1:2]-[C;D3;x2;z1]1-[C;a;r6]:1:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1'), + [1, 2, 3, 4], + [], + 'COC1=CC=C(C2NC(C)CS2)C(OC)=C1', + 'NC(C)CS', + [] + ) + + # hydroxyl Boc: O-C(=O)-O-C(C)(C)C (tert-butoxycarbonyl on O) + rules['hydroxyl_boc'] = ( + smarts('[O;D2:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D4;x1;z1]([C;D1])([C;D1])[C;D1]'), + [1], + [], + 'CC(C)OC(=O)OC(C)(C)C', + 'CC(C)O', + [] + ) + + # General patterns that are subsets of more specific ones above. + # Must come last to prevent false matches due to atom overlap. 
+ + rules['hydroxyl_tbu'] = ( + smarts('[O;D2:1]-;!@[C;D4;x1;z1]([C;D1])([C;D1])[C;D1]'), + [1], + [], + 'CC(C)OC(C)(C)C', + 'CC(C)O', + [] + ) + + rules['hydroxyl_allyl'] = ( + smarts('[O;D2:1]-;!@[C;D2;z1;x1][C;D2;x0;z2]=[C;D1]'), + [1], + [], + 'CC(C)OCC=C', + 'CC(C)O', + [] + ) + + rules['hydroxyl_benzyl'] = ( + smarts('[O;D2:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1'), + [1], + [], + 'CC(C)OCc1ccccc1', + 'CC(C)O', + ['CC(C)OCc1cccc(C)c1', 'CC(C)OC(C)c1ccccc1'] + ) + + rules['hydroxyl_acyl'] = ( + smarts('[O;D2:1]-;!@[C;z2;x2](=O)-[C;D1]'), + [1], + [], + 'CC(C)OC(=O)C', + 'CC(C)O', + ['CC(C)OC(=O)CC'] + ) + + rules['hydroxyl_methyl'] = ( + smarts('[O;D2:1]-;!@[C;D1]'), + [1], + [], + 'CC(C)OC', + 'CC(C)O', + ['CC(C)OCC'] + ) + + return rules + + +rules = Proxy(_rules) + + +__all__ = ['rules'] diff --git a/chython/algorithms/groups/_reactions.py b/chython/algorithms/groups/_reactions.py new file mode 100644 index 00000000..b5042181 --- /dev/null +++ b/chython/algorithms/groups/_reactions.py @@ -0,0 +1,786 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2026 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +from lazy_object_proxy import Proxy + + +def _make_reactor(rxn_name, reactants, product_smarts): + from ._functional import rules + from ...reactor import Reactor + from ... 
import smarts + + queries = [] + fg_names = [] + for fg_name, remap in reactants: + q = rules[fg_name] + if remap: + q = q.copy() + q.remap(remap) + queries.append(q) + fg_names.append(fg_name) + + product = smarts(product_smarts) + return rxn_name, fg_names, Reactor(tuple(queries), (product,), + delete_atoms=True, one_shot=True, fix_broken_pyrroles=True) + + +def _rules(): + rules = [] + + # amidation: RCOX + R'NH2 -> RC(=O)NHR' + for acyl in ('carboxylic_acid', 'acyl_chloride'): + for amine in ('primary_amine', 'primary_aniline'): + rules.append(_make_reactor('amidation', + [(acyl, None), + (amine, {1: 3, 2: 4})], + '[A:1](=[A:2])-[A:3]-[A:4]')) + for amine in ('secondary_amine', 'secondary_aniline'): + rules.append(_make_reactor('amidation', + [(acyl, None), + (amine, {1: 3, 2: 4, 3: 5})], + '[A:1](=[A:2])-[A:3](-[A:4])-[A:5]')) + + # carbamoylation: ROC(=O)X + amine -> ROC(=O)NR'R'' (carbamate from chloroformate) + for formate in ('chloroformate', 'fluoroformate'): + for amine in ('primary_amine', 'primary_aniline'): + rules.append(_make_reactor('carbamoylation', + [(formate, None), + (amine, {1: 4, 2: 5})], + '[A:3]-[A:1](=[A:2])-[A:4]-[A:5]')) + for amine in ('secondary_amine', 'secondary_aniline'): + rules.append(_make_reactor('carbamoylation', + [(formate, None), + (amine, {1: 4, 2: 5, 3: 6})], + '[A:3]-[A:1](=[A:2])-[A:4](-[A:5])-[A:6]')) + rules.append(_make_reactor('carbamoylation', + [(formate, None), + ('pyrrole', {1: 4})], + '[A:3]-[A:1](=[A:2])-[A:4]')) + rules.append(_make_reactor('carbamoylation', + [(formate, None), + ('imidazole', {1: 4, 2: 5, 3: 6})], + '[A:3]-[A:1](=[A:2])-[A:6]:[A:5]:[A:4]')) + + # urea from carbamoyl chloride: R2NC(=O)X + amine -> R2NC(=O)NR'R'' + for carbamoyl in ('carbamoyl_chloride', 'carbamoyl_fluoride'): + for amine in ('primary_amine', 'primary_aniline'): + rules.append(_make_reactor('urea_from_carbamoyl', + [(carbamoyl, None), + (amine, {1: 4, 2: 5})], + '[A:3]-[A:1](=[A:2])-[A:4]-[A:5]')) + for amine in 
('secondary_amine', 'secondary_aniline'): + rules.append(_make_reactor('urea_from_carbamoyl', + [(carbamoyl, None), + (amine, {1: 4, 2: 5, 3: 6})], + '[A:3]-[A:1](=[A:2])-[A:4](-[A:5])-[A:6]')) + + # suzuki + # aryl: ArX + ArB(OH)2 -> Ar-Ar + # alkenyl: ArX + alkenyl_boronic -> Ar-CH=CH-R + # alkyl: ArX + alkyl_boronic -> Ar-R + for halide in ('aryl_chloride', 'aryl_bromide', 'aryl_iodide'): + for boron in ('aryl_boronic_acid', 'aryl_boronic_ester', 'aryl_molander_salt'): + rules.append(_make_reactor('suzuki', + [(halide, {100: 200}), + (boron, {1: 2})], + '[A:1]-[A:2]')) + + for boron in ('alkenyl_boronic_acid', 'alkenyl_boronic_ester', 'alkenyl_molander_salt'): + rules.append(_make_reactor('suzuki', + [(halide, {100: 200}), + (boron, {1: 2, 2: 3})], + '[A:1]-[A:2]=[A:3]')) + + for boron in ('alkyl_boronic_acid', 'alkyl_boronic_ester', 'alkyl_molander_salt'): + rules.append(_make_reactor('suzuki', + [(halide, {100: 200}), + (boron, {1: 2})], + '[A:1]-[A:2]')) + + # buchwald-hartwig: ArX + R'NH2 -> ArNHR' + for halide in ('aryl_chloride', 'aryl_bromide', 'aryl_iodide'): + for amine in ('primary_amine', 'primary_aniline', 'primary_amidine_amine'): + rules.append(_make_reactor('buchwald_hartwig', + [(halide, None), + (amine, {1: 3, 2: 4})], + '[A:1]-[A:3]-[A:4]')) + for amine in ('secondary_amine', 'secondary_aniline'): + rules.append(_make_reactor('buchwald_hartwig', + [(halide, None), + (amine, {1: 3, 2: 4, 3: 5})], + '[A:1]-[A:3](-[A:4])-[A:5]')) + + # buchwald-hartwig: pyridone/lactam halides + amines + for _x in ('fluoride', 'chloride', 'bromide', 'iodide'): + for _n in ('1', '2', '3', '4'): + pyridone = f'pyridone_{_n}_{_x}' + for amine in ('primary_amine', 'primary_aniline'): + rules.append(_make_reactor('buchwald_hartwig', + [(pyridone, None), + (amine, {1: 3, 2: 4})], + '[A:1]-[A:3]-[A:4]')) + for amine in ('secondary_amine', 'secondary_aniline'): + rules.append(_make_reactor('buchwald_hartwig', + [(pyridone, None), + (amine, {1: 3, 2: 4, 3: 5})], + 
'[A:1]-[A:3](-[A:4])-[A:5]')) + + # ugi 3CR: RCHO + R'NH2 + R''NC -> R'NH-CH(R)-C(=O)NHR'' + rules.append(_make_reactor('ugi_3cr', + [('aldehyde', None), + ('primary_amine', {1: 3, 2: 4}), + ('isocyano', {1: 5, 2: 6})], + '[A:1](-[A:3](-[A:4]))-[A:6](=[O:20])-[A:5]')) + + # mitsunobu ether: ROH + ArOH -> R-O-Ar; ester: ROH + RCOOH -> R-OC(=O)R' + for alcohol in ('primary_alcohol', 'secondary_alcohol'): + rules.append(_make_reactor('mitsunobu', + [(alcohol, None), + ('phenol', {1: 3, 2: 4})], + '[A:2]-[A:3]-[A:4]')) + rules.append(_make_reactor('mitsunobu', + [(alcohol, None), + ('carboxylic_acid', {1: 3, 2: 4})], + '[A:2]-[A:100]-[A:3](=[A:4])')) + + # deoxygenative coupling: ROH + ArX -> R-Ar + for alcohol in ('primary_alcohol', 'secondary_alcohol'): + for halide in ('aryl_chloride', 'aryl_bromide', 'aryl_iodide'): + rules.append(_make_reactor('deoxygenative_coupling', + [(alcohol, None), + (halide, {1: 3})], + '[A:2]-[A:3]')) + + # decarboxylative coupling: R-COOH + ArX -> R-Ar + for acid in ('alkyl_carboxylic_acid', 'aryl_carboxylic_acid'): + for halide in ('aryl_chloride', 'aryl_bromide', 'aryl_iodide'): + rules.append(_make_reactor('decarboxylative_coupling', + [(acid, None), + (halide, {100: 200, 1: 4})], + '[A:3]-[A:4]')) + + # XEC (cross-electrophile coupling): ArX + R'X -> Ar-R' + for halide in ('aryl_chloride', 'aryl_bromide', 'aryl_iodide'): + for alkyl_halide in ('alkyl_chloride', 'alkyl_bromide', 'alkyl_iodide'): + rules.append(_make_reactor('xec', + [(halide, None), + (alkyl_halide, {100: 200, 1: 2})], + '[A:1]-[A:2]')) + + # reductive amination: RCHO/R2CO + amine -> amine + for carbonyl in ('aldehyde', 'ketone'): + for amine in ('primary_amine', 'primary_aniline'): + rules.append(_make_reactor('reductive_amination', + [(carbonyl, None), + (amine, {1: 3, 2: 4})], + '[A:1]-[A:3]-[A:4]')) + for amine in ('secondary_amine', 'secondary_aniline'): + rules.append(_make_reactor('reductive_amination', + [(carbonyl, None), + (amine, {1: 3, 2: 4, 3: 5})], + 
'[A:1]-[A:3](-[A:4])-[A:5]')) + + # ullmann phenol: ArX + ArOH -> Ar-O-Ar + for halide in ('aryl_chloride', 'aryl_bromide', 'aryl_iodide'): + rules.append(_make_reactor('ullmann_phenol', + [(halide, None), + ('phenol', {1: 3, 2: 4})], + '[A:1]-[A:3]-[A:4]')) + + # ullmann pyrrole: ArX + pyrrole-NH -> Ar-N(pyrrole) + for halide in ('aryl_chloride', 'aryl_bromide', 'aryl_iodide'): + rules.append(_make_reactor('ullmann_pyrrole', + [(halide, None), + ('pyrrole', {1: 3})], + '[A:1]-[A:3]')) + rules.append(_make_reactor('ullmann_pyrrole', + [(halide, None), + ('pyrazole', {1: 3, 2: 4})], + '[A:1]-[A:4]:[A:3]')) + rules.append(_make_reactor('ullmann_pyrrole', + [(halide, None), + ('imidazole', {1: 3, 2: 4, 3: 5})], + '[A:1]-[A:5]:[A:4]:[A:3]')) + + # ullmann pyrrole: pyridone/lactam halides + N-heterocycles + for _x in ('fluoride', 'chloride', 'bromide', 'iodide'): + for _n in ('1', '2', '3', '4'): + pyridone = f'pyridone_{_n}_{_x}' + rules.append(_make_reactor('ullmann_pyrrole', + [(pyridone, None), + ('pyrrole', {1: 3})], + '[A:1]-[A:3]')) + rules.append(_make_reactor('ullmann_pyrrole', + [(pyridone, None), + ('pyrazole', {1: 3, 2: 4})], + '[A:1]-[A:4]:[A:3]')) + rules.append(_make_reactor('ullmann_pyrrole', + [(pyridone, None), + ('imidazole', {1: 3, 2: 4, 3: 5})], + '[A:1]-[A:5]:[A:4]:[A:3]')) + + # ullmann pyridol: ArX + hydroxypyridine -> Ar-N-C=O (CN coupling on pyridol tautomer) + for halide in ('aryl_fluoride', 'aryl_chloride', 'aryl_bromide', 'aryl_iodide'): + rules.append(_make_reactor('ullmann_pyrrole', + [(halide, None), + ('pyridol', {1: 3, 2: 4, 3: 5})], + '[A:1]-[A:3]-[A:4]=[A:5]')) + + # chan-lam: ArB(OH)2 + amine/phenol -> Ar-N/Ar-O + for boron in ('aryl_boronic_acid', 'aryl_boronic_ester'): + for amine in ('primary_amine', 'primary_aniline'): + rules.append(_make_reactor('chan_lam', + [(boron, None), + (amine, {1: 3, 2: 4})], + '[A:1]-[A:3]-[A:4]')) + for amine in ('secondary_amine', 'secondary_aniline'): + rules.append(_make_reactor('chan_lam', + 
[(boron, None), + (amine, {1: 3, 2: 4, 3: 5})], + '[A:1]-[A:3](-[A:4])-[A:5]')) + rules.append(_make_reactor('chan_lam', + [(boron, None), + ('phenol', {1: 3, 2: 4})], + '[A:1]-[A:3]-[A:4]')) + + # sulfonylation: RSO2X + alcohol/phenol -> sulfonate ester + for sulfonyl in ('sulfonyl_chloride', 'sulfonyl_fluoride'): + for alcohol in ('primary_alcohol', 'secondary_alcohol'): + rules.append(_make_reactor('sulfonylation', + [(sulfonyl, None), + (alcohol, {1: 4, 2: 5})], + '[A:1](=[A:2])(=[A:3])-[A:4]-[A:5]')) + rules.append(_make_reactor('sulfonylation', + [(sulfonyl, None), + ('phenol', {1: 4, 2: 5})], + '[A:1](=[A:2])(=[A:3])-[A:4]-[A:5]')) + + # sulfonamide: RSO2X + amine -> sulfonamide + for sulfonyl in ('sulfonyl_chloride', 'sulfonyl_fluoride'): + for amine in ('primary_amine', 'primary_aniline'): + rules.append(_make_reactor('sulfonamide_formation', + [(sulfonyl, None), + (amine, {1: 4, 2: 5})], + '[A:1](=[A:2])(=[A:3])-[A:4]-[A:5]')) + for amine in ('secondary_amine', 'secondary_aniline'): + rules.append(_make_reactor('sulfonamide_formation', + [(sulfonyl, None), + (amine, {1: 4, 2: 5, 3: 6})], + '[A:1](=[A:2])(=[A:3])-[A:4](-[A:5])-[A:6]')) + + # sulfonamide from amide N-H: RSO2X + R'C(=O)NHR" -> R'C(=O)N(SO2R)R" + for sulfonyl in ('sulfonyl_chloride', 'sulfonyl_fluoride'): + for amide in ('primary_amide', 'secondary_amide'): + rules.append(_make_reactor('sulfonamide_formation', + [(sulfonyl, None), + (amide, {1: 4, 2: 5, 3: 6})], + '[A:1](=[A:2])(=[A:3])-[A:4]-[A:5]=[A:6]')) + + # aminolysis: ester + amine -> amide + for amine in ('primary_amine', 'primary_aniline'): + rules.append(_make_reactor('aminolysis', + [('ester', None), + (amine, {1: 3, 2: 4})], + '[A:1](=[A:2])-[A:3]-[A:4]')) + for amine in ('secondary_amine', 'secondary_aniline'): + rules.append(_make_reactor('aminolysis', + [('ester', None), + (amine, {1: 3, 2: 4, 3: 5})], + '[A:1](=[A:2])-[A:3](-[A:4])-[A:5]')) + + # grignard: RMgX + RCHO/R2CO -> alcohol + for grignard in ('alkyl_grignard', 
'aryl_grignard'): + for carbonyl in ('aldehyde', 'ketone'): + rules.append(_make_reactor('grignard', + [(grignard, None), + (carbonyl, {1: 3, 2: 4})], + '[A:1]-[A:3]-[A:4]')) + for carbonyl in ('aldehyde', 'ketone'): + rules.append(_make_reactor('grignard', + [('alkenyl_grignard', None), + (carbonyl, {1: 3, 2: 4})], + '[A:1]=[A:2]-[A:3]-[A:4]')) + # grignard from halide surrogates + for alkyl_halide in ('alkyl_chloride', 'alkyl_bromide', 'alkyl_iodide'): + for carbonyl in ('aldehyde', 'ketone'): + rules.append(_make_reactor('grignard', + [(alkyl_halide, None), + (carbonyl, {1: 2, 2: 3})], + '[A:1]-[A:2]-[A:3]')) + for aryl_halide in ('aryl_chloride', 'aryl_bromide', 'aryl_iodide'): + for carbonyl in ('aldehyde', 'ketone'): + rules.append(_make_reactor('grignard', + [(aryl_halide, None), + (carbonyl, {1: 2, 2: 3})], + '[A:1]-[A:2]-[A:3]')) + + # sonogashira: ArX + terminal alkyne -> Ar-C≡C-R + for halide in ('aryl_chloride', 'aryl_bromide', 'aryl_iodide'): + rules.append(_make_reactor('sonogashira', + [(halide, None), + ('terminal_alkyne', {1: 2, 2: 3})], + '[A:1]-[A:2]#[A:3]')) + + # urea synthesis: R-N=C=O + amine -> R-NH-C(=O)-NH-R' + for amine in ('primary_amine', 'primary_aniline'): + rules.append(_make_reactor('urea_synthesis', + [('isocyanate', None), + (amine, {1: 4, 2: 5})], + '[A:1]-[A:2](=[A:3])-[A:4]-[A:5]')) + for amine in ('secondary_amine', 'secondary_aniline'): + rules.append(_make_reactor('urea_synthesis', + [('isocyanate', None), + (amine, {1: 4, 2: 5, 3: 6})], + '[A:1]-[A:2](=[A:3])-[A:4](-[A:5])-[A:6]')) + + # CuAAC: azide + terminal alkyne -> 1,2,3-triazole + rules.append(_make_reactor('cuaac', + [('azide', None), + ('terminal_alkyne', {1: 4, 2: 5})], + '[A:1]:1:[A:2]:[A:3]:[A:5]:[A:4]:1')) + + # SNAr: ArF + amine -> Ar-NR2 (nucleophilic aromatic substitution) + for amine in ('primary_amine', 'primary_aniline'): + rules.append(_make_reactor('snar', + [('aryl_fluoride', None), + (amine, {1: 3, 2: 4})], + '[A:1]-[A:3]-[A:4]')) + for amine in 
('secondary_amine', 'secondary_aniline'): + rules.append(_make_reactor('snar', + [('aryl_fluoride', None), + (amine, {1: 3, 2: 4, 3: 5})], + '[A:1]-[A:3](-[A:4])-[A:5]')) + + # SNAr: ArF + phenol -> Ar-O-Ar + rules.append(_make_reactor('snar', + [('aryl_fluoride', None), + ('phenol', {1: 3, 2: 4})], + '[A:1]-[A:3]-[A:4]')) + + # SNAr: ArF + thiol -> Ar-S-R + rules.append(_make_reactor('snar', + [('aryl_fluoride', None), + ('thiol', {1: 3, 2: 4})], + '[A:1]-[A:3]-[A:4]')) + + # Williamson ether: + # alkyl_halide + phenol -> R-O-Ar + # alkyl_halide + primary_alcohol -> R-O-R' + for halide in ('alkyl_chloride', 'alkyl_bromide', 'alkyl_iodide'): + rules.append(_make_reactor('williamson', + [(halide, None), + ('phenol', {1: 3, 2: 4})], + '[A:1]-[A:3]-[A:4]')) + + for alcohol in ('primary_alcohol', 'secondary_alcohol', 'tertiary_alcohol'): + rules.append(_make_reactor('williamson', + [(halide, None), + (alcohol, {1: 3, 2: 4})], + '[A:1]-[A:3]-[A:4]')) + + # Williamson ether with pseudohalides: + for pseudohalide in ('alkyl_triflate', 'alkyl_mesylate', 'alkyl_tosylate'): + rules.append(_make_reactor('williamson', + [(pseudohalide, None), + ('phenol', {1: 3, 2: 4})], + '[A:1]-[A:3]-[A:4]')) + for alcohol in ('primary_alcohol', 'secondary_alcohol', 'tertiary_alcohol'): + rules.append(_make_reactor('williamson', + [(pseudohalide, None), + (alcohol, {1: 3, 2: 4})], + '[A:1]-[A:3]-[A:4]')) + + # acylation (ester): acyl_chloride + alcohol -> ester + for alcohol in ('primary_alcohol', 'secondary_alcohol'): + rules.append(_make_reactor('acylation', + [('acyl_chloride', None), + (alcohol, {1: 3, 2: 4})], + '[A:1](=[A:2])-[A:3]-[A:4]')) + rules.append(_make_reactor('acylation', + [('acyl_chloride', None), + ('phenol', {1: 3, 2: 4})], + '[A:1](=[A:2])-[A:3]-[A:4]')) + + # thioether: thiol + alkyl_halide -> R-S-R' + for halide in ('alkyl_chloride', 'alkyl_bromide', 'alkyl_iodide'): + rules.append(_make_reactor('thioether', + [('thiol', None), + (halide, {100: 200, 1: 3})], + 
'[A:1](-[A:2])-[A:3]')) + + # carbamate: isocyanate + alcohol -> R-NH-C(=O)-OR' + for alcohol in ('primary_alcohol', 'secondary_alcohol'): + rules.append(_make_reactor('carbamate', + [('isocyanate', None), + (alcohol, {1: 4, 2: 5})], + '[A:1]-[A:2](=[A:3])-[A:4]-[A:5]')) + rules.append(_make_reactor('carbamate', + [('isocyanate', None), + ('phenol', {1: 4, 2: 5})], + '[A:1]-[A:2](=[A:3])-[A:4]-[A:5]')) + + # tetrazole: azide + nitrile -> 2,5-disubstituted tetrazole (ring formation) + rules.append(_make_reactor('tetrazole', + [('azide', None), + ('nitrile', {1: 4, 2: 5})], + '[A:1]:1:[A:2]:[A:3]:[A:4]:[A:5]:1')) + + # wittig: phosphonium_ylide + aldehyde/ketone -> alkene + for carbonyl in ('aldehyde', 'ketone'): + rules.append(_make_reactor('wittig', + [('phosphonium_ylide', None), + (carbonyl, {1: 2, 2: 3})], + '[A:1]=[A:2]')) + + # hwe: phosphonate + aldehyde/ketone -> alkene (E-selective) + for carbonyl in ('aldehyde', 'ketone'): + rules.append(_make_reactor('hwe', + [('phosphonate', None), + (carbonyl, {1: 2, 2: 3})], + '[A:1]=[A:2]')) + + # weinreb: weinreb_amide + grignard -> ketone + for grignard in ('alkyl_grignard', 'aryl_grignard'): + rules.append(_make_reactor('weinreb', + [('weinreb_amide', None), + (grignard, {100: 200, 101: 201, 1: 3})], + '[A:2]=[A:1]-[A:3]')) + # weinreb from ester surrogate: ester + grignard -> ketone + for grignard in ('alkyl_grignard', 'aryl_grignard'): + rules.append(_make_reactor('weinreb', + [('ester', None), + (grignard, {100: 200, 101: 201, 1: 3})], + '[A:2]=[A:1]-[A:3]')) + # weinreb from acyl_chloride surrogate: acyl_chloride + grignard -> ketone + for grignard in ('alkyl_grignard', 'aryl_grignard'): + rules.append(_make_reactor('weinreb', + [('acyl_chloride', None), + (grignard, {100: 200, 101: 201, 1: 3})], + '[A:2]=[A:1]-[A:3]')) + + # friedel-crafts acylation: acyl_chloride + arene -> Ar-COR + rules.append(_make_reactor('friedel_crafts', + [('acyl_chloride', None), + ('arene_ch', {1: 3})], + '[A:1](=[A:2])-[A:3]')) + + 
# heck: ArX + alkene -> Ar-CH=CH-R + for halide in ('aryl_chloride', 'aryl_bromide', 'aryl_iodide'): + for alkene in ('terminal_alkene', 'alkene'): + rules.append(_make_reactor('heck', + [(halide, None), + (alkene, {1: 2, 2: 3})], + '[A:1]-[A:2]=[A:3]')) + + # cross metathesis: terminal_alkene + terminal_alkene -> internal alkene + rules.append(_make_reactor('cross_metathesis', + [('terminal_alkene', None), + ('terminal_alkene', {1: 3, 2: 4})], + '[A:2]=[A:4]')) + + # kumada: ArX + RMgX -> Ar-R + for halide in ('aryl_chloride', 'aryl_bromide', 'aryl_iodide'): + for grignard in ('alkyl_grignard', 'aryl_grignard'): + rules.append(_make_reactor('kumada', + [(halide, None), + (grignard, {100: 200, 101: 201, 1: 2})], + '[A:1]-[A:2]')) + rules.append(_make_reactor('kumada', + [(halide, None), + ('alkenyl_grignard', {100: 200, 101: 201, 1: 2, 2: 3})], + '[A:1]-[A:2]=[A:3]')) + + # negishi: ArX + RZnX -> Ar-R + for halide in ('aryl_chloride', 'aryl_bromide', 'aryl_iodide'): + for zinc in ('alkyl_zinc', 'aryl_zinc'): + rules.append(_make_reactor('negishi', + [(halide, None), + (zinc, {100: 200, 101: 201, 1: 2})], + '[A:1]-[A:2]')) + rules.append(_make_reactor('negishi', + [(halide, None), + ('alkenyl_zinc', {100: 200, 101: 201, 1: 2, 2: 3})], + '[A:1]-[A:2]=[A:3]')) + + # stille: ArX + R-SnR3 -> Ar-R + for halide in ('aryl_chloride', 'aryl_bromide', 'aryl_iodide'): + rules.append(_make_reactor('stille', + [(halide, None), + ('aryl_stannane', {100: 200, 1: 2})], + '[A:1]-[A:2]')) + rules.append(_make_reactor('stille', + [(halide, None), + ('alkenyl_stannane', {100: 200, 1: 2, 2: 3})], + '[A:1]-[A:2]=[A:3]')) + rules.append(_make_reactor('stille', + [(halide, None), + ('alkyl_stannane', {100: 200, 1: 2})], + '[A:1]-[A:2]')) + + # hiyama: ArX + R-SiR3 -> Ar-R + for halide in ('aryl_chloride', 'aryl_bromide', 'aryl_iodide'): + rules.append(_make_reactor('hiyama', + [(halide, None), + ('aryl_silane', {100: 200, 1: 2})], + '[A:1]-[A:2]')) + rules.append(_make_reactor('hiyama', + 
[(halide, None), + ('alkenyl_silane', {100: 200, 1: 2, 2: 3})], + '[A:1]-[A:2]=[A:3]')) + + # hantzsch thiazole: alpha_haloketone + thioamide -> thiazole + rules.append(_make_reactor('hantzsch_thiazole', + [('alpha_haloketone', None), + ('thioamide', {1: 4, 2: 5, 3: 6})], + '[A:5]:1:[A:4]:[A:6]:[A:1]:[A:3]:1')) + + # knorr pyrazole: 1,3-diketone + alkyl_hydrazine -> pyrazole + rules.append(_make_reactor('knorr_pyrazole', + [('1_3_diketone', None), + ('alkyl_hydrazine', {1: 6, 2: 7, 3: 8})], + '[A:6](:1:[A:7]:[A:1]:[A:5]:[A:3]:1)-[A:8]')) + + # paal_knorr pyrrole: 1,4-diketone + primary_amine -> pyrrole + for amine in ('primary_amine', 'primary_aniline'): + rules.append(_make_reactor('paal_knorr', + [('1_4_diketone', None), + (amine, {1: 7, 2: 8})], + '[A:7](:1:[A:1]:[A:5]:[A:6]:[A:3]:1)-[A:8]')) + + # fischer indole: aryl_hydrazine + alpha_ketone -> indole + rules.append(_make_reactor('fischer_indole', + [('aryl_hydrazine', None), + ('alpha_ketone', {1: 5, 2: 6, 3: 7})], + '[A:1]:1:[A:3]:[A:4]:[A:7]:[A:5]:1')) + + # benzimidazole: o_diaminoarene + aldehyde/acid -> benzimidazole + for carbonyl in ('aldehyde', 'carboxylic_acid'): + rules.append(_make_reactor('benzimidazole', + [('o_diaminoarene', None), + (carbonyl, {1: 5, 2: 6})], + '[A:1]:1:[A:3]:[A:4]:[A:2]:[A:5]:1')) + + # benzoxazole: o_aminophenol + aldehyde/acid -> benzoxazole + for carbonyl in ('aldehyde', 'carboxylic_acid'): + rules.append(_make_reactor('benzoxazole', + [('o_aminophenol', None), + (carbonyl, {1: 5, 2: 6})], + '[A:1]:1:[A:3]:[A:4]:[A:2]:[A:5]:1')) + + # benzothiazole: o_aminothiophenol + aldehyde/acid -> benzothiazole + for carbonyl in ('aldehyde', 'carboxylic_acid'): + rules.append(_make_reactor('benzothiazole', + [('o_aminothiophenol', None), + (carbonyl, {1: 5, 2: 6})], + '[A:1]:1:[A:3]:[A:4]:[A:2]:[A:5]:1')) + + # quinoxaline: o_diaminoarene + 1,2-diketone -> quinoxaline + rules.append(_make_reactor('quinoxaline', + [('o_diaminoarene', None), + ('1_2_diketone', {1: 5, 2: 6, 3: 7, 4: 8})], 
+ '[A:1]:1:[A:3]:[A:4]:[A:2]:[A:7]:[A:5]:1')) + + # friedlander quinoline: o_aminobenzaldehyde + alpha_ketone -> quinoline + rules.append(_make_reactor('friedlander', + [('o_aminobenzaldehyde', None), + ('alpha_ketone', {1: 7, 2: 8, 3: 9})], + '[A:1]:1:[A:3]:[A:4]:[A:5]:[A:9]:[A:7]:1')) + + # pictet_spengler: beta_arylethylamine + aldehyde -> THIQ + rules.append(_make_reactor('pictet_spengler', + [('beta_arylethylamine', None), + ('aldehyde', {1: 6, 2: 7})], + '[A:1]1-[A:2]-[A:3]-[A:4]:[A:5]-[A:6]-1')) + + # 1,2,4-oxadiazole: amidoxime + acyl source -> 1,2,4-oxadiazole + for acyl in ('acyl_chloride', 'acyl_fluoride', 'carboxylic_acid'): + rules.append(_make_reactor('oxadiazole_124', + [('amidoxime', None), + (acyl, {1: 5, 2: 6, 100: 200})], + '[A:4]:1:[A:1]:[A:3]:[A:5]:[A:2]:1')) + + # pyrimidine: 1,3-diketone + amidine -> pyrimidine + rules.append(_make_reactor('pyrimidine', + [('1_3_diketone', None), + ('amidine', {1: 6, 2: 7, 3: 8})], + '[A:7]:1:[A:6]:[A:8]:[A:1]:[A:5]:[A:3]:1')) + + # van_leusen oxazole: alpha_isocyano + aldehyde -> oxazole + rules.append(_make_reactor('van_leusen_oxazole', + [('tosyl_isocyanide', None), + ('aldehyde', {1: 4, 2: 5})], + '[A:1]:1:[A:2]:[A:3]:[A:5]:[A:4]:1')) + + # imidazo[1,2-a]pyridine: aminopyridine + alpha_haloketone + rules.append(_make_reactor('imidazopyridine', + [('aminopyridine', None), + ('alpha_haloketone', {1: 4, 2: 5, 3: 6, 100: 200})], + '[A:3]:1:[A:6]:[A:4]:[A:1]:[A:2]:1')) + # imidazo[1,2-a]pyridine: aminopyridine + alpha_haloester (ester O masked, carbonyl O deleted) + rules.append(_make_reactor('imidazopyridine', + [('aminopyridine', None), + ('alpha_haloester', {1: 4, 2: 5, 3: 6, 100: 200})], + '[A:3]:1:[A:6]:[A:4]:[A:1]:[A:2]:1')) + + # biginelli: aldehyde + beta_ketoester + urea/thiourea -> DHPM (3-component) + for urea_fg in ('urea', 'thiourea'): + rules.append(_make_reactor('biginelli', + [('aldehyde', None), + ('beta_ketoester', {1: 3, 2: 4, 3: 5, 4: 6, 5: 7, 100: 200}), + (urea_fg, {1: 8, 2: 9, 3: 10, 4: 
11})], + '[A:8]1-[A:9](=[A:10])-[A:11]-[A:3]=[A:7]-[A:1]-1')) + + # niementowski quinazoline: anthranilic_acid + primary_amide -> 4-oxoquinazoline + rules.append(_make_reactor('niementowski', + [('anthranilic_acid', None), + ('primary_amide', {1: 7, 2: 8, 3: 9})], + '[A:1]:1:[A:3]:[A:4]:[A:5](-[A:6]):[A:7]:[A:8]:1')) + + # ugi 4CR: RCHO + R'NH2 + R"COOH + R"'NC -> R"C(=O)N(R')CH(R)C(=O)NHR"' + rules.append(_make_reactor('ugi_4cr', + [('aldehyde', None), + ('primary_amine', {1: 3, 2: 4}), + ('carboxylic_acid', {100: 200, 1: 7, 2: 8}), + ('isocyano', {1: 5, 2: 6})], + '[A:7](=[A:8])-[A:3](-[A:4])-[A:1]-[A:6](=[O:20])-[A:5]')) + + # liebeskind-srogl: R-C(=O)-SR' + ArB(OH)2 -> R-C(=O)-Ar (ketone synthesis) + for boron in ('aryl_boronic_acid', 'aryl_boronic_ester'): + rules.append(_make_reactor('liebeskind_srogl', + [('thioester', None), + (boron, {100: 200, 101: 201, 102: 202, 1: 3})], + '[A:1](=[A:2])-[A:3]')) + + # knoevenagel: active_methylene + aldehyde -> alkene (condensation) + rules.append(_make_reactor('knoevenagel', + [('active_methylene', None), + ('aldehyde', {1: 4, 2: 5})], + '[A:4]=[A:1](-[A:2])-[A:3]')) + + # aldol: alpha_ketone + aldehyde -> beta-hydroxy carbonyl + rules.append(_make_reactor('aldol', + [('alpha_ketone', None), + ('aldehyde', {1: 4, 2: 5})], + '[A:1](=[A:2])-[A:3]-[A:4]-[A:5]')) + + # larock indole: o_haloaniline + alkyne -> indole + rules.append(_make_reactor('larock_indole', + [('o_haloaniline', None), + ('alkyne', {1: 4, 2: 5})], + '[A:1]:1:[A:2]:[A:3]:[A:5]:[A:4]:1')) + + # doebner-miller: aniline_ortho_ch + enal -> quinoline + rules.append(_make_reactor('doebner_miller', + [('aniline_ortho_ch', None), + ('enal', {1: 4, 2: 5, 3: 6, 4: 7})], + '[A:1]:1:[A:2]:[A:3]:[A:7]:[A:6]:[A:4]:1')) + + # hantzsch pyridine: aldehyde + 2x beta_ketoester -> pyridine (NH3 implicit) + rules.append(_make_reactor('hantzsch_pyridine', + [('aldehyde', {1: 11, 2: 12}), + ('beta_ketoester', None), + ('beta_ketoester', {1: 6, 2: 7, 3: 8, 4: 9, 5: 10, 100: 
200})], + '[N:20]:1:[A:1]:[A:5](-[A:3](=[A:4])-[A:100]):[A:11]:[A:10](-[A:8](=[A:9])-[A:200]):[A:6]:1')) + + # nitrile_grignard: R-C≡N + RMgX → ketone (via imine hydrolysis) + for grignard in ('alkyl_grignard', 'aryl_grignard'): + rules.append(_make_reactor('nitrile_grignard', + [('nitrile', None), + (grignard, {100: 200, 101: 201, 1: 3})], + '[A:1](=[O:20])-[A:3]')) + + # N-alkylation: alkyl_halide/triflate/tosylate + pyrrole/pyrazole/imidazole/pyridol + for halide in ('alkyl_chloride', 'alkyl_bromide', 'alkyl_iodide', 'alkyl_triflate', 'alkyl_tosylate'): + rules.append(_make_reactor('n_alkylation', + [(halide, None), + ('pyrrole', {1: 3})], + '[A:1]-[A:3]')) + rules.append(_make_reactor('n_alkylation', + [(halide, None), + ('pyrazole', {1: 3, 2: 4})], + '[A:1]-[A:4]:[A:3]')) + rules.append(_make_reactor('n_alkylation', + [(halide, None), + ('imidazole', {1: 3, 2: 4, 3: 5})], + '[A:1]-[A:5]:[A:4]:[A:3]')) + rules.append(_make_reactor('n_alkylation', + [(halide, None), + ('pyridol', {1: 3, 2: 4, 3: 5})], + '[A:1]-[A:3]-[A:4]=[A:5]')) + + # N-alkylation: alkyl_halide/triflate + primary/secondary amine + for halide in ('alkyl_chloride', 'alkyl_bromide', 'alkyl_iodide', 'alkyl_triflate', + 'boronate_alkyl_chloride', 'boronate_alkyl_bromide', 'boronate_alkyl_iodide'): + for amine in ('primary_amine', 'primary_aniline'): + rules.append(_make_reactor('n_alkylation', + [(halide, None), + (amine, {1: 3, 2: 4})], + '[A:1]-[A:3]-[A:4]')) + for amine in ('secondary_amine', 'secondary_aniline'): + rules.append(_make_reactor('n_alkylation', + [(halide, None), + (amine, {1: 3, 2: 4, 3: 5})], + '[A:1]-[A:3](-[A:4])-[A:5]')) + + # urea from 2 amines (CDI/phosgene implicit) + for amine1 in ('primary_amine', 'primary_aniline'): + for amine2 in ('primary_amine', 'primary_aniline'): + rules.append(_make_reactor('urea_from_amines', + [(amine1, None), + (amine2, {1: 3, 2: 4})], + '[A:1](-[A:2])-[C:20](=[O:21])-[A:3]-[A:4]')) + + # oxazoline: amino_alcohol + carboxylic_acid → 2-oxazoline 
ring + rules.append(_make_reactor('oxazoline', + [('amino_alcohol', None), + ('carboxylic_acid', {100: 200, 1: 5, 2: 6})], + '[A:5]1=[A:1]-[A:2]-[A:3]-[A:4]-1')) + + # oxime O-alkylation: oxime + alkyl_halide → oxime ether + for halide in ('alkyl_chloride', 'alkyl_bromide', 'alkyl_iodide'): + rules.append(_make_reactor('oxime_alkylation', + [('oxime', None), + (halide, {100: 200, 1: 4})], + '[A:1](-[A:4])-[A:2]=[A:3]')) + + # oxime ether formation: O-alkylhydroxylamine + aldehyde → R-O-N=CH-R' + rules.append(_make_reactor('oxime_ether', + [('O_alkylhydroxylamine', None), + ('aldehyde', {1: 4, 2: 5})], + '[A:3]-[A:2]-[A:1]=[A:4]')) + + # aldol with ketone (extension): alpha_ketone + ketone + rules.append(_make_reactor('aldol', + [('alpha_ketone', None), + ('ketone', {1: 4, 2: 5})], + '[A:1](=[A:2])-[A:3]-[A:4]-[A:5]')) + + # hydrazone formation: aldehyde/ketone + aryl_hydrazine -> C=N-NH-Ar + for carbonyl in ('aldehyde', 'ketone'): + rules.append(_make_reactor('hydrazone', + [(carbonyl, None), + ('aryl_hydrazine', {1: 3, 2: 4, 3: 5, 4: 6})], + '[A:1]=[A:4]-[A:3]-[A:5]:[A:6]')) + + return rules + + +rules = Proxy(_rules) + + +__all__ = ['rules'] diff --git a/chython/algorithms/groups/_reductions.py b/chython/algorithms/groups/_reductions.py new file mode 100644 index 00000000..ef3b0262 --- /dev/null +++ b/chython/algorithms/groups/_reductions.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2026 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. 
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, see <https://www.gnu.org/licenses/>.
#
from lazy_object_proxy import Proxy


def _make_reactor(rxn_name, fg_name, output_fg, product_smarts):
    """Build one reduction rule entry.

    :param rxn_name: label of the reduction (several entries may share one label).
    :param fg_name: key of the functional group query looked up in ``_functional.rules``;
        it is used as the single reactant pattern.
    :param output_fg: name of the functional group expected in the product, or None.
    :param product_smarts: SMARTS of the product pattern; atom numbers refer to the query atoms.
    :return: tuple ``(rxn_name, fg_name, output_fg, reactor)``.
    """
    # imports are deferred to call time (presumably to avoid import cycles inside the package)
    from ._functional import rules as functional_groups
    from ...reactor import Reactor
    from ... import smarts

    reactor = Reactor((functional_groups[fg_name],), (smarts(product_smarts),),
                      delete_atoms=True, one_shot=True, fix_broken_pyrroles=True)
    return rxn_name, fg_name, output_fg, reactor


def _rules():
    """Assemble all reduction rules in their declaration order.

    :return: list of ``(rxn_name, fg_name, output_fg, reactor)`` tuples.
    """
    specs = (
        # carbonyl-type groups → alcohols
        ('aldehyde_to_alcohol', 'aldehyde', 'primary_alcohol', '[A:2]-[A:1]'),  # NaBH4, LiAlH4
        ('ketone_to_alcohol', 'ketone', 'secondary_alcohol', '[A:2]-[A:1]'),  # NaBH4, LiAlH4
        ('acid_to_alcohol', 'carboxylic_acid', 'primary_alcohol', '[A:2]-[A:1]'),  # LiAlH4
        ('ester_to_alcohol', 'ester', 'primary_alcohol', '[A:2]-[A:1]'),  # LiAlH4; the OR' part is lost

        # amides → amines (LiAlH4, BH3)
        ('amide_to_amine', 'primary_amide', 'primary_amine', '[A:1]-[A:2]'),
        ('amide_to_amine', 'secondary_amide', 'secondary_amine', '[A:1]-[A:2]'),

        # nitrogen groups → amines
        ('nitrile_to_amine', 'nitrile', 'primary_amine', '[A:2]-[A:1]'),  # LiAlH4, H2/cat
        ('nitro_to_amine', 'nitro', 'primary_aniline', '[A:1]'),  # H2/Pd, SnCl2, Fe/HCl
        ('azide_to_amine', 'azide', 'primary_amine', '[A:1]'),  # PPh3/H2O, H2/Pd

        # alkene hydrogenation: C=C → C-C (H2/Pd, H2/Pt)
        ('alkene_hydrogenation', 'alkene', None, '[A:1]-[A:2]'),
        ('alkene_hydrogenation', 'terminal_alkene', None, '[A:1]-[A:2]'),

        # deoxygenation: alcohol → alkane (Barton-McCombie, Appel + reduction)
        ('deoxygenation', 'primary_alcohol', None, '[A:2]'),
        ('deoxygenation', 'secondary_alcohol', None, '[A:2]'),

        # carbonyl → primary amine (reductive amination with NH3)
        ('carbonyl_to_amine', 'aldehyde', 'primary_amine', '[A:1]-[N:20]'),
        ('carbonyl_to_amine', 'ketone', 'primary_amine', '[A:1]-[N:20]'),
    )
    return [_make_reactor(*spec) for spec in specs]


# reactors are built lazily, on first access to the proxy
rules = Proxy(_rules)


__all__ = ['rules']
#
# chython is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, see <https://www.gnu.org/licenses/>.
#
from lazy_object_proxy import Proxy


def _make_reactor(rxn_name, fg_name, output_fg, product_smarts):
    """Build one functional group transformation rule entry.

    :param rxn_name: label of the transformation (several entries may share one label).
    :param fg_name: key of the functional group query looked up in ``_functional.rules``;
        it is used as the single reactant pattern.
    :param output_fg: name of the functional group expected in the product.
    :param product_smarts: SMARTS of the product pattern; atom numbers refer to the query atoms.
    :return: tuple ``(rxn_name, fg_name, output_fg, reactor)``.
    """
    # imports are deferred to call time (presumably to avoid import cycles inside the package)
    from ._functional import rules as functional_groups
    from ...reactor import Reactor
    from ... import smarts

    reactor = Reactor((functional_groups[fg_name],), (smarts(product_smarts),),
                      delete_atoms=True, one_shot=True, fix_broken_pyrroles=True)
    return rxn_name, fg_name, output_fg, reactor


def _rules():
    """Assemble all single-reactant transformation rules in their declaration order.

    :return: list of ``(rxn_name, fg_name, output_fg, reactor)`` tuples.
    """
    # product patterns shared by several rules
    boronic_acid = '[A:1]-[B:20](-[O:21])-[O:22]'
    pinacol_ester = '[A:1]-[B:20]1-[O:21]-[C:22]([C:23])([C:24])-[C:25]([C:26])([C:27])-[O:28]-1'
    cyano = '[A:1]-[C:20]#[N:21]'
    triflate = '[A:2]-[A:1]-[S:20](=[O:21])(=[O:22])-[C:23]([F:24])([F:25])[F:26]'

    # functional group families, listed in the order the rules are emitted
    aryl_halides = ('aryl_bromide', 'aryl_iodide', 'aryl_chloride')
    alkyl_halides = ('alkyl_bromide', 'alkyl_iodide', 'alkyl_chloride')
    alcohols = ('primary_alcohol', 'secondary_alcohol')

    specs = [
        # 1,3-diketone → isoxazole (+ hydroxylamine, implicit)
        ('isoxazole', '1_3_diketone', 'isoxazole', '[A:1]:1:[O:20]:[N:21]:[A:3]:[A:5]:1'),
        # 1,4-diketone → pyridazine (+ hydrazine, implicit)
        ('pyridazine', '1_4_diketone', 'pyridazine', '[A:1]:1:[N:20]:[N:21]:[A:3]:[A:6]:[A:5]:1'),
    ]

    # Appel: alcohol → alkyl halide (CCl4/CBr4 + PPh3)
    specs += [('appel', fg, 'alkyl_bromide', '[Br:20]-[A:2]') for fg in alcohols]
    specs += [('appel_chloride', fg, 'alkyl_chloride', '[Cl:20]-[A:2]') for fg in alcohols]

    # Miyaura borylation: ArX → ArB(OH)2 and ArX → ArBpin (Pd, B2pin2)
    specs += [('borylation_acid', fg, 'aryl_boronic_acid', boronic_acid) for fg in aryl_halides]
    specs += [('borylation_ester', fg, 'aryl_boronic_ester', pinacol_ester) for fg in aryl_halides]

    specs += [
        # nitrile hydrolysis: R-C≡N → R-C(=O)-NH2 (partial, to amide)
        ('nitrile_hydrolysis', 'nitrile', 'primary_amide', '[A:2]-[A:1]=[O:20]'),
        # electrophilic aromatic nitration: Ar-H → Ar-NO2
        ('nitration', 'arene_ch', 'nitro', '[A:1]-[N;+:20](=[O:21])[O;-:22]'),
        # electrophilic aromatic halogenation: Ar-H → Ar-X
        ('bromination', 'arene_ch', 'aryl_bromide', '[A:1]-[Br:20]'),
        ('chlorination', 'arene_ch', 'aryl_chloride', '[A:1]-[Cl:20]'),
        ('iodination', 'arene_ch', 'aryl_iodide', '[A:1]-[I:20]'),
        # benzylic halogenation: Ar-CH2R → Ar-CHBrR (NBS, Br2/hv)
        ('benzylic_bromination', 'benzylic_ch', 'alkyl_bromide', '[A:1](-[Br:20])-[A:2]'),
        # alpha-halogenation: alpha_ketone → alpha-bromoketone (NBS/Br2)
        ('alpha_halogenation', 'alpha_ketone', 'alpha_haloketone', '[A:1](=[A:2])-[A:3](-[Br:20])'),
    ]

    # alkyl borylation: R-X → R-B(OH)2 and R-X → R-Bpin
    specs += [('borylation_acid', fg, 'alkyl_boronic_acid', boronic_acid) for fg in alkyl_halides]
    specs += [('borylation_ester', fg, 'alkyl_boronic_ester', pinacol_ester) for fg in alkyl_halides]

    specs += [
        # ester to amide: R-C(=O)-OR' → R-C(=O)-NH2 (NH3)
        ('ester_to_amide', 'ester', 'primary_amide', '[A:1](=[A:2])-[N:20]'),
        # ester to hydroxamic acid: R-C(=O)-OR' → R-C(=O)-NH-OH (NH2OH)
        ('ester_to_hydroxamic_acid', 'ester', 'hydroxamic_acid', '[A:1](=[A:2])-[N:20]-[O:21]'),
        # amide hydrolysis: R-C(=O)-NHR → R-C(=O)-OH
        ('amide_hydrolysis', 'primary_amide', 'carboxylic_acid', '[A:2](=[A:3])-[O:20]'),
        ('amide_hydrolysis', 'secondary_amide', 'carboxylic_acid', '[A:2](=[A:3])-[O:20]'),
    ]

    # cyanation: R-X → R-CN (NaCN/CuCN), alkyl halides first, then aryl halides
    specs += [('cyanation', fg, 'nitrile', cyano) for fg in alkyl_halides + aryl_halides]

    # dehydration: R-C(=O)-NH2 → R-C≡N (P2O5/SOCl2)
    specs.append(('dehydration', 'primary_amide', 'nitrile', '[A:2]#[A:1]'))

    # triflation: phenol/alcohol → triflate (Tf2O)
    specs += [('triflation', fg, out, triflate)
              for fg, out in (('phenol', 'aryl_triflate'),
                              ('primary_alcohol', 'alkyl_triflate'),
                              ('secondary_alcohol', 'alkyl_triflate'))]

    specs += [
        # nitrile full hydrolysis: R-C≡N → R-COOH
        ('nitrile_to_acid', 'nitrile', 'carboxylic_acid', '[A:1](=[O:20])-[O:21]'),
        # tertiary alcohol dehydration: R3C-OH → R2C=CR (loss of water)
        ('alcohol_dehydration', 'tertiary_alcohol_with_alpha_h', 'alkene', '[A:2]=[A:3]'),
        # oximation: ketone/aldehyde → oxime (+ NH2OH implicit)
        ('oximation', 'ketone', 'oxime', '[O:20]-[N:21]=[A:1]'),
        ('oximation', 'aldehyde', 'oxime', '[O:20]-[N:21]=[A:1]'),
    ]

    return [_make_reactor(*spec) for spec in specs]


# reactors are built lazily, on first access to the proxy
rules = Proxy(_rules)


__all__ = ['rules']
# - -template = { - 'name': 'Aryl amination reaction', - 'description': 'C-N coupling of halo-aryles and amines', - 'templates': [ - { - 'product': '[N;D2,D3;z1:1]-;!@[C;a:2]', - 'reactants': [ - '[A:1]', - '[A:2]-[Br;M]' - ] - } - ] -} diff --git a/chython/algorithms/groups/test/test_deprotection.py b/chython/algorithms/groups/test/test_deprotection.py new file mode 100644 index 00000000..1de37f56 --- /dev/null +++ b/chython/algorithms/groups/test/test_deprotection.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2022-2026 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
#
import pytest
from chython import smiles
from chython.algorithms.groups._protective import rules


# one parametrized case per protective group rule: (name, q, keep, add, test_smi, cleaved_smi, decoys)
_test_cases = [(name, *rule) for name, rule in rules.items()]


def _canonical(smi):
    """Parse a SMILES string and call ``canonicalize()`` on the resulting molecule."""
    mol = smiles(smi)
    mol.canonicalize()
    return mol


@pytest.mark.parametrize('name,q,keep,add,test_smi,cleaved_smi,decoys', _test_cases, ids=[x[0] for x in _test_cases])
def test_deprotection(name, q, keep, add, test_smi, cleaved_smi, decoys):
    """Check one rule: the query matches, deprotection gives the expected product, decoys do not match."""
    protected = _canonical(test_smi)
    # query matches protected form
    assert q < protected, 'query does not match protected form'
    # deprotection produces expected product
    expected = _canonical(cleaved_smi)
    protected.remove_protection(name)
    assert protected == expected, f'deprotection gave {protected}, expected {expected}'
    # decoys are not matched
    for decoy_smi in decoys:
        decoy = _canonical(decoy_smi)
        assert not q < decoy, f'query matches decoy {decoy}'


def test_no_false_matches_from_ordering():
    """Ensure general patterns come after specific ones to prevent false atom claims.

    If pattern B matches within the protected molecule of pattern A, then A must come
    BEFORE B in the rules dict (so A claims atoms first). This test fails when B comes
    before A - indicating rules dict needs reordering.
    """
    names = list(rules.keys())
    # enumerate gives the positions directly instead of repeated list.index() scans
    for idx_a, (name_a, (_, _, _, test_smi, *_)) in enumerate(rules.items()):
        mol = _canonical(test_smi)
        for idx_b, name_b in enumerate(names[:idx_a]):  # patterns that come BEFORE name_a
            q_b = rules[name_b][0]
            if next(q_b.get_mapping(mol), None) is not None:
                pytest.fail(
                    f"Pattern '{name_b}' (idx {idx_b}) matches within "
                    f"protected molecule of '{name_a}' (idx {idx_a}), but comes before it. "
                    f"Move '{name_b}' after '{name_a}' in _protective.py rules dict."
                )


def test_protective_groups_no_overlap():
    """Ensure protective_groups doesn't report false positives due to sub-pattern overlap.

    Checks that deletable atoms (atoms not in keep list) of different detected PGs
    don't overlap - matching the logic in the protective_groups property.
    """
    for name, (q, keep, _add, test_smi, *_) in rules.items():
        mol = _canonical(test_smi)
        pgs = mol.protective_groups
        assert name in pgs, f'{name} not detected in its own protected SMILES'
        # deletable atoms of the rule under test; they do not depend on `other`,
        # so they are collected once per rule rather than once per detected group
        self_atoms = set()
        for mp in q.get_mapping(mol, automorphism_filter=False):
            self_atoms.update(m for n, m in mp.items() if n not in keep)
        # verify deletable atoms of detected PGs don't overlap
        for other in pgs:
            if other == name:
                continue
            q_other, keep_other = rules[other][0], rules[other][1]
            for mp in q_other.get_mapping(mol, automorphism_filter=False):
                other_atoms = {m for n, m in mp.items() if n not in keep_other}
                assert not self_atoms.intersection(other_atoms), (
                    f"'{other}' deletable atoms overlap with '{name}' on '{test_smi}'. "
                    f"Reorder rules in _protective.py so '{name}' comes before '{other}'."
                )


@pytest.mark.parametrize('smi,expected', [
    ('CC(C)(C)OC(=O)N(C(=O)OC(C)(C)C)c1ccccc1', {'amine_boc': 2}),
    ('CC(C)(C)OC(=O)Nc1ccc(OC(C)(C)C)cc1', {'amine_boc': 1, 'hydroxyl_tbu': 1}),
    ('c1ccc(COC(=O)Nc2ccc(OCc3ccccc3)cc2)cc1', {'amine_cbz': 1, 'hydroxyl_benzyl': 1}),
    ('CC(C)OCOCc1ccccc1', {'hydroxyl_bom': 1}),
    ('C(OCOC)(OCOC)C', {'hydroxyl_mom': 2}),
    ('OC(CO[Si](C)(C)C)O[Si](C)(C)C', {'hydroxyl_tms': 2}),
], ids=['double_boc', 'boc_and_tbu', 'cbz_and_benzyl', 'bom_no_benzyl', 'double_mom', 'double_tms'])
def test_multiple_pg_on_molecule(smi, expected):
    """Test correct detection of multiple PGs and no false positives from sub-pattern overlap."""
    pgs = _canonical(smi).protective_groups
    assert pgs == expected, f'got {pgs}'
chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +import pytest +from chython import smiles + + +# (test_id, reaction_name, reaction_smiles, expected_count, exclude) +# Reaction SMILES are parsed, canonicalized, then reactants are fed to @ operator. +# expected_count: number of results from @ (2 for tautomeric azoles). +# exclude: optional set of reaction names to filter from results (known overlaps). +_two_component = [ + ('amidation_acyl_chloride_primary_amine', + 'amidation', 'ClC(=O)C.NCC>>CCNC(=O)C', 1), + ('suzuki_aryl_bromide_boronic_acid', + 'suzuki', 'Brc1ccc(cc1)C.OB(O)c1ccccc1>>c1cc(ccc1-c1ccccc1)C', 1), + ('suzuki_alkenyl_boronic_acid', + 'suzuki', 'Brc1ccc(cc1)C.OB(O)C=CC>>c1cc(ccc1C=CC)C', 1), + ('suzuki_alkyl_boronic_acid', + 'suzuki', 'Brc1ccc(cc1)C.OB(O)CCC>>c1cc(ccc1CCC)C', 1), + ('buchwald_hartwig_aryl_bromide_primary_amine', + 'buchwald_hartwig', 'Brc1ccc(cc1)C.NCC>>c1cc(ccc1C)NCC', 1), + ('buchwald_hartwig_pyridone_bromide_primary_amine', + 'buchwald_hartwig', 'CN1C=CC(Br)=CC1=O.NCC>>CN1C=CC(=CC1=O)NCC', 1), + ('mitsunobu_ether_primary_alcohol_phenol', + 'mitsunobu', 'OCC.Oc1ccc(cc1)C>>c1cc(ccc1C)OCC', 1), + ('mitsunobu_ester_primary_alcohol_acid', + 'mitsunobu', 'OCC.OC(=O)c1ccccc1>>c1ccccc1C(OCC)=O', 1), + ('deoxygenative_coupling_alcohol_aryl_bromide', + 'deoxygenative_coupling', 'OCC.Brc1ccc(cc1)C>>c1cc(ccc1C)CC', 1), + 
('decarboxylative_coupling_acid_aryl_bromide', + 'decarboxylative_coupling', 'OC(=O)CC.Brc1ccc(cc1)C>>c1cc(ccc1C)CC', 1), + ('xec_aryl_bromide_alkyl_bromide', + 'xec', 'Brc1ccc(cc1)C.BrCCC>>c1cc(ccc1CCC)C', 1), + ('reductive_amination_aldehyde_primary_amine', + 'reductive_amination', 'O=CC.NCc1ccccc1>>c1cc(ccc1)CNCC', 1), + ('reductive_amination_ketone_secondary_amine', + 'reductive_amination', 'O=C(C)C.N(C)CC>>CC(C)N(C)CC', 1), + ('ullmann_phenol_aryl_bromide_cresol', + 'ullmann_phenol', 'Brc1ccc(cc1)C.Oc1ccc(cc1)CC>>c1cc(ccc1C)Oc1ccc(cc1)CC', 1), + ('ullmann_pyrrole_aryl_bromide_pyrrole', + 'ullmann_pyrrole', 'Brc1ccc(cc1)C.[nH]1cccc1>>c1ccn(c1)-c1ccc(cc1)C', 1), + ('ullmann_pyrrole_aryl_bromide_pyrazole', + 'ullmann_pyrrole', 'Brc1ccc(cc1)C.[nH]1nccc1>>c1cc(ccc1-n1nccc1)C', 2), + ('ullmann_pyrrole_aryl_bromide_imidazole', + 'ullmann_pyrrole', 'Brc1ccc(cc1)C.[nH]1cncc1>>c1cc(ccc1-n1ccnc1)C', 2), + ('ullmann_pyrrole_pyridone_chloride_pyrrole', + 'ullmann_pyrrole', 'CN1C=CC(Cl)=CC1=O.[nH]1cccc1>>CN1C=CC(=CC1=O)-n1cccc1', 1), + ('ullmann_pyrrole_pyridol_aryl_bromide', + 'ullmann_pyrrole', 'Brc1ccc(cc1)C.Oc1ccccn1>>O=C1C=CC=CN1c1ccc(cc1)C', 1, {'ullmann_phenol'}), + ('chan_lam_boronic_acid_primary_amine', + 'chan_lam', 'OB(O)c1ccc(cc1)C.NCC>>c1cc(ccc1C)NCC', 1), + ('chan_lam_boronic_acid_phenol', + 'chan_lam', 'OB(O)c1ccc(cc1)C.Oc1ccc(cc1)CC>>c1cc(ccc1C)Oc1ccc(cc1)CC', 1), + ('sulfonylation_sulfonyl_chloride_alcohol', + 'sulfonylation', 'ClS(=O)(=O)c1ccccc1.OCCC>>c1ccccc1S(=O)(=O)OCCC', 1), + ('sulfonamide_formation_sulfonyl_chloride_amine', + 'sulfonamide_formation', 'ClS(=O)(=O)c1ccccc1.NCC>>c1ccccc1S(=O)(=O)NCC', 1), + ('aminolysis_ester_primary_amine', + 'aminolysis', 'COC(=O)c1ccc(cc1)C.NCC>>c1cc(ccc1C(NCC)=O)C', 1), + ('grignard_alkyl_bromide_aldehyde', + 'grignard', 'BrCC.O=Cc1ccc(cc1)C>>c1cc(ccc1C)C(CC)O', 1), + ('grignard_alkyl_bromide_ketone', + 'grignard', 'BrCC.O=C(C)c1ccccc1>>c1cc(ccc1)C(O)(C)CC', 1), + ('sonogashira_aryl_bromide_terminal_alkyne', + 
'sonogashira', 'Brc1ccc(cc1)C.C#CC>>c1cc(ccc1C)C#CC', 1), + ('urea_synthesis_isocyanate_primary_amine', + 'urea_synthesis', 'O=C=Nc1ccccc1.NCC>>c1ccccc1NC(NCC)=O', 1), + ('cuaac_azide_terminal_alkyne', + 'cuaac', 'CC#C.CN=[N+]=[N-]>>Cc1cn(C)nn1', 1), + ('snar_aryl_fluoride_primary_amine', + 'snar', 'Fc1ccc(cc1)C(F)(F)F.NCC>>c1cc(ccc1C(F)(F)F)NCC', 1), + ('snar_aryl_fluoride_phenol', + 'snar', 'Fc1ccc(cc1)C(F)(F)F.Oc1ccc(cc1)C>>c1cc(ccc1C(F)(F)F)Oc1ccc(cc1)C', 1), + ('snar_aryl_fluoride_thiol', + 'snar', 'Fc1ccc(cc1)C(F)(F)F.SCC>>c1cc(ccc1C(F)(F)F)SCC', 1), + ('williamson_alkyl_bromide_phenol', + 'williamson', 'BrCC.Oc1ccc(cc1)C>>c1cc(ccc1C)OCC', 1), + ('williamson_alkyl_bromide_alcohol', + 'williamson', 'BrCCC.OCC>>CCCOCC', 1), + ('williamson_mesylate_phenol', + 'williamson', 'CS(=O)(=O)OCC.Oc1ccc(cc1)C>>c1cc(ccc1C)OCC', 1), + ('williamson_triflate_alcohol', + 'williamson', 'FC(F)(F)S(=O)(=O)OCC.OCCC>>CCCOCC', 1), + ('acylation_acyl_chloride_alcohol', + 'acylation', 'ClC(=O)c1ccccc1.OCC>>c1ccccc1C(OCC)=O', 1), + ('thioether_thiol_alkyl_bromide', + 'thioether', 'SCC.BrCCC>>CCSCCC', 1), + ('carbamate_isocyanate_alcohol', + 'carbamate', 'O=C=Nc1ccccc1.OCC>>c1ccccc1NC(OCC)=O', 1), + ('tetrazole_azide_nitrile', + 'tetrazole', 'CN=[N+]=[N-].N#Cc1ccccc1>>c1ccc(cc1)-c1nnn(C)n1', 1), + ('kumada_aryl_bromide_alkyl_grignard', + 'kumada', 'Brc1ccc(cc1)C.C[Mg]Br>>c1cc(ccc1C)C', 1), + ('kumada_aryl_bromide_aryl_grignard', + 'kumada', 'Brc1ccc(cc1)C.c1ccccc1[Mg]Br>>c1cc(ccc1-c1ccccc1)C', 1), + ('negishi_aryl_bromide_alkyl_zinc', + 'negishi', 'Brc1ccc(cc1)C.CC[Zn]Cl>>c1cc(ccc1C)CC', 1), + ('stille_aryl_bromide_aryl_stannane', + 'stille', 'Brc1ccc(cc1)C.c1ccccc1[Sn](C)(C)C>>c1cc(ccc1-c1ccccc1)C', 1), + ('stille_aryl_bromide_alkenyl_stannane', + 'stille', 'Brc1ccc(cc1)C.C=C[Sn](C)(C)C>>c1cc(ccc1C=C)C', 1), + ('hiyama_aryl_bromide_aryl_silane', + 'hiyama', 'Brc1ccc(cc1)C.c1ccccc1[Si](C)(C)C>>c1cc(ccc1-c1ccccc1)C', 1), + ('grignard_explicit_alkyl_grignard_aldehyde', + 'grignard', 
'CC[Mg]Br.O=Cc1ccccc1>>c1ccccc1C(CC)O', 1), + ('nitrile_grignard_alkyl', + 'nitrile_grignard', 'N#CCC.CC[Mg]Br>>CCC(=O)CC', 1), + ('heck_aryl_bromide_terminal_alkene', + 'heck', 'Brc1ccc(cc1)C.C=CC>>c1cc(ccc1C=CC)C', 1), + ('heck_aryl_bromide_internal_alkene', + 'heck', 'Brc1ccccc1.CC=CC>>c1ccccc1C(C)=CC', 1), + ('cross_metathesis_two_terminal_alkenes', + 'cross_metathesis', 'C=CC.C=CCCC>>CC=CCCC', 1), + ('wittig_ylide_aldehyde', + 'wittig', 'c1ccccc1P(c1ccccc1)(c1ccccc1)=C.c1cc(ccc1C=O)C>>c1cc(ccc1C)C=C', 1), + ('hwe_phosphonate_aldehyde', + 'hwe', 'CCP(=O)(OC)OC.c1cc(ccc1C=O)C>>c1cc(ccc1C=CC)C', 1), + ('weinreb_explicit_grignard', + 'weinreb', 'CC(=O)N(C)OC.CC[Mg]Br>>CCC(=O)C', 1), + ('weinreb_ester_surrogate_grignard', + 'weinreb', 'COC(=O)c1ccccc1.CC[Mg]Br>>c1ccccc1C(=O)CC', 1), + ('weinreb_acyl_chloride_surrogate_grignard', + 'weinreb', 'c1ccccc1C(=O)Cl.CC[Mg]Br>>c1ccccc1C(=O)CC', 1), + ('friedel_crafts_acyl_chloride_benzene', + 'friedel_crafts', 'CC(=O)Cl.c1ccccc1>>c1ccccc1C(=O)C', 1), + ('hantzsch_thiazole_chloroacetone_thioacetamide', + 'hantzsch_thiazole', 'ClCC(=O)C.NC(=S)C>>c1(C)scc(C)n1', 1), + ('knorr_pyrazole_acetylacetone_methylhydrazine', + 'knorr_pyrazole', 'O=C(C)CC(=O)C.NNC>>c1(C)nn(c(C)c1)C', 1), + ('paal_knorr_hexanedione_ethylamine', + 'paal_knorr', 'O=C(C)CCC(=O)C.NCC>>Cc1ccc(C)n1CC', 1, {'reductive_amination'}), + ('fischer_indole_phenylhydrazine_acetone', + 'fischer_indole', 'NNc1ccccc1.O=C(C)C>>c1cc2c(cc(C)[nH]2)cc1', 1, {'hydrazone'}), + ('benzimidazole_diamine_aldehyde', + 'benzimidazole', 'Nc1ccccc1N.O=CC>>c1(C)[nH]c2c(cccc2)n1', 1, {'reductive_amination'}), + ('benzoxazole_aminophenol_aldehyde', + 'benzoxazole', 'Nc1ccccc1O.O=CC>>c1cccc2nc(oc12)C', 1, {'reductive_amination'}), + ('benzothiazole_aminothiophenol_aldehyde', + 'benzothiazole', 'Nc1ccccc1S.O=CC>>Cc1nc2c(s1)cccc2', 1, {'reductive_amination'}), + ('quinoxaline_diamine_diketone', + 'quinoxaline', 'Nc1ccccc1N.O=C(C)C(=O)C>>c12c(nc(c(n1)C)C)cccc2', 1, {'reductive_amination'}), + 
('friedlander_aminobenzaldehyde_acetone', + 'friedlander', 'Nc1ccccc1C=O.O=C(C)C>>n1c2c(ccc1C)cccc2', 1, {'reductive_amination', 'aldol'}), + ('pictet_spengler_phenethylamine_aldehyde', + 'pictet_spengler', 'NCCc1ccccc1.O=CC>>N1CCc2ccccc2C1C', 1, {'reductive_amination'}), + ('oxadiazole_124_amidoxime_acyl_chloride', + 'oxadiazole_124', 'N=C(NO)C.ClC(=O)C>>c1(C)noc(n1)C', 1), + ('pyrimidine_diketone_amidine', + 'pyrimidine', 'O=C(C)CC(=O)C.NC(=N)C>>Cc1nc(nc(C)c1)C', 1), + ('van_leusen_oxazole_tosmic_aldehyde', + 'van_leusen_oxazole', '[C-]#[N+]CS(=O)(=O)c1ccc(cc1)C.O=CC>>c1oc(C)nc1', 1), + ('imidazopyridine_aminopyridine_haloketone', + 'imidazopyridine', 'Nc1ncccc1.ClCC(=O)C>>c1cccn2c1nc(C)c2', 1, {'reductive_amination', 'n_alkylation'}), + ('niementowski_anthranilic_acid_amide', + 'niementowski', 'Nc1ccccc1C(=O)O.NC(=O)C>>Oc1c2c(nc(n1)C)cccc2', 1), + ('liebeskind_srogl_thioester_boronic_acid', + 'liebeskind_srogl', 'O=C(SC)c1ccccc1.OB(O)c1ccccc1>>O=C(c1ccccc1)c1ccccc1', 1), + ('knoevenagel_malononitrile_aldehyde', + 'knoevenagel', 'N#CCC#N.O=Cc1ccccc1>>c1cc(ccc1)C=C(C#N)C#N', 1), + ('aldol_acetone_benzaldehyde', + 'aldol', 'O=C(C)C.O=Cc1ccccc1>>c1cc(ccc1)C(CC(=O)C)O', 1, {'reductive_amination'}), + ('larock_indole_bromoaniline_butyne', + 'larock_indole', 'Nc1ccccc1Br.CC#CC>>Cc1[nH]c2c(cccc2)c1C', 1, {'reductive_amination'}), + ('doebner_miller_aniline_crotonaldehyde', + 'doebner_miller', 'Nc1ccccc1.O=CC=CC>>C1=CC=CC=2C(C)=CC=NC12', 1, {'reductive_amination'}), +] + +_three_component = [ + ('ugi_3cr_aldehyde_amine_isocyano', + 'ugi_3cr', 'O=CC.NCC.[C-]#[N+]C>>CCNC(C(=O)NC)C', 1), + ('biginelli_aldehyde_ketoester_urea', + 'biginelli', 'O=CC.O=C(C)CC(=O)OC.NC(=O)N>>N1C(NC(C)C=C1C)=O', 1), + ('hantzsch_pyridine_aldehyde_ketoester', + 'hantzsch_pyridine', 'O=CC.O=C(C)CC(=O)OCC.O=C(C)CC(=O)OCC>>c1(C)c(c(nc(c1C(=O)OCC)C)C)C(=O)OCC', 1), +] + +_four_component = [ + ('ugi_4cr_aldehyde_amine_acid_isocyano', + 'ugi_4cr', 
'O=CC.NCC.OC(=O)C.[C-]#[N+]C>>N(CC)(C(C(NC)=O)C)C(C)=O', 1), +] + +_transformations = [ + ('isoxazole_from_diketone', 'isoxazole', 'O=C(C)CC(=O)C>>c1(onc(C)c1)C', 1, + {'alpha_halogenation', 'oximation'}), + ('pyridazine_from_diketone', 'pyridazine', 'O=C(C)CCC(=O)C>>c1(C)nnc(C)cc1', 1, + {'alpha_halogenation', 'oximation'}), + ('appel_primary_alcohol', 'appel', 'OCC>>BrCC', 1, {'triflation', 'appel_chloride'}), + ('appel_secondary_alcohol', 'appel', 'OC(C)CC>>BrC(C)CC', 1, {'triflation', 'appel_chloride'}), + ('borylation_aryl_bromide', 'borylation_acid', 'Brc1c(C)c(C)c(C)c(C)c1C>>OB(O)c1c(C)c(C)c(C)c(C)c1C', 1, + {'borylation_ester', 'cyanation'}), + ('nitrile_hydrolysis', 'nitrile_hydrolysis', 'N#CCC>>NC(=O)CC', 1, {'nitrile_to_acid'}), + ('nitration_benzene', 'nitration', 'c1ccccc1>>[O-][N+](=O)c1ccccc1', 1, + {'bromination', 'chlorination', 'iodination'}), + ('bromination_benzene', 'bromination', 'c1ccccc1>>Brc1ccccc1', 1, + {'nitration', 'chlorination', 'iodination'}), + ('chlorination_benzene', 'chlorination', 'c1ccccc1>>Clc1ccccc1', 1, + {'nitration', 'bromination', 'iodination'}), + ('iodination_benzene', 'iodination', 'c1ccccc1>>Ic1ccccc1', 1, + {'nitration', 'bromination', 'chlorination'}), +] + +_oxidations = [ + ('alcohol_to_aldehyde', 'alcohol_to_aldehyde', 'OCC>>O=CC', 1), + ('alcohol_to_ketone', 'alcohol_to_ketone', 'OC(C)C>>O=C(C)C', 1), + ('aldehyde_to_acid', 'aldehyde_to_acid', 'O=CC>>OC(=O)C', 1), + ('dihydroxylation', 'dihydroxylation', 'C=Cc1ccccc1>>OC(CO)c1ccccc1', 1), + ('thioether_to_sulfoxide', 'thioether_to_sulfoxide', 'CSC>>CS(=O)C', 1, {'thioether_to_sulfone'}), + ('thioether_to_sulfone', 'thioether_to_sulfone', 'CSC>>CS(=O)(=O)C', 1, {'thioether_to_sulfoxide'}), + ('sulfoxide_to_sulfone', 'sulfoxide_to_sulfone', 'CS(=O)C>>CS(=O)(=O)C', 1), +] + +_reductions = [ + ('aldehyde_to_alcohol', 'aldehyde_to_alcohol', 'O=Cc1ccccc1>>OCc1ccccc1', 1, {'carbonyl_to_amine'}), + ('ketone_to_alcohol', 'ketone_to_alcohol', 
'O=C(C)c1ccccc1>>OC(C)c1ccccc1', 1, {'carbonyl_to_amine'}), + ('acid_to_alcohol', 'acid_to_alcohol', 'OC(=O)c1ccccc1>>OCc1ccccc1', 1), + ('ester_to_alcohol', 'ester_to_alcohol', 'COC(=O)c1ccccc1>>OCc1ccccc1', 1), + ('amide_to_amine_primary', 'amide_to_amine', 'NC(=O)c1ccccc1>>NCc1ccccc1', 1), + ('amide_to_amine_secondary', 'amide_to_amine', 'CNC(=O)c1ccccc1>>CNCc1ccccc1', 1), + ('nitrile_to_amine', 'nitrile_to_amine', 'N#Cc1ccccc1>>NCc1ccccc1', 1), + ('nitro_to_amine', 'nitro_to_amine', '[O-][N+](=O)c1ccccc1>>Nc1ccccc1', 1), + ('azide_to_amine', 'azide_to_amine', 'CN=[N+]=[N-]>>CN', 1), + ('deoxygenation_primary', 'deoxygenation', 'OCC1CCCCC1>>CC1CCCCC1', 1), + ('deoxygenation_secondary', 'deoxygenation', 'OC(C)c1ccccc1>>CCc1ccccc1', 1), +] + + +@pytest.mark.parametrize( + 'args', + _two_component, + ids=[x[0] for x in _two_component] +) +def test_two_component(args): + test_id, rxn_name, rxn_smi, expected_count = args[:4] + exclude = args[4] if len(args) > 4 else set() + + r = smiles(rxn_smi) + r.canonicalize() + r1, r2 = r.reactants + + results = [(n, rxn) for n, rxn in r1 @ r2 if n not in exclude] + + names = set(n for n, _ in results) + assert names == {rxn_name}, f'expected {rxn_name}, got {sorted(names)}' + assert len(results) == expected_count, f'expected {expected_count}, got {len(results)}' + for _, rxn in results: + assert rxn == r, f'expected {r}, got {rxn}' + + +@pytest.mark.parametrize( + 'test_id,rxn_name,rxn_smi,expected_count', + _three_component, + ids=[x[0] for x in _three_component] +) +def test_three_component(test_id, rxn_name, rxn_smi, expected_count): + r = smiles(rxn_smi) + r.canonicalize() + r1, r2, r3 = r.reactants + + results = list(r1 @ [r2, r3]) + + names = set(n for n, _ in results) + assert names == {rxn_name}, f'expected {rxn_name}, got {sorted(names)}' + + assert len(results) == expected_count, f'expected {expected_count}, got {len(results)}' + + for _, rxn in results: + assert rxn == r, f'expected {r}, got {rxn}' + + 
+@pytest.mark.parametrize( + 'test_id,rxn_name,rxn_smi,expected_count', + _four_component, + ids=[x[0] for x in _four_component] +) +def test_four_component(test_id, rxn_name, rxn_smi, expected_count): + r = smiles(rxn_smi) + r.canonicalize() + r1, r2, r3, r4 = r.reactants + + results = list(r1 @ [r2, r3, r4]) + + names = set(n for n, _ in results) + assert names == {rxn_name}, f'expected {rxn_name}, got {sorted(names)}' + + assert len(results) == expected_count, f'expected {expected_count}, got {len(results)}' + + for _, rxn in results: + assert rxn == r, f'expected {r}, got {rxn}' + + +@pytest.mark.parametrize( + 'args', + _oxidations, + ids=[x[0] for x in _oxidations] +) +def test_oxidation(args): + test_id, rxn_name, rxn_smi, expected_count = args[:4] + exclude = args[4] if len(args) > 4 else set() + + r = smiles(rxn_smi) + r.canonicalize() + mol = r.reactants[0] + + results = [(n, rxn) for n, rxn in mol.oxidize() if n not in exclude] + + names = set(n for n, _ in results) + assert names == {rxn_name}, f'expected {rxn_name}, got {sorted(names)}' + + assert len(results) == expected_count, f'expected {expected_count}, got {len(results)}' + + for _, rxn in results: + assert rxn == r, f'expected {r}, got {rxn}' + + +@pytest.mark.parametrize( + 'args', + _reductions, + ids=[x[0] for x in _reductions] +) +def test_reduction(args): + test_id, rxn_name, rxn_smi, expected_count = args[:4] + exclude = args[4] if len(args) > 4 else set() + + r = smiles(rxn_smi) + r.canonicalize() + mol = r.reactants[0] + + results = [(n, rxn) for n, rxn in mol.reduce() if n not in exclude] + + names = set(n for n, _ in results) + assert names == {rxn_name}, f'expected {rxn_name}, got {sorted(names)}' + + assert len(results) == expected_count, f'expected {expected_count}, got {len(results)}' + + for _, rxn in results: + assert rxn == r, f'expected {r}, got {rxn}' + + +@pytest.mark.parametrize( + 'args', + _transformations, + ids=[x[0] for x in _transformations] +) +def 
test_transformation(args): + test_id, rxn_name, rxn_smi, expected_count = args[:4] + exclude = args[4] if len(args) > 4 else set() + + r = smiles(rxn_smi) + r.canonicalize() + mol = r.reactants[0] + + results = [(n, rxn) for n, rxn in mol.transform() if n not in exclude] + + names = set(n for n, _ in results) + assert names == {rxn_name}, f'expected {rxn_name}, got {sorted(names)}' + + assert len(results) == expected_count, f'expected {expected_count}, got {len(results)}' + + for _, rxn in results: + assert rxn == r, f'expected {r}, got {rxn}' diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 914a68d9..e5a38137 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2025 Ramil Nugmanov +# Copyright 2018-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -22,16 +22,12 @@ from io import BytesIO from itertools import permutations from struct import Struct -from typing import Any, Collection, Dict, Iterator, Optional, TYPE_CHECKING, Union +from collections.abc import Collection, Iterator +from typing import Any from .._functions import lazy_product from ..periodictable import Element, Query, AnyElement, AnyMetal, ListElement, QueryElement, ExtendedQuery -if TYPE_CHECKING: - from chython.containers.graph import Graph - from chython.containers import MoleculeContainer - - header_struct = Struct('I') m_atom_struct = Struct('QQQQIII') q_atom_struct = Struct('QQQQIIIII') @@ -80,7 +76,7 @@ def is_equal(self, other, /) -> bool: return True def _get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter=True, searching_scope=None, - components=None, get_mapping=None) -> Iterator[Dict[int, int]]: + components=None, get_mapping=None) -> Iterator[dict[int, int]]: if components is None: # ad-hoc for QueryContainer components, closures = self._compiled_query get_mapping 
= partial(_get_mapping, query_closures=closures, o_atoms=other._atoms, o_bonds=other._bonds) @@ -124,14 +120,14 @@ def _get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter=Tru yield mapping @cached_property - def _compiled_query(self: 'Graph'): + def _compiled_query(self): return _compile_query(self._atoms, self._bonds) class MoleculeIsomorphism(Isomorphism): __slots__ = () - def __contains__(self: 'MoleculeContainer', other: Union[Element, Query, str]): + def __contains__(self, other: Element | Query | str): """ Atom in Structure test. """ @@ -139,7 +135,7 @@ def __contains__(self: 'MoleculeContainer', other: Union[Element, Query, str]): return any(other == a.atomic_symbol for _, a in self.atoms()) return any(other == a for _, a in self.atoms()) - def is_automorphic(self): + def is_automorphic(self) -> bool: """ Test for automorphism symmetry of graph. """ @@ -149,14 +145,15 @@ def is_automorphic(self): return False return True - def get_automorphism_mapping(self: 'MoleculeContainer') -> Iterator[Dict[int, int]]: + def get_automorphism_mapping(self) -> Iterator[dict[int, int]]: """ Iterator of all possible automorphism mappings. """ return _get_automorphism_mapping(self._chiral_morgan, self._bonds) def get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter: bool = True, - searching_scope: Optional[Collection[int]] = None, match_stereo: bool = False): + searching_scope: Collection[int] | None = None, + match_stereo: bool = False) -> Iterator[dict[int, int]]: """ Get self to other Molecule substructure mapping generator. @@ -182,7 +179,7 @@ def get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter: boo else: yield mapping - def get_fast_mapping(self, other: 'MoleculeContainer') -> Optional[Dict[int, int]]: + def get_fast_mapping(self, other: 'MoleculeContainer') -> dict[int, int] | None: """ Get self to other fast (suboptimal) structure mapping. Only one possible atoms mapping returned. 
@@ -199,7 +196,7 @@ def get_fast_mapping(self, other: 'MoleculeContainer') -> Optional[Dict[int, int raise TypeError('MoleculeContainer expected') @cached_property - def _cython_compiled_structure(self: 'MoleculeContainer'): + def _cython_compiled_structure(self): # long I: # bond: single, double, triple, aromatic, special = 5 bit # bond in ring: 2 bit @@ -311,7 +308,7 @@ class QueryIsomorphism(Isomorphism): __slots__ = () def get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter: bool = True, - searching_scope: Optional[Collection[int]] = None, _cython=True): + searching_scope: Collection[int] | None = None, _cython=True) -> Iterator[dict[int, int]]: """ Get Query to Molecule substructure mapping generator. @@ -574,7 +571,7 @@ def _cython_compiled_query(self): return components -def _get_automorphism_mapping(atoms: Dict[int, int], bonds: Dict[int, Dict[int, Any]]) -> Iterator[Dict[int, int]]: +def _get_automorphism_mapping(atoms: dict[int, int], bonds: dict[int, dict[int, Any]]) -> Iterator[dict[int, int]]: if len(atoms) == len(set(atoms.values())): return # all atoms unique diff --git a/chython/algorithms/mapping/__init__.py b/chython/algorithms/mapping/__init__.py index 12696794..98ae2223 100644 --- a/chython/algorithms/mapping/__init__.py +++ b/chython/algorithms/mapping/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,13 +16,173 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# +from collections import ChainMap +from itertools import count , chain, repeat from .attention import Attention -from .fixmapper import FixMapper -from .groups import GroupsFix +from .reconstruct import Reconstruct +from ._groups import xonyl_groups, substituents_groups +from ._reactions import rules -class Mapping(GroupsFix, Attention, FixMapper): +class Mapping(Reconstruct, Attention): __slots__ = () + def reset_mapping(self) -> bool: + """ + Reset atom-to-atom mapping by remapping atoms to unique numbers. + """ + r = [n for m in chain(self.reactants, self.reagents) for n in m._atoms] + p = [n for m in self.products for n in m._atoms] + c = count(1) + if len(r) != len(set(r)): + for m in chain(self.reactants, self.reagents): + m.remap({n: next(c) for n in m._atoms}) + if len(p) != len(set(p)): + for m in self.products: + m.remap({n: next(c) for n in m._atoms}) + if next(c) != 1: + self.flush_cache() + return True + return False + + def fix_mapping(self, *, logging: bool = False) -> bool | list: + """ + Fix mapping of plipped functional groups and common mechanism errors. + """ + if not self: + if logging: + return [] + return False + log = self.__fix_groups() + log.extend(self.__fix_cgr()) + if logging: + return log + return bool(log) + + def __fix_cgr(self): + cgr = ~self + if not cgr.center_atoms: + return [] + + log = [] + free_number = count(max(cgr) + 1) + components = [(cgr.substructure(c), + cgr.augmented_substructure(c, 2), # deep DEPENDS on rules! 
+ c) + for c in cgr.substructure(cgr.center_atoms).connected_components] + + r_atoms = ChainMap(*(x._atoms for x in self.reactants)) + for c, ac, cs in components: + for rule_num, (query, signature, restrict, fix) in enumerate(rules): + if str(c) == signature: + for mapping in query.get_mapping(ac, automorphism_filter=False): + if not cs.issubset(mapping.values()): + continue + if restrict is not None and any(a != r_atoms.get(mapping[n]) for n, a in restrict.atoms()): + continue + mapping = {mapping[n]: next(free_number) if m is None else mapping[m] for n, m in fix.items()} + for m in self.products: + m.remap(mapping) + log.append((rule_num, signature, tuple(mapping.values()))) + break + else: + continue + break # component remapped! + + if log: + self.flush_cache() + return log + + def __fix_groups(self): + log = [] + seen = set() + remap = {} + pamer = {} + r_groups = set() + p_groups = set() + r_subs = set() + p_subs = set() + + # find xonyl groups. any charged-neutral combinations + for pattern in xonyl_groups: + for m, g in chain(zip(self.reactants, repeat(r_groups)), zip(self.products, repeat(p_groups))): + atoms = m._atoms + for mapping in pattern.get_mapping(m, automorphism_filter=False): + n1, n2, n3 = mapping[1], mapping[2], mapping[3] + if (t := atoms[n2].atomic_number) == atoms[n3].atomic_number: + g.add((n1, n2, n3, atoms[n1].atomic_number, t)) + + for pattern, _map in substituents_groups: + for m, g in chain(zip(self.reactants, repeat(r_subs)), zip(self.products, repeat(p_subs))): + atoms = m._atoms + for mapping in pattern.get_mapping(m, automorphism_filter=False): + g.add((n := mapping[1], atoms[n].atomic_number, + tuple((n := mapping[x], y - 2, atoms[n].atomic_number) for x, y in _map), m)) + + r_groups = list(r_groups) + p_groups = list(p_groups) + + # find pairs + if r_groups and p_groups: + for n1, n2, n3, x1, x2 in r_groups: + if n1 in seen: # already remapped + continue + for i, (m1, m2, m3, y1, y2) in enumerate(p_groups): + if m1 not in seen 
and n1 == m1 and x1 == y1 and x2 == y2: # found pair + if n2 == m3 and n3 == m2: # found switch + remap[m2] = m3 + remap[m3] = m2 + seen.add(n1) + break + else: + continue + del p_groups[i] + + if not p_groups: # optimize + r_subs.clear() + + # hydrolysis, etc. + for (n1, x1, _map, m), g, r in chain(zip(r_subs, repeat(p_groups), repeat(remap)), + zip(p_subs, repeat(r_groups), repeat(pamer))): + if n1 in seen: + continue + for i, (m1, *m23, y1, y2) in enumerate(g): + if m1 not in seen and n1 == m1 and x1 == y1: # found center + if len(_map) == 1: # acids substitutions. + ni, _, xi = _map[0] + # second neighbor should be disconnected from central atom. + if xi == y2 and m23[0] == ni and (m23[1] not in m._atoms or m23[1] not in m._bonds[n1]): + m2, m3 = m23 + r[m2] = m3 + r[m3] = m2 + seen.add(n1) + break + elif all(xi == y2 and m23[mi] == ni for ni, mi, xi in _map): + m2, m3 = m23 + r[m2] = m3 + r[m3] = m2 + seen.add(n1) + break + else: + continue + del g[i] + + if remap: + seen = set(remap) + for m in self.products: + if not seen.isdisjoint(m): + m.remap(remap) + log.append(('products groups remapped', tuple(remap))) + if pamer: + seen = set(pamer) + for m in self.reactants: + if not seen.isdisjoint(m): + m.remap(pamer) + log.append(('reactants groups remapped', tuple(pamer))) + + if log: + self.flush_cache() + return log + __all__ = ['Mapping'] diff --git a/chython/algorithms/mapping/_groups.py b/chython/algorithms/mapping/_groups.py index ab034257..102ece8e 100644 --- a/chython/algorithms/mapping/_groups.py +++ b/chython/algorithms/mapping/_groups.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/mapping/_reactions.py b/chython/algorithms/mapping/_reactions.py index 2e58cbf2..1e6c0a62 100644 --- a/chython/algorithms/mapping/_reactions.py +++ b/chython/algorithms/mapping/_reactions.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/mapping/attention.py b/chython/algorithms/mapping/attention.py index bc4e870a..811edb4b 100644 --- a/chython/algorithms/mapping/attention.py +++ b/chython/algorithms/mapping/attention.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2024 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # Copyright 2024 Philippe Gantzer # This file is part of chython. # @@ -17,78 +17,225 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from CachedMethods import class_cached_property -from itertools import chain, count, repeat +from functools import cache +from itertools import repeat from logging import getLogger, INFO -from numpy import ix_, unravel_index, argmax, zeros, array, isclose, nonzero, ones, mean -from typing import TYPE_CHECKING, Union - - -if TYPE_CHECKING: - from chython import ReactionContainer +from numpy import array, argmax, clip, concatenate, int32, int64, isclose, ix_, mean, nan_to_num +from numpy import nonzero, ones, unravel_index, zeros +from scipy.sparse.csgraph import shortest_path logger = getLogger('chython.attention') logger.setLevel(INFO) +_max_distance = 10 +_max_neighbors = 14 + + +@cache +def _get_session(): + import onnxruntime as ort + from os import cpu_count + + try: + from chython_rxnmap import model_path + except ImportError: + raise ImportError('chython-rxnmap package is required for attention mapping. 
' + 'Install it with: pip install chython-rxnmap') + + opts = ort.SessionOptions() + opts.inter_op_num_threads = 1 + opts.intra_op_num_threads = min(cpu_count() or 4, 8) + opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL + return ort.InferenceSession(model_path, opts, providers=['CPUExecutionProvider']) + + +def _run_model(reactants, products, p2r, r2p, equal_atoms): + """Run ONNX model on reaction, return symmetrized attention filtered by atom type.""" + atoms, neighbors, distances, roles = _encode_reaction(reactants, products) + am = _get_session().run(None, { + 'atoms': atoms[None].astype(int64), + 'neighbors': neighbors[None].astype(int64), + 'distances': distances[None].astype(int64), + 'roles': roles[None].astype(int64), + })[0] + return (am[p2r] + am[r2p].T) * equal_atoms + + +def _encode_reaction(reactants, products): + """Encode reaction into model input tensors (atoms, neighbors, distances, roles).""" + atoms_list = [array([0], dtype=int32)] # rxn_cls + neighbors_list = [array([0], dtype=int32)] + roles_list = [1] # rxn_cls role + distances_list = [] + + for mol in reactants: + a, n, d = _encode_molecule(mol) + atoms_list.append(a) + neighbors_list.append(n) + distances_list.append(d) + roles_list.append(0) # mol_cls (hidden) + roles_list.extend(repeat(2, len(mol))) + + for mol in products: + a, n, d = _encode_molecule(mol) + atoms_list.append(a) + neighbors_list.append(n) + distances_list.append(d) + roles_list.append(0) + roles_list.extend(repeat(3, len(mol))) + + atoms = concatenate(atoms_list) + neighbors = concatenate(neighbors_list) + roles = array(roles_list, dtype=int32) + + total = len(roles) + distances = zeros((total, total), dtype=int32) + distances[0, 0] = 1 # rxn_cls self-loop + i = 1 + for d in distances_list: + j = i + d.shape[0] + distances[i:j, i:j] = d + i = j + + return atoms, neighbors, distances, roles + + +def _encode_molecule(mol): + """Encode molecule into (atoms, neighbors, distances) tensors. 
+ + Encoding: + atoms: 0=padding, 1=mol_cls, 2+=atomic_number+2 + neighbors: 0=cls, 2+=neighbor_count+2 + distances: 0=padding, 1=unreachable/cross-component, 2+=shortest_path+2 + """ + n_atoms = len(mol) + size = n_atoms + 1 # +1 for mol_cls + + atoms = zeros(size, dtype=int32) + neighbors = zeros(size, dtype=int32) + atoms[0] = 1 # mol_cls + + bonds = mol._bonds + for i, (n, a) in enumerate(mol.atoms(), 1): + atoms[i] = a.atomic_number + 2 + nb = len(bonds[n]) + (a.implicit_hydrogens or 0) + if nb > _max_neighbors: + nb = _max_neighbors + neighbors[i] = nb + 2 + + adj = mol.adjacency_matrix() + dist = shortest_path(adj, method='FW', directed=False, unweighted=True) + nan_to_num(dist, copy=False, posinf=1.0) # unreachable -> 1 (cross-component attention) + clip(dist, None, _max_distance, out=dist) + dist = (dist + 2).astype(int32) + + distances = ones((size, size), dtype=int32) # mol_cls distance=1 to all atoms + distances[1:, 1:] = dist + return atoms, neighbors, distances + + +def _prepare_masks(reactants, products, reagents): + """Build atom maps, adjacency matrices, element-equality mask, and token-level index slices.""" + r_map = [n for m in reactants for n in m] + p_map = [n for m in products for n in m] + rg_map = [n for m in reagents for n in m] + ra = len(r_map) + pa = len(p_map) + + # token layout: [rxn_cls, mol_cls_1, atoms_1, ..., mol_cls_n, atoms_n, ...] 
+ ram = [False] # rxn_cls + r_atoms = [] + r_adj = zeros((ra, ra), dtype=bool) + i = 0 + for m in reactants: + ram.append(False) # mol_cls + ram.extend(repeat(True, len(m))) + j = i + len(m) + r_adj[i:j, i:j] = m.adjacency_matrix() + i = j + r_atoms.extend(a.atomic_number for _, a in m.atoms()) + + pam = [False] * len(ram) + p_atoms = [] + p_adj = zeros((pa, pa), dtype=bool) + i = 0 + for m in products: + pam.append(False) + pam.extend(repeat(True, len(m))) + j = i + len(m) + p_adj[i:j, i:j] = m.adjacency_matrix() + i = j + p_atoms.extend(a.atomic_number for _, a in m.atoms()) + + ram.extend(repeat(False, len(pam) - len(ram))) + ram = array(ram, dtype=bool) + pam = array(pam, dtype=bool) + r_atoms = array(r_atoms, dtype=int) + p_atoms = array(p_atoms, dtype=int) + + equal_atoms = p_atoms[:, None] == r_atoms + return r_map, p_map, rg_map, equal_atoms, ix_(pam, ram), ix_(ram, pam), r_adj, p_adj + + +def _greedy_mapping(am, r_map, p_map, r_adj, p_adj, multiplier): + """Greedy attention-based product-to-reactant atom assignment.""" + amc = am.copy() + pa = len(p_map) + mapping = {} + scope = zeros(pa, dtype=bool) + seen = ones(pa, dtype=bool) + score = [] + + for x in range(pa): + if not x: + i, j = unravel_index(argmax(am), am.shape) + else: + ams = am[scope] + if ams.size: + i, j = unravel_index(argmax(ams), ams.shape) + i = nonzero(scope)[0][i] + else: + i, j = unravel_index(argmax(am), am.shape) + + if isclose(am[i, j], 0.): + for n in set(p_map).difference(mapping): + mapping[n] = 0 + break + + score.append(amc[i, j]) + mapping[p_map[i]] = r_map[j] + am[ix_(p_adj[i], r_adj[j])] *= multiplier + am[i] = am[:, j] = 0 + seen[i] = False + scope[i] = False + scope[p_adj[i] & seen] = True + + return mapping, float(mean(score)) if score else 0. 
+ class Attention: __slots__ = () - __class_cache__ = {} - - def reset_mapping(self: Union['ReactionContainer', 'Attention'], *, return_score: bool = False, multiplier=1.75, - keep_reactants_numbering=False) -> Union[bool, float]: - """ - Do atom-to-atom mapping. Return True if mapping changed. - """ - if any(len(bs) > 14 for m in self.molecules() for bs in m._bonds.values()): + + def attention_mapping(self, *, return_score: bool = False, multiplier=1.75, + keep_reactants_numbering=False) -> bool | float: + """Do atom-to-atom mapping. Return True if mapping changed.""" + if any(len(bs) > _max_neighbors for m in self.molecules() for bs in m._bonds.values()): logger.info('atom-to-atom mapping not supported for hypervalent compounds') return False - fixed = self.__fix_collisions() - equal_atoms, p2r, r2p, r_adj, p_adj, r_map, p_map, pa, rg_map = self.__prepare_remapping() - - # rxnmapper-inspired algorithm - am = self.__get_attention() - # sum of reactants to products attention and vice-versa for equal atom types only - am = (am[p2r] + am[r2p].T) * equal_atoms - amc = am.copy() - - mapping = {} - scope = zeros(pa, dtype=bool) - seen = ones(pa, dtype=bool) - score = [] - for x in range(pa): # iteratively map each product atom to reactant - # select highest attention - # todo: optimize - if not x: - i, j = unravel_index(argmax(am), am.shape) - else: - ams = am[scope] - if ams.size: - i, j = unravel_index(argmax(ams), ams.shape) - i = nonzero(scope)[0][i] - else: - i, j = unravel_index(argmax(am), am.shape) - if isclose(am[i, j], 0.): # no more products atoms in reactants - # mark as unmapped - for n in set(p_map).difference(mapping): - mapping[n] = 0 - break - else: - score.append(amc[i, j]) - mapping[p_map[i]] = r_map[j] - am[ix_(p_adj[i], r_adj[j])] *= multiplier # highlight neighbors - am[i] = am[:, j] = 0 # mask mapped product and reactant atoms - seen[i] = False - scope[i] = False - scope[p_adj[i] & seen] = True - - score = float(mean(score)) if score else 0. 
- # mapping done. - if any(n != m for n, m in mapping.items()): # old mapping changed + + fixed = self.reset_mapping() + r_map, p_map, rg_map, equal_atoms, p2r, r2p, r_adj, p_adj = _prepare_masks( + self.reactants, self.products, self.reagents + ) + + am = _run_model(self.reactants, self.products, p2r, r2p, equal_atoms) + mapping, score = _greedy_mapping(am, r_map, p_map, r_adj, p_adj, multiplier) + + if any(n != m for n, m in mapping.items()): if keep_reactants_numbering or fixed: r_mapping = {n: n for n in r_map} else: - r_mapping = {m: n for n, m in enumerate(r_map, 1)} # remap reactants to contiguous range + r_mapping = {m: n for n, m in enumerate(r_map, 1)} for m in self.reactants: m.remap(r_mapping) @@ -97,120 +244,23 @@ def reset_mapping(self: Union['ReactionContainer', 'Attention'], *, return_score for n, m in mapping.items(): if m := r_mapping.get(m): p_mapping[n] = m - else: # not found in reactants atoms. set unique numbers. + else: nm += 1 p_mapping[n] = nm - for m in self.products: m.remap(p_mapping) if not keep_reactants_numbering and not fixed: - rg_mapping = {m: n for n, m in enumerate(rg_map, nm+1)} # remap reagents to contiguous range without overlapping + rg_mapping = {m: n for n, m in enumerate(rg_map, nm + 1)} for m in self.reagents: m.remap(rg_mapping) self.flush_cache() fixed = True - if self.fix_groups_mapping(): # fix carboxy etc - fixed = True - if self.fix_mapping(): # fix common mistakes in mechanisms + if self.fix_mapping(): fixed = True - if return_score: - return score - return fixed - - def __fix_collisions(self: 'ReactionContainer'): - r = [n for m in chain(self.reactants, self.reagents) for n in m._atoms] - p = [n for m in self.products for n in m._atoms] - c = count(1) - if len(r) != len(set(r)): - for m in chain(self.reactants, self.reagents): - m.remap({n: next(c) for n in m._atoms}) - if len(p) != len(set(p)): - for m in self.products: - m.remap({n: next(c) for n in m._atoms}) - if next(c) != 1: - self.flush_cache() - return 
True - return False - - def __prepare_remapping(self: 'ReactionContainer'): - r_map = [n for m in self.reactants for n in m] - p_map = [n for m in self.products for n in m] - rg_map = [n for m in self.reagents for n in m] - ra = len(r_map) # number of reactants atoms - pa = len(p_map) # number of products atoms - - ram = [False] # reactants atoms mask - r_atoms = [] - r_adj = zeros((ra, ra), dtype=bool) - i = 0 - for m in self.reactants: - ram.append(False) - ram.extend(repeat(True, len(m))) - a = m.adjacency_matrix() - j = i + len(m) - r_adj[i:j, i:j] = a - i = j - r_atoms.extend(a.atomic_number for _, a in m.atoms()) - r_atoms = array(r_atoms, dtype=int) - - pam = [False] * len(ram) # products atoms mask - p_atoms = [] - p_adj = zeros((pa, pa), dtype=bool) - i = 0 - for m in self.products: - pam.append(False) - pam.extend(repeat(True, len(m))) - a = m.adjacency_matrix() - j = i + len(m) - p_adj[i:j, i:j] = a - i = j - p_atoms.extend(a.atomic_number for _, a in m.atoms()) - p_atoms = array(p_atoms, dtype=int) - - ram.extend(repeat(False, len(pam) - len(ram))) - ram = array(ram, dtype=bool) - pam = array(pam, dtype=bool) - return p_atoms[:, None] == r_atoms, ix_(pam, ram), ix_(ram, pam), r_adj, p_adj, r_map, p_map, pa, rg_map - - @class_cached_property - def __attention_model(self): - from chython import torch_device - from chytorch.zoo.rxnmap import Model - - return Model().to(torch_device) - - @class_cached_property - def __autocast(self): - from chython import torch_device - - if torch_device.startswith('cuda'): - try: - from torch import autocast - except ImportError: # torch 1.8 ad-hoc - from torch.cuda.amp import autocast - - return autocast() - else: - return autocast('cuda') - return autocast_filler() - - def __get_attention(self): - from torch import no_grad - - with no_grad(), self.__autocast: - am = self.__attention_model(self).float().cpu().numpy() - return am - - -class autocast_filler: - def __enter__(self): - return self - - def __exit__(self, 
exc_type, exc_val, exc_tb): - ... + return score if return_score else fixed __all__ = ['Attention'] diff --git a/chython/algorithms/mapping/fixmapper.py b/chython/algorithms/mapping/fixmapper.py deleted file mode 100644 index 251eea95..00000000 --- a/chython/algorithms/mapping/fixmapper.py +++ /dev/null @@ -1,84 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2022-2024 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# -from collections import ChainMap -from itertools import count -from typing import List, Tuple, TYPE_CHECKING, Union -from ._reactions import rules - - -if TYPE_CHECKING: - from chython import ReactionContainer - - -class FixMapper: - __slots__ = () - - def fix_mapping(self: 'ReactionContainer', *, logging: bool = False) -> \ - Union[bool, List[Tuple[int, str, Tuple[int, ...]]]]: - """ - Fix mapping by using loaded rules. - """ - if not self: - if logging: - return [] - return False - - cgr = ~self - if not cgr.center_atoms: - if logging: - return [] - return False - del self.__dict__['__cached_method_compose'] - - log = [] - free_number = count(max(cgr) + 1) - components = [(cgr.substructure(c), - cgr.augmented_substructure(c, 2), # deep DEPENDS on rules! 
- c) - for c in cgr.substructure(cgr.center_atoms).connected_components] - - r_atoms = ChainMap(*(x._atoms for x in self.reactants)) - for c, ac, cs in components: - for rule_num, (query, signature, restrict, fix) in enumerate(rules): - if str(c) == signature: - for mapping in query.get_mapping(ac, automorphism_filter=False): - if not cs.issubset(mapping.values()): - continue - if restrict is not None and any(a != r_atoms.get(mapping[n]) for n, a in restrict.atoms()): - continue - mapping = {mapping[n]: next(free_number) if m is None else mapping[m] for n, m in fix.items()} - for m in self.products: - m.remap(mapping) - log.append((rule_num, signature, tuple(mapping.values()))) - break - else: - continue - break # component remapped! - - if log: - self.flush_cache() - if logging: - return log - return True - elif logging: - return log - return False - - -__all__ = ['FixMapper'] diff --git a/chython/algorithms/mapping/groups.py b/chython/algorithms/mapping/groups.py deleted file mode 100644 index 14d40a69..00000000 --- a/chython/algorithms/mapping/groups.py +++ /dev/null @@ -1,137 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2022 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -from itertools import chain, repeat -from typing import List, Tuple, TYPE_CHECKING, Union -from ._groups import * - - -if TYPE_CHECKING: - from chython import ReactionContainer - - -class GroupsFix: - __slots__ = () - - def fix_groups_mapping(self: 'ReactionContainer', *, logging: bool = False) -> \ - Union[bool, List[Tuple[str, Tuple[int, ...]]]]: - """ - Fix atom-to-atom mapping of some functional groups. Return True if found AAM errors. - """ - if not self: - if logging: - return [] - return False - - log = [] - seen = set() - remap = {} - pamer = {} - r_groups = set() - p_groups = set() - r_subs = set() - p_subs = set() - - # find xonyl groups. any charged-neutral combinations - for pattern in xonyl_groups: - for m, g in chain(zip(self.reactants, repeat(r_groups)), zip(self.products, repeat(p_groups))): - atoms = m._atoms - for mapping in pattern.get_mapping(m, automorphism_filter=False): - n1, n2, n3 = mapping[1], mapping[2], mapping[3] - if (t := atoms[n2].atomic_number) == atoms[n3].atomic_number: - g.add((n1, n2, n3, atoms[n1].atomic_number, t)) - - for pattern, _map in substituents_groups: - for m, g in chain(zip(self.reactants, repeat(r_subs)), zip(self.products, repeat(p_subs))): - atoms = m._atoms - for mapping in pattern.get_mapping(m, automorphism_filter=False): - g.add((n := mapping[1], atoms[n].atomic_number, - tuple((n := mapping[x], y - 2, atoms[n].atomic_number) for x, y in _map), m)) - - r_groups = list(r_groups) - p_groups = list(p_groups) - - # find pairs - if r_groups and p_groups: - for n1, n2, n3, x1, x2 in r_groups: - if n1 in seen: # already remapped - continue - for i, (m1, m2, m3, y1, y2) in enumerate(p_groups): - if m1 not in seen and n1 == m1 and x1 == y1 and x2 == y2: # found pair - if n2 == m3 and n3 == m2: # found switch - remap[m2] = m3 - remap[m3] = m2 - seen.add(n1) - break - else: - continue - del p_groups[i] - - if not p_groups: # optimize - r_subs.clear() - - # hydrolysis, etc. 
- for (n1, x1, _map, m), g, r in chain(zip(r_subs, repeat(p_groups), repeat(remap)), - zip(p_subs, repeat(r_groups), repeat(pamer))): - if n1 in seen: - continue - for i, (m1, *m23, y1, y2) in enumerate(g): - if m1 not in seen and n1 == m1 and x1 == y1: # found center - if len(_map) == 1: # acids substitutions. - ni, _, xi = _map[0] - # second neighbor should be disconnected from central atom. - if xi == y2 and m23[0] == ni and (m23[1] not in m._atoms or m23[1] not in m._bonds[n1]): - m2, m3 = m23 - r[m2] = m3 - r[m3] = m2 - seen.add(n1) - break - elif all(xi == y2 and m23[mi] == ni for ni, mi, xi in _map): - m2, m3 = m23 - r[m2] = m3 - r[m3] = m2 - seen.add(n1) - break - else: - continue - del g[i] - - if remap: - seen = set(remap) - for m in self.products: - if not seen.isdisjoint(m): - m.remap(remap) - log.append(('products groups remapped', tuple(remap))) - if pamer: - seen = set(pamer) - for m in self.reactants: - if not seen.isdisjoint(m): - m.remap(pamer) - log.append(('reactants groups remapped', tuple(pamer))) - - if log: - self.flush_cache() - if logging: - return log - return True - elif logging: - return [] - return False - - -__all__ = ['GroupsFix'] diff --git a/chython/algorithms/mapping/reconstruct.py b/chython/algorithms/mapping/reconstruct.py new file mode 100644 index 00000000..c19c909b --- /dev/null +++ b/chython/algorithms/mapping/reconstruct.py @@ -0,0 +1,375 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2026 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +from itertools import combinations, permutations +from ..groups._oxidations import rules as oxidation_rules +from ..groups._reactions import rules as reaction_rules +from ..groups._reductions import rules as reduction_rules +from ..groups._transformations import rules as transformation_rules + + +class Reconstruct: + __slots__ = () + + def reconstruct_mapping(self) -> list[str]: + """ + Annotate reaction by trying to reconstruct the product from reactants + using predefined reaction templates. + + Tries in order: + 1. Standalone deprotection + 2. Standalone protection (reverse) + 3. Single-molecule transforms (oxidize/reduce/transform) + 4. Deprotection + transform composition + 5. Multi-component reactions (subset-based) + + Returns list of matched reaction/deprotection names. + Empty list if no match found. + If found, updates atom-to-atom mapping. 
+ """ + assert self.reactants, 'No reactants in reaction' + assert len(self.products) == 1, 'Only single product reactions supported' + self.reset_mapping() + + # Prepare raw copies (no NH patch) for FG detection and reactor input + reactants = [_prepare(m) for m in self.reactants] + + # Prepare product: raw for FG detection, patched for comparison + product = _prepare(self.products[0]) + product_patched = product.copy(keep_sssr=True, keep_components=True) + _patch_nh(product_patched) + pfgs = product.functional_groups + product_size = len(product) + + # Iterate over candidate subsets by atom count + for subset_indices in _candidate_subsets(reactants, product_size): + if len(subset_indices) == 1: + r = reactants[subset_indices[0]] + result = _try_single(r, product_patched, pfgs) + else: + subset = [reactants[i] for i in subset_indices] + result = _try_multi(subset, product_patched) + if result: + mapping, labels = result + _safe_remap(self.products[0], mapping) + return labels + + return [] + + +def _safe_remap(mol, mapping): + """Remap mol atoms using mapping, handling potential overlaps via temp numbers.""" + try: + mol.remap(mapping) + except ValueError: + # Overlap: remap all atoms to safe temp space first + existing = list(mol._atoms) + targets = set(mapping.values()) + base = max(max(existing), max(targets), max(mapping)) + 1 + # Step 1: all atoms to temp + temp = {n: base + i for i, n in enumerate(existing)} + mol.remap(temp) + # Step 2: temp to final (mapped atoms get target, unmapped get unique new numbers) + used = set(mapping.values()) + counter = base + len(existing) + final = {} + for i, n in enumerate(existing): + if n in mapping: + final[base + i] = mapping[n] + else: + # Pick a number not in use + while counter in used: + counter += 1 + final[base + i] = counter + used.add(counter) + counter += 1 + mol.remap(final) + + +def _prepare(mol): + """Clean copy for FG detection and reactor input. 
NO NH patching.""" + c = mol.copy(keep_sssr=True, keep_components=True) + c.clean_stereo() + c.clean_isotopes() + return c + + +def _patch_nh(mol): + """Set implicit_hydrogens=None on aromatic N with h=1 for tautomeric equivalence.""" + patched = False + for _, a in mol.atoms(): + if a.atomic_symbol == 'N' and a.hybridization == 4 and a.implicit_hydrogens == 1: + a._implicit_hydrogens = None + patched = True + if patched: + mol.flush_cache() + + +def _match(generated, product_patched): + """Patch NH on generated (reactor output), then compare to pre-patched product.""" + _patch_nh(generated) + return product_patched.get_fast_mapping(generated) + + +def _candidate_subsets(reactants, product_size): + """ + Yield reactant index tuples ordered by likelihood of match. + + 1. Singles (sorted by atom count descending, threshold: size >= product_size * 0.5) + 2. Pairs (sorted by combined size descending, threshold: combined >= product_size * 0.7) + 3. Triples+ (sorted by combined size descending) + """ + sizes = [len(r) for r in reactants] + n = len(reactants) + + # Singles: largest first, threshold 50% of product + threshold_single = product_size * 0.5 + singles = [(i,) for i in range(n) if sizes[i] >= threshold_single] + singles.sort(key=lambda t: sizes[t[0]], reverse=True) + yield from singles + + # Pairs: largest combined first, threshold 70% of product + if n >= 2: + threshold_pair = product_size * 0.7 + pairs = [] + for combo in combinations(range(n), 2): + combined = sum(sizes[i] for i in combo) + if combined >= threshold_pair: + pairs.append(combo) + pairs.sort(key=lambda t: sum(sizes[i] for i in t), reverse=True) + yield from pairs + + # Triples + if n >= 3: + triples = list(combinations(range(n), 3)) + triples.sort(key=lambda t: sum(sizes[i] for i in t), reverse=True) + yield from triples + + # Quads + if n >= 4: + quads = list(combinations(range(n), 4)) + quads.sort(key=lambda t: sum(sizes[i] for i in t), reverse=True) + yield from quads + + +def _try_single(r, 
product_patched, pfgs): + """ + Try all single-reactant paths in order: + 1. Standalone deprotection + 2. Standalone protection (reverse) + 3. Single-molecule transforms (unified oxidize/reduce/transform) + 4. Deprotection + transform composition + + Returns (mapping, labels) or None. + """ + # 1. Standalone deprotection + result = _try_deprotect(r, product_patched) + if result: + return result + + # 2. Standalone protection (reverse: product is deprotected form of reactant) + result = _try_protect(r, product_patched) + if result: + return result + + # 3. Single-molecule transforms with FG screening + result = _try_transforms(r, product_patched, pfgs) + if result: + return result + + # 4. Deprotection + transform composition + result = _try_deprotect_then_transform(r, product_patched, pfgs) + if result: + return result + + return None + + +def _try_deprotect(r, product_patched): + """ + Try standalone deprotection: reactant has PGs, product has none of those. + Returns (mapping, labels) or None. + """ + rpg = r.protective_groups + ppg = product_patched.protective_groups + # product must have NO PGs that reactant doesn't have + for p in ppg: + if p not in rpg: + return None + # shared PGs must have same counts + for p in rpg: + if p in ppg and rpg[p] != ppg[p]: + return None + # PGs to remove: present in reactant, absent from product + to_remove = [p for p in rpg if p not in ppg] + if not to_remove: + return None + # remove all instances of each PG type + m = r.copy(keep_sssr=True, keep_components=True) + for p in to_remove: + m.remove_protection(p) + _patch_nh(m) + if x := product_patched.get_fast_mapping(m): + return x, [f'deprotect:{name}' for name in to_remove] + return None + + +def _try_protect(r, product_patched): + """ + Try standalone protection (reverse direction): + product is reactant with PGs added. + Equivalent to: product has PGs that reactant doesn't. + We check if deprotecting product yields reactant. + + Returns (mapping, labels) or None. 
+ """ + rpg = r.protective_groups + ppg = product_patched.protective_groups + # reactant must have NO PGs that product doesn't have + for p in rpg: + if p not in ppg: + return None + # shared PGs must have same counts + for p in ppg: + if p in rpg and ppg[p] != rpg[p]: + return None + # PGs to add (present in product, absent from reactant) + to_add = [p for p in ppg if p not in rpg] + if not to_add: + return None + # deprotect the product_patched and compare against reactant + # Patch NH on reactant copy for comparison + r_patched = r.copy(keep_sssr=True, keep_components=True) + _patch_nh(r_patched) + # Deprotect a copy of product + m = product_patched.copy(keep_sssr=True, keep_components=True) + for p in to_add: + m.remove_protection(p) + _patch_nh(m) + if x := r_patched.get_fast_mapping(m): + return x, [f'protect:{name}' for name in to_add] + return None + + +def _try_transforms(r, product_patched, pfgs): + """ + Unified loop over oxidation, reduction, and transformation rules. + FG detection uses raw (unpatched) reactant. Reactor operates on raw reactant. + Comparison patches NH on output. + + Returns (mapping, labels) or None. 
+ """ + rfgs = r.functional_groups + + # Oxidation rules + for name, fg_name, output_fg, reactor in oxidation_rules: + if fg_name not in rfgs: + continue + if output_fg is not None and pfgs.get(output_fg, 0) - rfgs.get(output_fg, 0) != 1: + continue + if output_fg is None and rfgs[fg_name] - pfgs.get(fg_name, 0) != 1: + continue + for rxn in reactor(r): + p = rxn.products[0] + if x := _match(p, product_patched): + return x, [f'oxidize:{name}'] + + # Reduction rules + for name, fg_name, output_fg, reactor in reduction_rules: + if fg_name not in rfgs: + continue + if output_fg is not None and pfgs.get(output_fg, 0) - rfgs.get(output_fg, 0) != 1: + continue + if output_fg is None and rfgs[fg_name] - pfgs.get(fg_name, 0) != 1: + continue + for rxn in reactor(r): + p = rxn.products[0] + if x := _match(p, product_patched): + return x, [f'reduce:{name}'] + + # Transformation rules + for name, fg_name, output_fg, reactor in transformation_rules: + if fg_name not in rfgs: + continue + if output_fg is not None and pfgs.get(output_fg, 0) - rfgs.get(output_fg, 0) != 1: + continue + if output_fg is None and rfgs[fg_name] - pfgs.get(fg_name, 0) != 1: + continue + for rxn in reactor(r): + p = rxn.products[0] + if x := _match(p, product_patched): + return x, [f'transform:{name}'] + + return None + + +def _try_deprotect_then_transform(r, product_patched, pfgs): + """ + Composition: deprotect first, then apply single-molecule transforms. + Returns (mapping, labels) or None. 
+ """ + rpg = r.protective_groups + ppg = product_patched.protective_groups + # Validate PG delta (same logic as standalone deprotect) + for p in ppg: + if p not in rpg: + return None + for p in rpg: + if p in ppg and rpg[p] != ppg[p]: + return None + to_remove = [p for p in rpg if p not in ppg] + if not to_remove: + return None + # Deprotect on a raw copy (preserves atom numbers for transform) + deprot = r.copy(keep_sssr=True, keep_components=True) + for p in to_remove: + deprot.remove_protection(p) + # Use deprotected molecule's FGs for screening (raw, unpatched) + result = _try_transforms(deprot, product_patched, pfgs) + if result: + mapping, transform_labels = result + labels = [f'deprotect:{n}' for n in to_remove] + transform_labels + return mapping, labels + return None + + +def _try_multi(subset, product_patched): + """ + Multi-component reactions: try all reaction templates that match + the subset length and FG requirements. + + Returns (mapping, labels) or None. + """ + fgs = [r.functional_groups for r in subset] + n = len(subset) + + for name, fg_names, reactor in reaction_rules: + if len(fg_names) != n: + continue + for perm in permutations(range(n)): + if all(fg_names[i] in fgs[j] for i, j in enumerate(perm)): + for rxn in reactor(*(subset[j] for j in perm)): + p = rxn.products[0] + if x := _match(p, product_patched): + return x, [f'react:{name}'] + break + + return None + + +__all__ = ['Reconstruct'] diff --git a/chython/algorithms/mcs.py b/chython/algorithms/mcs.py index 8e1bf41b..ad9daace 100644 --- a/chython/algorithms/mcs.py +++ b/chython/algorithms/mcs.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2021 Ramil Nugmanov +# Copyright 2018-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -18,20 +18,19 @@ # from collections import defaultdict from itertools import product, combinations, islice -from typing import Dict, Set, Iterator, Tuple -from ..containers import molecule +from collections.abc import Iterator class MCS: __slots__ = () - def get_mcs_mapping(self, other: 'molecule.MoleculeContainer', /, *, limit=10000) -> Iterator[Dict[int, int]]: + def get_mcs_mapping(self, other: 'MoleculeContainer', /, *, limit=10000) -> Iterator[dict[int, int]]: """ Find maximum common substructure. Based on clique searching in product graph. :param limit: limit tested cliques """ - if not isinstance(other, molecule.MoleculeContainer): + if not isinstance(other, MCS): raise TypeError('MoleculeContainer expected') core_product, full_product = self.__get_product(other) @@ -92,7 +91,7 @@ def get_mcs_mapping(self, other: 'molecule.MoleculeContainer', /, *, limit=10000 hits2.append(mapping) yield from (dict(x) for x in hits2) - def __get_product(self: 'molecule.MoleculeContainer', other: 'molecule.MoleculeContainer'): + def __get_product(self, other): bonds = self._bonds o_bonds = other._bonds @@ -157,7 +156,7 @@ def __get_product(self: 'molecule.MoleculeContainer', other: 'molecule.MoleculeC return core_product, full_product -def _clique(graph) -> Iterator[Set[Tuple[int, int]]]: +def _clique(graph) -> Iterator[set[tuple[int, int]]]: """ clique search diff --git a/chython/algorithms/morgan.py b/chython/algorithms/morgan.py index 8c8c1b30..007da514 100644 --- a/chython/algorithms/morgan.py +++ b/chython/algorithms/morgan.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2024 Ramil Nugmanov +# Copyright 2017-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -20,21 +20,16 @@ from itertools import groupby from logging import getLogger from operator import itemgetter -from typing import Dict, TYPE_CHECKING logger = getLogger('chython.morgan') -if TYPE_CHECKING: - from chython.containers import MoleculeContainer - - class Morgan: __slots__ = () @cached_property - def atoms_order(self: 'MoleculeContainer') -> Dict[int, int]: + def atoms_order(self) -> dict[int, int]: """ Morgan like algorithm for graph nodes ordering @@ -47,14 +42,14 @@ def atoms_order(self: 'MoleculeContainer') -> Dict[int, int]: return _morgan({n: hash(a) for n, a in self.atoms()}, self.int_adjacency) @cached_property - def int_adjacency(self: 'MoleculeContainer') -> Dict[int, Dict[int, int]]: + def int_adjacency(self) -> dict[int, dict[int, int]]: """ Adjacency with integer-coded bonds. """ return {n: {m: hash(b) for m, b in mb.items()} for n, mb in self._bonds.items()} -def _morgan(atoms: Dict[int, int], bonds: Dict[int, Dict[int, int]]) -> Dict[int, int]: +def _morgan(atoms: dict[int, int], bonds: dict[int, dict[int, int]]) -> dict[int, int]: tries = len(atoms) - 1 numb = len(set(atoms.values())) stab = old_numb = 0 diff --git a/chython/algorithms/rings.py b/chython/algorithms/rings.py index 8f5f7ccd..47a4980c 100644 --- a/chython/algorithms/rings.py +++ b/chython/algorithms/rings.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2025 Ramil Nugmanov +# Copyright 2017-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -18,19 +18,15 @@ # from collections import defaultdict, deque from functools import cached_property -from typing import Any, Dict, List, Set, Tuple, Union, TYPE_CHECKING +from typing import Any from ._rings import sssr -if TYPE_CHECKING: - from chython.containers import MoleculeContainer - - class Rings: __slots__ = () @cached_property - def sssr(self) -> List[Tuple[int, ...]]: + def sssr(self) -> list[tuple[int, ...]]: """ Smallest Set of Smallest Rings. Special bonds ignored. @@ -47,7 +43,7 @@ def sssr(self) -> List[Tuple[int, ...]]: return [] @cached_property - def atoms_rings(self) -> Dict[int, List[Tuple[int, ...]]]: + def atoms_rings(self) -> dict[int, list[tuple[int, ...]]]: """ A dictionary with atom numbers as keys and a list of tuples (representing SSSR rings) as values. """ @@ -58,7 +54,7 @@ def atoms_rings(self) -> Dict[int, List[Tuple[int, ...]]]: return dict(rings) @cached_property - def atoms_rings_sizes(self) -> Dict[int, Set[int]]: + def atoms_rings_sizes(self) -> dict[int, set[int]]: """ Sizes of SSSR rings containing atom. """ @@ -73,7 +69,7 @@ def rings_count(self) -> int: return sum(len(x) for x in bonds.values()) // 2 - len(bonds) + len(_connected_components(bonds)) @cached_property - def not_special_connectivity(self: 'MoleculeContainer') -> Dict[int, Set[int]]: + def not_special_connectivity(self) -> dict[int, set[int]]: """ Graph connectivity without special bonds. """ @@ -86,7 +82,7 @@ def not_special_connectivity(self: 'MoleculeContainer') -> Dict[int, Set[int]]: return bonds @cached_property - def connected_components(self: 'MoleculeContainer') -> List[Set[int]]: + def connected_components(self) -> list[set[int]]: """ Isolated components of single graph. E.g. salts as ion pair. 
""" @@ -100,14 +96,14 @@ def connected_components_count(self) -> int: return len(self.connected_components) @cached_property - def skin_graph(self: 'MoleculeContainer') -> Dict[int, Set[int]]: + def skin_graph(self) -> dict[int, set[int]]: """ Graph without terminal atoms. Only rings and linkers """ return _skin_graph(self._bonds) @cached_property - def rings_graph(self: 'MoleculeContainer'): + def rings_graph(self) -> dict[int, set[int]]: """ Graph of rings. Linkers are not included. Special bonds are considered. """ @@ -146,7 +142,7 @@ def rings_graph(self: 'MoleculeContainer'): return bonds -def _connected_components(bonds: Dict[int, Union[Set[int], Dict[int, Any]]]) -> List[Set[int]]: +def _connected_components(bonds: dict[int, set[int] | dict[int, Any]]) -> list[set[int]]: atoms = set(bonds) components = [] while atoms: @@ -164,7 +160,7 @@ def _connected_components(bonds: Dict[int, Union[Set[int], Dict[int, Any]]]) -> return components -def _skin_graph(bonds: Dict[int, Union[Set[int], Dict[int, Any]]]) -> Dict[int, Set[int]]: +def _skin_graph(bonds: dict[int, set[int] | dict[int, Any]]) -> dict[int, set[int]]: """ Graph without terminal nodes. Only rings and linkers """ diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index acef4fdc..ce025eca 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2025 Ramil Nugmanov +# Copyright 2017-2026 Ramil Nugmanov # Copyright 2019 Timur Gimadiev # This file is part of chython. # @@ -18,18 +18,13 @@ # along with this program; if not, see . 
# from abc import ABC, abstractmethod -from CachedMethods import cached_method from collections import defaultdict from functools import cached_property from heapq import heappop, heappush from itertools import product from random import random -from typing import Callable, Optional, Tuple, TYPE_CHECKING, Union - - -if TYPE_CHECKING: - from chython import MoleculeContainer, CGRContainer - from chython.containers.graph import Graph +from collections.abc import Callable +from .._functions import cached_method charge_str = {-4: '-4', -3: '-3', -2: '-2', -1: '-', 0: '0', 1: '+', 2: '+2', 3: '+3', 4: '+4'} order_str = {1: '-', 2: '=', 3: '#', 4: ':', 8: '~', None: '.'} @@ -59,7 +54,7 @@ class Smiles(ABC): __slots__ = () @property - def smiles(self): + def smiles(self) -> str: """ Generate SMILES string of the molecule. """ @@ -75,7 +70,7 @@ def __str__(self): self.__dict__['smiles_atoms_order'] = tuple(order) # cache smiles_atoms_order return ''.join(smiles) - def __format__(self: Union['Graph', 'Smiles'], format_spec, *, _return_order=False): + def __format__(self, format_spec, *, _return_order=False): """ Signature generation options. @@ -146,7 +141,7 @@ def __hash__(self): return hash(str(self)) @cached_property - def smiles_atoms_order(self) -> Tuple[int, ...]: + def smiles_atoms_order(self) -> tuple[int, ...]: """ Atoms order in canonic SMILES. """ @@ -157,7 +152,7 @@ def smiles_atoms_order(self) -> Tuple[int, ...]: self.__dict__['__cached_method___str__'] = ''.join(smiles) # cache smiles return tuple(order) - def _smiles(self: Union['Graph', 'Smiles'], weights, *, asymmetric_closures=False, + def _smiles(self, weights, *, asymmetric_closures=False, open_parenthesis='(', close_parenthesis=')', delimiter='.', _return_order=False, **kwargs): if not self._atoms: return [] @@ -326,16 +321,16 @@ def _format_bond(self, n, m, adjacency, **kwargs): def _smiles_order(self, stereo=True) -> Callable: ... 
- def _format_cxsmiles(self, order) -> Optional[str]: + def _format_cxsmiles(self, order) -> str | None: ... class MoleculeSmiles(Smiles): __slots__ = () - def sticky_smiles(self: Union['MoleculeContainer', 'MoleculeSmiles'], left: int = None, right: int = None, *, + def sticky_smiles(self, left: int = None, right: int = None, *, remove_left: bool = False, remove_right: bool = False, tries: int = 10, - keep_bond_left: bool = False, keep_bond_right: bool = False, hydrogens: bool = False): + keep_bond_left: bool = False, keep_bond_right: bool = False, hydrogens: bool = False) -> str: """ Generate smiles with fixed left and/or right terminal atoms. The right atom must be terminal if set. Use a temporary attached atom with remove_right=True as a workaround. @@ -402,13 +397,13 @@ def sticky_smiles(self: Union['MoleculeContainer', 'MoleculeSmiles'], left: int raise ValueError('either left or right atom should be specified') return ''.join(smiles) - def _smiles_order(self: 'MoleculeContainer', stereo=True): + def _smiles_order(self, stereo=True): if stereo: return self._chiral_morgan.__getitem__ else: return self.atoms_order.__getitem__ - def _format_cxsmiles(self: 'MoleculeContainer', order): + def _format_cxsmiles(self, order): cx = [] rd = [] es = defaultdict(list) @@ -433,7 +428,7 @@ def _format_cxsmiles(self: 'MoleculeContainer', order): return '|' + ','.join(cx) + '|' return None - def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): + def _format_atom(self, n, adjacency, **kwargs): atom = self._atoms[n] smi = ['', # [ @@ -495,7 +490,7 @@ def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): smi[2] = atom.atomic_symbol return ''.join(smi) - def _format_bond(self: Union['MoleculeContainer', 'MoleculeSmiles'], n, m, adjacency, **kwargs): + def _format_bond(self, n, m, adjacency, **kwargs): if not kwargs.get('bonds', True): return '' bond = self._bonds[n][m] @@ -522,7 +517,7 @@ def _format_bond(self: Union['MoleculeContainer', 
'MoleculeSmiles'], n, m, adjac else: # bond == 8 return '~' - def __ct_map(self: 'MoleculeContainer', adjacency): + def __ct_map(self, adjacency): stereo_bonds = {n for n, mb in self._bonds.items() if any(b.stereo is not None for m, b in mb.items())} if not stereo_bonds: return {} @@ -574,10 +569,10 @@ def __ct_map(self: 'MoleculeContainer', adjacency): class CGRSmiles(Smiles): __slots__ = () - def _smiles_order(self: 'CGRContainer', stereo=True): + def _smiles_order(self, stereo=True): return self.atoms_order.__getitem__ - def _format_atom(self: 'CGRContainer', n, adjacency, **kwargs): + def _format_atom(self, n, adjacency, **kwargs): atom = self._atoms[n] if atom.isotope: smi = [str(atom.isotope), atom.atomic_symbol] @@ -594,7 +589,7 @@ def _format_atom(self: 'CGRContainer', n, adjacency, **kwargs): smi.append(']') return ''.join(smi) - def _format_bond(self: 'CGRContainer', n, m, adjacency, **kwargs): + def _format_bond(self, n, m, adjacency, **kwargs): bond = self._bonds[n][m] return dyn_order_str[(bond.order, bond.p_order)] diff --git a/chython/algorithms/standardize/__init__.py b/chython/algorithms/standardize/__init__.py index 1751a68b..11f64ff4 100644 --- a/chython/algorithms/standardize/__init__.py +++ b/chython/algorithms/standardize/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/standardize/_charged.py b/chython/algorithms/standardize/_charged.py index da2dff77..93a03d6d 100644 --- a/chython/algorithms/standardize/_charged.py +++ b/chython/algorithms/standardize/_charged.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/standardize/_groups.py b/chython/algorithms/standardize/_groups.py index 828a3c67..251647cd 100644 --- a/chython/algorithms/standardize/_groups.py +++ b/chython/algorithms/standardize/_groups.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2025 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/standardize/_metal_organics.py b/chython/algorithms/standardize/_metal_organics.py index b2f9b4b0..5cc538a1 100644 --- a/chython/algorithms/standardize/_metal_organics.py +++ b/chython/algorithms/standardize/_metal_organics.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/standardize/_reagents.py b/chython/algorithms/standardize/_reagents.py index 7f93a0eb..ed445ae8 100644 --- a/chython/algorithms/standardize/_reagents.py +++ b/chython/algorithms/standardize/_reagents.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/standardize/_salts.py b/chython/algorithms/standardize/_salts.py index 0fb8edb2..9a2273c2 100644 --- a/chython/algorithms/standardize/_salts.py +++ b/chython/algorithms/standardize/_salts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022, 2023 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/standardize/_tautomers.py b/chython/algorithms/standardize/_tautomers.py index e8eb6031..3c7fdb5c 100644 --- a/chython/algorithms/standardize/_tautomers.py +++ b/chython/algorithms/standardize/_tautomers.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/standardize/molecule.py b/chython/algorithms/standardize/molecule.py index 2d5dc617..42532517 100644 --- a/chython/algorithms/standardize/molecule.py +++ b/chython/algorithms/standardize/molecule.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2025 Ramil Nugmanov +# Copyright 2018-2026 Ramil Nugmanov # Copyright 2021 Dmitrij Zanadvornykh # Copyright 2018 Tagir Akhmetshin # This file is part of chython. @@ -19,7 +19,6 @@ # along with this program; if not, see . # from collections import defaultdict -from typing import List, TYPE_CHECKING, Union, Tuple from ._charged import fixed_rules, morgan_rules from ._groups import * from ._tautomers import rules as tautomers_rules @@ -29,10 +28,6 @@ from ...periodictable import H as _H -if TYPE_CHECKING: - from chython import MoleculeContainer - - # atomic number constants H = 1 C = 6 @@ -41,9 +36,9 @@ class Standardize: __slots__ = () - def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=False, + def canonicalize(self, *, fix_tautomers=True, keep_kekule=False, ignore_pyrrole_hydrogen=False, buffer_size=7, - logging=False, ignore=True) -> Union[bool, List[Tuple[Tuple[int, ...], int, str]]]: + logging=False, ignore=True) -> bool | list: """ Convert molecule to canonical forms of functional groups and aromatic rings without explicit hydrogens. 
@@ -100,8 +95,7 @@ def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=F return s return bool(k or s or h or t or c or a) - def standardize(self: Union['MoleculeContainer', 'Standardize'], *, logging=False, ignore=True, fix_tautomers=True, - _fix_stereo=True) -> Union[bool, List[Tuple[Tuple[int, ...], int, str]]]: + def standardize(self, *, logging=False, ignore=True, fix_tautomers=True, _fix_stereo=True) -> bool | list: """ Standardize functional groups. Return True if any non-canonical group found. @@ -145,15 +139,14 @@ def standardize(self: Union['MoleculeContainer', 'Standardize'], *, logging=Fals return log return bool(fixed) - def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_molecule=True, - _fix_stereo=True) -> Union[bool, List[int]]: + def standardize_charges(self, *, logging=False, prepare_molecule=True, _fix_stereo=True) -> bool | list: """ Set canonical positions of charges in heterocycles and ferrocenes. :param logging: return list of changed atoms. :param prepare_molecule: do thiele procedure. """ - changed: List[int] = [] + changed: list[int] = [] bonds = self._bonds nsc = self.not_special_connectivity atoms = self._atoms @@ -270,15 +263,14 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol return [] return False - def standardize_tautomers(self: 'MoleculeContainer', *, logging=False, prepare_molecule=True, - _fix_stereo=True) -> Union[bool, List[int]]: + def standardize_tautomers(self, *, logging=False, prepare_molecule=True, _fix_stereo=True) -> bool | list: """ Set canonical positions of hydrogens in azoles, guanidines, etc. :param logging: return a list of changed atoms. :param prepare_molecule: apply thiele procedure. 
""" - changed: List[int] = [] + changed: list[int] = [] atoms = self._atoms bonds = self._bonds @@ -368,7 +360,7 @@ def standardize_tautomers(self: 'MoleculeContainer', *, logging=False, prepare_m return [] return False - def remove_coordinate_bonds(self: 'MoleculeContainer', *, keep_to_terminal=True, _fix_stereo=True) -> int: + def remove_coordinate_bonds(self, *, keep_to_terminal=True, _fix_stereo=True) -> int: """Remove coordinate (or hydrogen) bonds marked with 8 (any) bond :param keep_to_terminal: Keep any bonds to terminal hydrogens @@ -391,8 +383,7 @@ def remove_coordinate_bonds(self: 'MoleculeContainer', *, keep_to_terminal=True, self.fix_stereo() return len(ab) - def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo=True) -> \ - Union[int, Tuple[int, List[int]]]: + def implicify_hydrogens(self, *, logging=False, _fix_stereo=True) -> int | tuple: """ Remove explicit hydrogen if possible. Return number of removed hydrogens. Works only with Kekule forms of aromatic structures. @@ -460,8 +451,7 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo return len(to_remove), list(fixed) return len(to_remove) - def explicify_hydrogens(self: 'MoleculeContainer', *, start_map=None, _return_map=False, _fix_stereo=True) -> \ - Union[int, List[Tuple[int, int]]]: + def explicify_hydrogens(self, *, start_map=None, _return_map=False, _fix_stereo=True) -> int | list: """ Add explicit hydrogens to atoms. @@ -498,7 +488,7 @@ def explicify_hydrogens(self: 'MoleculeContainer', *, start_map=None, _return_ma return [] return 0 - def check_valence(self: 'MoleculeContainer') -> List[int]: + def check_valence(self) -> list[int]: """ Check valences of all atoms. @@ -507,7 +497,7 @@ def check_valence(self: 'MoleculeContainer') -> List[int]: # only invalid atoms have None hydrogens. 
return [n for n, a in self.atoms() if a.implicit_hydrogens is None] - def clean_isotopes(self: 'MoleculeContainer') -> bool: + def clean_isotopes(self) -> bool: """ Clean isotope marks from molecule. Return True if any isotope found. @@ -521,7 +511,7 @@ def clean_isotopes(self: 'MoleculeContainer') -> bool: return True return False - def __standardize(self: 'MoleculeContainer', rules, fix_tautomers): + def __standardize(self, rules, fix_tautomers): atoms = self._atoms bonds = self._bonds diff --git a/chython/algorithms/standardize/reaction.py b/chython/algorithms/standardize/reaction.py index 0a2d5e34..046adcdb 100644 --- a/chython/algorithms/standardize/reaction.py +++ b/chython/algorithms/standardize/reaction.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2025 Ramil Nugmanov +# Copyright 2018-2026 Ramil Nugmanov # Copyright 2021 Timur Gimadiev # Copyright 2024 Philippe Gantzer # This file is part of chython. @@ -19,61 +19,28 @@ # along with this program; if not, see . # from collections import defaultdict -from typing import List, Tuple, TYPE_CHECKING, Union from ._reagents import * from ...exceptions import MappingError -if TYPE_CHECKING: - from chython import ReactionContainer - - class StandardizeReaction: __slots__ = () - def canonicalize(self: 'ReactionContainer', *, fix_mapping: bool = True, logging=False, fix_tautomers=True) -> \ - Union[bool, List[Tuple[int, Tuple[int, ...], int, str]]]: + def canonicalize(self, *, fix_mapping: bool = True, logging=False, fix_tautomers=True) -> bool | list: """ Convert molecules to canonical forms of functional groups and aromatic rings without explicit hydrogens. Return True if in any molecule found not canonical group. :param fix_mapping: Search AAM errors of functional groups. - :param logging: return log from molecules with index of molecule. - Otherwise, return True if these groups found in any molecule. - :param fix_tautomers: convert tautomers to canonical forms. 
- """ - total = [] - for n, m in enumerate(self.molecules()): - total.extend((n, *x) for x in m.canonicalize(logging=True, fix_tautomers=fix_tautomers)) - - if fix_mapping: - total.extend((-1, x, -1, m) for m, x in self.fix_groups_mapping(logging=True)) - - if total: - self.flush_cache(keep_molecule_cache=True) - if logging: - return total - return bool(total) - - def standardize(self: 'ReactionContainer', *, fix_mapping: bool = True, logging=False, fix_tautomers=True) -> \ - Union[bool, List[Tuple[int, Tuple[int, ...], int, str]]]: - """ - Fix functional groups representation. - Return True if in any molecule fixed group. - - Deprecated method. Use `canonicalize` directly. - - :param fix_mapping: Search AAM errors of functional groups. - :param logging: return log from molecules with index of molecule. - Otherwise, return True if these groups found in any molecule. + :param logging: return processing log. :param fix_tautomers: convert tautomers to canonical forms. """ total = [] for n, m in enumerate(self.molecules()): - total.extend((n, *x) for x in m.standardize(logging=True, fix_tautomers=fix_tautomers)) + total.extend((n, x) for x in m.canonicalize(logging=True, fix_tautomers=fix_tautomers)) if fix_mapping: - total.extend((-1, x, -1, m) for m, x in self.fix_groups_mapping(logging=True)) + total.extend(self.fix_mapping(logging=True)) if total: self.flush_cache(keep_molecule_cache=True) @@ -81,7 +48,7 @@ def standardize(self: 'ReactionContainer', *, fix_mapping: bool = True, logging= return total return bool(total) - def thiele(self: 'ReactionContainer', *, fix_tautomers=True) -> bool: + def thiele(self, *, fix_tautomers=True) -> bool: """ Convert structures to aromatic form. 
Return True if in any molecule found kekule ring @@ -96,7 +63,7 @@ def thiele(self: 'ReactionContainer', *, fix_tautomers=True) -> bool: self.flush_cache(keep_molecule_cache=True) return total - def kekule(self: 'ReactionContainer', *, buffer_size=7, ignore_pyrrole_hydrogen=False) -> bool: + def kekule(self, *, buffer_size=7, ignore_pyrrole_hydrogen=False) -> bool: """ Convert structures to a kekule form. Return True if in any molecule found aromatic ring @@ -112,7 +79,7 @@ def kekule(self: 'ReactionContainer', *, buffer_size=7, ignore_pyrrole_hydrogen= self.flush_cache(keep_molecule_cache=True) return total - def clean_isotopes(self: 'ReactionContainer') -> bool: + def clean_isotopes(self) -> bool: """ Clean isotope marks for all molecules in reaction. Returns True if in any molecule found isotope. @@ -125,7 +92,7 @@ def clean_isotopes(self: 'ReactionContainer') -> bool: self.flush_cache(keep_molecule_cache=True) return flag - def clean_stereo(self: 'ReactionContainer'): + def clean_stereo(self): """ Remove stereo data """ @@ -133,20 +100,7 @@ def clean_stereo(self: 'ReactionContainer'): m.clean_stereo() self.flush_cache(keep_molecule_cache=True) - def check_valence(self: 'ReactionContainer') -> List[Tuple[int, Tuple[int, ...]]]: - """ - Check valences of all atoms of all molecules. - - Works only on molecules with aromatic rings in Kekule form. - :return: list of invalid molecules with invalid atoms lists - """ - out = [] - for n, m in enumerate(self.molecules()): - if c := m.check_valence(): - out.append((n, tuple(c))) - return out - - def implicify_hydrogens(self: 'ReactionContainer') -> int: + def implicify_hydrogens(self) -> int: """ Remove explicit hydrogens if possible. 
@@ -159,7 +113,7 @@ def implicify_hydrogens(self: 'ReactionContainer') -> int: self.flush_cache(keep_molecule_cache=True) return total - def explicify_hydrogens(self: 'ReactionContainer') -> int: + def explicify_hydrogens(self) -> int: """ Add explicit hydrogens to atoms @@ -222,7 +176,7 @@ def remove_reagents(self, *, keep_reagents: bool = False, mapping: bool = True) return self.__remove_reagents_mapping(keep_reagents) return self.__remove_reagents_rules(keep_reagents) - def __remove_reagents_rules(self: 'ReactionContainer', keep_reagents): + def __remove_reagents_rules(self, keep_reagents): if not self.reactants or not self.products: # there is no reaction return False @@ -279,7 +233,7 @@ def __remove_reagents_rules(self: 'ReactionContainer', keep_reagents): self.fix_positions() return True - def __remove_reagents_mapping(self: 'ReactionContainer', keep_reagents): + def __remove_reagents_mapping(self, keep_reagents): cgr = ~self if cgr.center_atoms: active = set(cgr.center_atoms) @@ -315,7 +269,7 @@ def __remove_reagents_mapping(self: 'ReactionContainer', keep_reagents): return False raise MappingError("Reaction center is absent according to mapping") - def contract_ions(self: 'ReactionContainer') -> bool: + def contract_ions(self) -> bool: """ Contract ions into salts (Molecules with disconnected components). Note: works only for unambiguous cases. e.g. equal anions/cations and different or equal cations/anions. diff --git a/chython/algorithms/standardize/resonance.py b/chython/algorithms/standardize/resonance.py index 38176308..21a98444 100644 --- a/chython/algorithms/standardize/resonance.py +++ b/chython/algorithms/standardize/resonance.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2025 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -16,14 +16,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from typing import List, TYPE_CHECKING, Union from ...exceptions import ValenceError -if TYPE_CHECKING: - from chython import MoleculeContainer - - # atomic number constants B = 5 C = 6 @@ -40,8 +35,7 @@ class Resonance: __slots__ = () - def fix_resonance(self: Union['MoleculeContainer', 'Resonance'], *, logging=False, - _fix_stereo=True) -> Union[bool, List[int]]: + def fix_resonance(self, *, logging=False, _fix_stereo=True) -> bool | list: """ Transform biradical or dipole resonance structures into neutral form. Return True if structure form changed. @@ -104,7 +98,7 @@ def fix_resonance(self: Union['MoleculeContainer', 'Resonance'], *, logging=Fals return [] return False - def __find_delocalize_path(self: 'MoleculeContainer', start, finish, constrains, odd_only): + def __find_delocalize_path(self, start, finish, constrains, odd_only): bonds = self._bonds stack = [(start, n, 0, b.order + 1) for n, b in bonds[start].items() if n in constrains and b.order < 3] path = [] @@ -132,7 +126,7 @@ def __find_delocalize_path(self: 'MoleculeContainer', start, finish, constrains, stack.extend((current, n, depth, bo) for n, b in bonds[current].items() if n not in seen and n in constrains and 1 <= (bo := b.order + diff) <= 3) - def __entries(self: 'MoleculeContainer'): + def __entries(self): atoms = self._atoms bonds = self._bonds errors = {n for n, a in self.atoms() if a.implicit_hydrogens is None} diff --git a/chython/algorithms/standardize/salts.py b/chython/algorithms/standardize/salts.py index 29cccdaf..9c4e5bb7 100644 --- a/chython/algorithms/standardize/salts.py +++ b/chython/algorithms/standardize/salts.py @@ -16,14 +16,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from typing import TYPE_CHECKING, List, Tuple, Union from ._salts import acids, rules -if TYPE_CHECKING: - from chython import MoleculeContainer - - # atomic number constants H = 1 N = 7 @@ -34,8 +29,7 @@ class Salts: __slots__ = () - def remove_metals(self: 'MoleculeContainer', *, - skip_elements: List[int] = None, logging=False) -> Union[bool, List]: + def remove_metals(self, *, skip_elements: list[int] = None, logging=False) -> bool | list: """ Remove disconnected S-metals and ammonia. @@ -64,7 +58,7 @@ def remove_metals(self: 'MoleculeContainer', *, return [] return False - def remove_acids(self: 'MoleculeContainer', *, logging=False) -> Union[bool, List[int]]: + def remove_acids(self, *, logging=False) -> bool | list: """ Remove common acids from organic bases salts. Works only for neutral pairs like HA+B. Use `neutralize` before. @@ -92,9 +86,7 @@ def remove_acids(self: 'MoleculeContainer', *, logging=False) -> Union[bool, Lis return [] return False - def split_metal_salts(self: 'MoleculeContainer', *, - skip_elements: List[int] = None, - logging=False) -> Union[bool, List[Tuple[int, int]]]: + def split_metal_salts(self, *, skip_elements: list[int] = None, logging=False) -> bool | list: """ Split connected S-metal salts to cation/anion pairs. diff --git a/chython/algorithms/standardize/saturation.py b/chython/algorithms/standardize/saturation.py index 03fa9c1e..d24f8725 100644 --- a/chython/algorithms/standardize/saturation.py +++ b/chython/algorithms/standardize/saturation.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2024 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -20,14 +20,9 @@ from itertools import product from operator import itemgetter from random import shuffle -from typing import TYPE_CHECKING, Dict, Optional, Union, List from ...containers.bonds import Bond from ...exceptions import ValenceError - -if TYPE_CHECKING: - from chython import MoleculeContainer - # atom, charge, unsaturation tuned_priority = {(7, 0, 0): -3, # amine (7, 0, 1): -3, # X=N-X @@ -54,9 +49,9 @@ class Saturation: __slots__ = () - def saturate(self: 'MoleculeContainer', neighbors_distances: Optional[Dict[int, Dict[int, float]]] = None, + def saturate(self, neighbors_distances: dict[int, dict[int, float]] | None = None, reset_electrons: bool = True, expected_charge: int = 0, expected_radicals_count: int = 0, - allow_errors: bool = True, logging: bool = False) -> Union[bool, List[str]]: + allow_errors: bool = True, logging: bool = False) -> bool | list: """ Saturate molecules with double and triple bonds and charges and radical states to correct valences of atoms. Note: works only with fully explicit hydrogens! @@ -186,7 +181,7 @@ def _find_possible_valences(atoms, neighbors_distances, charges, radicals, allow el = len(env) dc = charges[n] dr = radicals[n] - for charge, is_radical, valence, implicit, explicit_dict in atoms[n]._compiled_saturation_rules: + for charge, is_radical, valence, implicit, explicit_dict in atoms[n].saturation_rules: if valence < el or dc is not None and dc != charge or dr is not None and dr != is_radical: continue # skip impossible rules if explicit_dict: diff --git a/chython/algorithms/standardize/test/__init__.py b/chython/algorithms/standardize/test/__init__.py index 0f342cf9..8a32d263 100644 --- a/chython/algorithms/standardize/test/__init__.py +++ b/chython/algorithms/standardize/test/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023 Ramil Nugmanov +# Copyright 2023-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/standardize/test/test_canonicalize.py b/chython/algorithms/standardize/test/test_canonicalize.py new file mode 100644 index 00000000..59a16cfa --- /dev/null +++ b/chython/algorithms/standardize/test/test_canonicalize.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +from chython import smiles + + +def test_canonicalize_nitro(): + mol = smiles('c1ccccc1N(=O)=O') + mol.canonicalize() + s = str(mol) + assert '[N+]' in s and '[O-]' in s + + +def test_canonicalize_benzene(): + mol = smiles('C1=CC=CC=C1') + mol.canonicalize() + assert str(mol) == 'c1ccccc1' + + +def test_kekule_thiele(): + mol = smiles('c1ccccc1') + mol.kekule() + s = str(mol) + assert 'c' not in s + assert '=' in s + mol.thiele() + assert str(mol) == 'c1ccccc1' + + +def test_standardize(): + mol = smiles('c1ccccc1N(=O)=O') + mol.standardize() + s = str(mol) + assert '[N+]' in s + + +def test_neutralize(): + mol = smiles('[NH3+]CC(=O)[O-]') + mol.neutralize() + s = str(mol) + assert '+' not in s or '-' not in s + + +def test_check_valence_clean(): + mol = smiles('CCO') + errors = mol.check_valence() + assert errors == [] + + +def test_canonicalize_idempotent(): + mol = smiles('OCC') + mol.canonicalize() + s1 = str(mol) + mol.canonicalize() + s2 = str(mol) + assert s1 == s2 diff --git a/chython/algorithms/standardize/test/test_groups.py b/chython/algorithms/standardize/test/test_groups.py index c34cc505..e43f3169 100644 --- a/chython/algorithms/standardize/test/test_groups.py +++ b/chython/algorithms/standardize/test/test_groups.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023-2025 Ramil Nugmanov +# Copyright 2023-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/standardize/test/test_tautomers.py b/chython/algorithms/standardize/test/test_tautomers.py index 82d5a793..26430d04 100644 --- a/chython/algorithms/standardize/test/test_tautomers.py +++ b/chython/algorithms/standardize/test/test_tautomers.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/stereo.py b/chython/algorithms/stereo.py index de2df5a6..a91fa2c4 100644 --- a/chython/algorithms/stereo.py +++ b/chython/algorithms/stereo.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -20,7 +20,6 @@ from functools import cached_property from itertools import combinations, product from logging import getLogger, INFO -from typing import Dict, Set, Tuple, Union, List, Optional, TYPE_CHECKING from .morgan import _morgan from ..exceptions import AtomNotFound, IsChiral, NotChiral @@ -29,10 +28,6 @@ logger.setLevel(INFO) -if TYPE_CHECKING: - from chython import MoleculeContainer - - # atomic number constants H = 1 C = 6 @@ -151,7 +146,7 @@ def _allene_sign(mark, u, v, w): class MoleculeStereo: __slots__ = () - def clean_stereo(self: 'MoleculeContainer'): + def clean_stereo(self): """ Remove stereo data. """ @@ -162,7 +157,7 @@ def clean_stereo(self: 'MoleculeContainer'): self.flush_cache(keep_sssr=True, keep_components=True, keep_special_connectivity=True) @cached_property - def tetrahedrons(self: 'MoleculeContainer') -> Tuple[int, ...]: + def tetrahedrons(self) -> tuple[int, ...]: """ Carbon sp3 atom numbers. 
""" @@ -177,7 +172,7 @@ def tetrahedrons(self: 'MoleculeContainer') -> Tuple[int, ...]: return tuple(tetra) @cached_property - def cumulenes(self: 'MoleculeContainer') -> List[Tuple[int, ...]]: + def cumulenes(self) -> list[tuple[int, ...]]: """ All double-bonds chains (e.g. alkenes, allenes, cumulenes). """ @@ -215,7 +210,7 @@ def cumulenes(self: 'MoleculeContainer') -> List[Tuple[int, ...]]: return cumulenes @cached_property - def stereogenic_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Union[Tuple[int, int, int], Tuple[int, int, int, int]]]: + def stereogenic_tetrahedrons(self) -> dict[int, tuple[int, int, int] | tuple[int, int, int, int]]: """ Tetrahedrons which contains at least 3 non-hydrogen neighbors and corresponding neighbors order. """ @@ -236,7 +231,7 @@ def stereogenic_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Union[Tuple return tetrahedrons @cached_property - def stereogenic_cumulenes(self: 'MoleculeContainer') -> Dict[Tuple[int, ...], Tuple[int, int, Optional[int], Optional[int]]]: + def stereogenic_cumulenes(self) -> dict[tuple[int, ...], tuple[int, int, int | None, int | None]]: """ Cumulenes which contains at least one non-hydrogen neighbor on both ends and corresponding neighbors order. """ @@ -267,14 +262,14 @@ def stereogenic_cumulenes(self: 'MoleculeContainer') -> Dict[Tuple[int, ...], Tu return cumulenes @cached_property - def stereogenic_allenes(self) -> Dict[int, Tuple[int, int, Optional[int], Optional[int]]]: + def stereogenic_allenes(self) -> dict[int, tuple[int, int, int | None, int | None]]: """ Allenes which contains at least one non-hydrogen neighbor on both ends and corresponding neighbors order. 
""" return {path[len(path) // 2]: env for path, env in self.stereogenic_cumulenes.items() if len(path) % 2} @cached_property - def stereogenic_cis_trans(self) -> Dict[Tuple[int, int], Tuple[int, int, Optional[int], Optional[int]]]: + def stereogenic_cis_trans(self) -> dict[tuple[int, int], tuple[int, int, int | None, int | None]]: """ Cis-trans bonds which contains at least one non-hydrogen neighbor on both ends and corresponding neighbors order. """ @@ -286,7 +281,7 @@ def stereogenic_cis_trans(self) -> Dict[Tuple[int, int], Tuple[int, int, Optiona return stereo @cached_property - def ring_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Union[Tuple[int, int], Tuple[int], Tuple]]: + def ring_tetrahedrons(self) -> dict[int, tuple[int, int] | tuple[int] | tuple]: """ Tetrahedrons in rings, except ring-linkers. Values are non-ring atoms. """ @@ -301,7 +296,7 @@ def ring_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Union[Tuple[int, i return out @cached_property - def rings_linker_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Tuple[int, int, int, int]]: + def rings_linker_tetrahedrons(self) -> dict[int, tuple[int, int, int, int]]: """ A dictionary where the keys are tetrahedron atoms shared between two rings (not condensed rings) and the values are tuples representing their neighbors in the first and second rings respectively. @@ -319,7 +314,7 @@ def rings_linker_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Tuple[int, return out @cached_property - def ring_cumulenes_terminals(self: 'MoleculeContainer') -> Set[Tuple[int, int]]: + def ring_cumulenes_terminals(self) -> set[tuple[int, int]]: """ Terminal atoms of inside ring cumulenes. 
""" @@ -331,7 +326,7 @@ def ring_cumulenes_terminals(self: 'MoleculeContainer') -> Set[Tuple[int, int]]: return out @cached_property - def rings_linker_cumulenes_terminals(self: 'MoleculeContainer') -> Dict[Tuple[int, int], Tuple[int, int, int, int]]: + def rings_linker_cumulenes_terminals(self) -> dict[tuple[int, int], tuple[int, int, int, int]]: """ Terminal atoms of cumulenes connecting two rings. Values are neighbors in first and second rings. """ @@ -344,7 +339,7 @@ def rings_linker_cumulenes_terminals(self: 'MoleculeContainer') -> Dict[Tuple[in return out @cached_property - def ring_attached_cumulenes(self: 'MoleculeContainer') -> Dict[Tuple[int, int], Union[Tuple[int, int], Tuple[int]]]: + def ring_attached_cumulenes(self) -> dict[tuple[int, int], tuple[int, int] | tuple[int]]: """ Cumulenes attached to rings from one side. Values are out of ring neighbor atoms. """ @@ -366,27 +361,27 @@ def ring_attached_cumulenes(self: 'MoleculeContainer') -> Dict[Tuple[int, int], return out @property - def chiral_tetrahedrons(self) -> Set[int]: + def chiral_tetrahedrons(self) -> set[int]: """ Chiral tetrahedrons except already labeled ones. """ return self.__chiral_centers[0] @property - def chiral_cis_trans(self) -> Set[Tuple[int, int]]: + def chiral_cis_trans(self) -> set[tuple[int, int]]: """ Chiral cis-trans bonds except already labeled ones. """ return self.__chiral_centers[1] @property - def chiral_allenes(self) -> Set[int]: + def chiral_allenes(self) -> set[int]: """ Chiral allenes except already labeled ones. """ return self.__chiral_centers[2] - def add_wedge(self: 'MoleculeContainer', n: int, m: int, mark: int, *, clean_cache=True): + def add_wedge(self, n: int, m: int, mark: int, *, clean_cache=True): """ Add stereo data by wedge notation of bonds. Use it for tetrahedrons of allenes. 
@@ -467,7 +462,7 @@ def add_wedge(self: 'MoleculeContainer', n: int, m: int, mark: int, *, clean_cac else: raise NotChiral - def calculate_cis_trans_from_2d(self: 'MoleculeContainer', *, clean_cache=True): + def calculate_cis_trans_from_2d(self, *, clean_cache=True): """ Calculate cis-trans stereo bonds from given 2d coordinates. Unusable for SMILES and INCHI. """ @@ -491,7 +486,7 @@ def calculate_cis_trans_from_2d(self: 'MoleculeContainer', *, clean_cache=True): if flag and clean_cache: self.flush_cache(keep_components=True, keep_sssr=True, keep_special_connectivity=True) - def add_atom_stereo(self: 'MoleculeContainer', n: int, env: Tuple[int, ...], mark: bool, *, clean_cache=True): + def add_atom_stereo(self, n: int, env: tuple[int, ...], mark: bool, *, clean_cache=True): """ Add stereo data for specified neighbors bypass. Use it for tetrahedrons or allenes. @@ -521,8 +516,7 @@ def add_atom_stereo(self: 'MoleculeContainer', n: int, env: Tuple[int, ...], mar else: # only tetrahedrons supported raise NotChiral - def add_cis_trans_stereo(self: 'MoleculeContainer', n: int, m: int, n1: int, n2: int, mark: bool, *, - clean_cache=True): + def add_cis_trans_stereo(self, n: int, m: int, n1: int, n2: int, mark: bool, *, clean_cache=True): """ Add stereo data to cis-trans double bonds (not allenes). @@ -566,7 +560,7 @@ def flush_stereo_cache(self): self.__dict__.pop('_chiral_morgan', None) self.__dict__.pop('_MoleculeStereo__chiral_centers', None) - def fix_stereo(self: 'MoleculeContainer'): + def fix_stereo(self): """ Reset stereo marks. """ @@ -635,7 +629,7 @@ def _cis_trans_count(self) -> int: return sum(b.stereo is not None for *_, b in self.bonds()) @cached_property - def _stereo_cis_trans_centers(self) -> Dict[int, Tuple[int, int]]: + def _stereo_cis_trans_centers(self) -> dict[int, tuple[int, int]]: """ Cis-Trans terminal atoms to cis-trans key mapping. Key is central double bond in a cumulene chain. 
""" @@ -649,7 +643,7 @@ def _stereo_cis_trans_centers(self) -> Dict[int, Tuple[int, int]]: return terminals @cached_property - def _stereo_cis_trans_terminals(self) -> Dict[int, Tuple[int, int]]: + def _stereo_cis_trans_terminals(self) -> dict[int, tuple[int, int]]: """ Cis-Trans terminal and central atoms to terminal pair mapping. """ @@ -663,7 +657,7 @@ def _stereo_cis_trans_terminals(self) -> Dict[int, Tuple[int, int]]: return terminals @cached_property - def _stereo_cis_trans_counterpart(self) -> Dict[int, int]: + def _stereo_cis_trans_counterpart(self) -> dict[int, int]: """ Cis-Trans terminal atoms counterparts """ @@ -677,7 +671,7 @@ def _stereo_cis_trans_counterpart(self) -> Dict[int, int]: return counterpart @cached_property - def _stereo_allenes_centers(self) -> Dict[int, int]: + def _stereo_allenes_centers(self) -> dict[int, int]: """ Allene terminal atom to center mapping """ @@ -687,13 +681,13 @@ def _stereo_allenes_centers(self) -> Dict[int, int]: return terminals @cached_property - def _stereo_allenes_terminals(self) -> Dict[int, Tuple[int, int]]: + def _stereo_allenes_terminals(self) -> dict[int, tuple[int, int]]: """ Allene center atom to terminals mapping """ return {path[len(path) // 2]: (path[0], path[-1]) for path in self.stereogenic_cumulenes if len(path) % 2} - def _translate_tetrahedron_sign(self: 'MoleculeContainer', n, env, s=None): + def _translate_tetrahedron_sign(self, n, env, s=None): """ Get sign of chiral tetrahedron atom for specified neighbors order @@ -724,7 +718,7 @@ def _translate_tetrahedron_sign(self: 'MoleculeContainer', n, env, s=None): return not s return s - def _translate_cis_trans_sign(self: 'MoleculeContainer', n, m, nn, nm, s=None): + def _translate_cis_trans_sign(self, n, m, nn, nm, s=None): """ Get sign for specified opposite neighbors @@ -786,7 +780,7 @@ def _translate_cis_trans_sign(self: 'MoleculeContainer', n, m, nn, nm, s=None): return not s return s - def _translate_allene_sign(self: 'MoleculeContainer', c, nn, 
nm, s=None): + def _translate_allene_sign(self, c, nn, nm, s=None): """ get sign for specified opposite neighbors @@ -841,7 +835,7 @@ def _translate_allene_sign(self: 'MoleculeContainer', c, nn, nm, s=None): return s @cached_property - def _wedge_map(self: Union['MoleculeContainer', 'MoleculeStereo']): + def _wedge_map(self): atoms = self._atoms overlap = set() @@ -925,7 +919,7 @@ def _wedge_map(self: Union['MoleculeContainer', 'MoleculeStereo']): logger.info('wedge stereo mapping failed') return solved - def __wedge_sign(self: 'MoleculeContainer', order): + def __wedge_sign(self, order): if order[-1]: # allene s = self._translate_allene_sign(order[-2], order[0], order[1]) v = _allene_sign(1, self._atoms[order[2]].xy, self._atoms[order[3]].xy, self._atoms[order[1]].xy) @@ -962,7 +956,7 @@ def __wedge_sign(self: 'MoleculeContainer', order): return n, order[0], -v @cached_property - def _chiral_morgan(self: Union['MoleculeContainer', 'MoleculeStereo']) -> Dict[int, int]: + def _chiral_morgan(self) -> dict[int, int]: stereo_atoms = {n for n, a in self.atoms() if a.stereo is not None} stereo_bonds = {n for n, mb in self._bonds.items() if any(b.stereo is not None for m, b in mb.items())} if not stereo_atoms and not stereo_bonds: @@ -996,7 +990,7 @@ def _chiral_morgan(self: Union['MoleculeContainer', 'MoleculeStereo']) -> Dict[i return morgan @cached_property - def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): + def __chiral_centers(self): atoms_rings = self.atoms_rings tetrahedrons = self.stereogenic_tetrahedrons cis_trans = self.stereogenic_cis_trans @@ -1116,8 +1110,7 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): diff.add(cis_trans_terminals[n]) return chiral_t, diff, chiral_a - def __differentiation(self: Union['MoleculeStereo', 'MoleculeContainer'], morgan, - atoms_stereo, cis_trans_stereo, allenes_stereo): + def __differentiation(self, morgan, atoms_stereo, cis_trans_stereo, allenes_stereo): bonds = 
self.int_adjacency tetrahedrons = self.stereogenic_tetrahedrons diff --git a/chython/algorithms/tautomers/__init__.py b/chython/algorithms/tautomers/__init__.py index e180eaef..465e3423 100644 --- a/chython/algorithms/tautomers/__init__.py +++ b/chython/algorithms/tautomers/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2024 Ramil Nugmanov +# Copyright 2020-2026 Ramil Nugmanov # Copyright 2020 Nail Samikaev # This file is part of chython. # @@ -18,23 +18,19 @@ # along with this program; if not, see . # from collections import deque -from typing import TYPE_CHECKING, Iterator, Union +from collections.abc import Iterator from .acid_base import * from .heteroarenes import * from .keto_enol import * -if TYPE_CHECKING: - from chython import MoleculeContainer - - class Tautomers(AcidBase, HeteroArenes, KetoEnol): """ Oxides and sulphides ignored. """ __slots__ = () - def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepare_molecules=True, zwitter=True, + def enumerate_tautomers(self, *, prepare_molecules=True, zwitter=True, partial=False, increase_aromaticity=True, keep_sugars=True, heteroarenes=True, keto_enol=True, limit: int = 1000) -> Iterator['MoleculeContainer']: """ @@ -144,9 +140,9 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar if counter == limit: return - def enumerate_charged_tautomers(self: 'MoleculeContainer', *, prepare_molecules=True, partial=False, + def enumerate_charged_tautomers(self, *, prepare_molecules=True, partial=False, increase_aromaticity=True, keep_sugars=True, heteroarenes=True, - keto_enol=True, deep: int = 4, limit: int = 1000): + keto_enol=True, deep: int = 4, limit: int = 1000) -> Iterator['MoleculeContainer']: """ Enumerate tautomers and protonated-deprotonated forms. Better to use on neutralized non-ionic molecules. 
diff --git a/chython/algorithms/tautomers/_acid.py b/chython/algorithms/tautomers/_acid.py index eb4e9cfc..ff22231a 100644 --- a/chython/algorithms/tautomers/_acid.py +++ b/chython/algorithms/tautomers/_acid.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/tautomers/_base.py b/chython/algorithms/tautomers/_base.py index d4759cbc..161fe0a0 100644 --- a/chython/algorithms/tautomers/_base.py +++ b/chython/algorithms/tautomers/_base.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/tautomers/_keto_enol.py b/chython/algorithms/tautomers/_keto_enol.py index 71a38eeb..7d4d1a7d 100644 --- a/chython/algorithms/tautomers/_keto_enol.py +++ b/chython/algorithms/tautomers/_keto_enol.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022, 2023 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/tautomers/acid_base.py b/chython/algorithms/tautomers/acid_base.py index 0773ee24..9f9dc057 100644 --- a/chython/algorithms/tautomers/acid_base.py +++ b/chython/algorithms/tautomers/acid_base.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2024 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,21 +16,16 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# +from collections.abc import Iterator from itertools import combinations, product -from typing import TYPE_CHECKING, Union, List from ._acid import rules as acid_rules, stripped_rules as stripped_acid_rules from ._base import rules as base_rules, stripped_rules as stripped_base_rules -if TYPE_CHECKING: - from chython import MoleculeContainer - - class AcidBase: __slots__ = () - def neutralize(self: 'MoleculeContainer', *, keep_charge=True, logging=False, - _fix_stereo=True) -> Union[bool, List[int]]: + def neutralize(self, *, keep_charge=True, logging=False, _fix_stereo=True) -> bool | list: """ Convert organic salts to neutral form if possible. Only one possible form used for charge unbalanced structures. @@ -52,7 +47,7 @@ def neutralize(self: 'MoleculeContainer', *, keep_charge=True, logging=False, return list(changed) return True - def enumerate_charged_forms(self: 'MoleculeContainer', *, deep: int = 4, limit: int = 1000): + def enumerate_charged_forms(self, *, deep: int = 4, limit: int = 1000) -> Iterator['MoleculeContainer']: """ Enumerate protonated and deprotonated ions. Use on neutralized molecules. 
@@ -128,7 +123,7 @@ def enumerate_charged_forms(self: 'MoleculeContainer', *, deep: int = 4, limit: if not limit: return - def _neutralize(self: 'MoleculeContainer', keep_charge=True): + def _neutralize(self, keep_charge=True): donors = set() acceptors = set() for q in stripped_acid_rules: @@ -190,7 +185,7 @@ def _neutralize(self: 'MoleculeContainer', keep_charge=True): a._charge += 1 yield mol, donors | acceptors - def _enumerate_zwitter_tautomers(self: 'MoleculeContainer'): + def _enumerate_zwitter_tautomers(self): donors = set() acceptors = set() for q in acid_rules: diff --git a/chython/algorithms/tautomers/heteroarenes.py b/chython/algorithms/tautomers/heteroarenes.py index 99a154f4..b2ec5362 100644 --- a/chython/algorithms/tautomers/heteroarenes.py +++ b/chython/algorithms/tautomers/heteroarenes.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2024 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -18,15 +18,10 @@ # from collections import deque, defaultdict from itertools import product -from typing import TYPE_CHECKING from ..aromatics.kekule import _kekule_component from ...exceptions import InvalidAromaticRing -if TYPE_CHECKING: - from chython import MoleculeContainer - - # atomic number constants B = 5 C = 6 @@ -37,7 +32,7 @@ class HeteroArenes: __slots__ = () - def _enumerate_hetero_arene_tautomers(self: 'MoleculeContainer'): + def _enumerate_hetero_arene_tautomers(self): atoms = self._atoms bonds = self._bonds diff --git a/chython/algorithms/tautomers/keto_enol.py b/chython/algorithms/tautomers/keto_enol.py index ba80f63b..107a723d 100644 --- a/chython/algorithms/tautomers/keto_enol.py +++ b/chython/algorithms/tautomers/keto_enol.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2024 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -19,14 +19,9 @@ from collections import defaultdict from functools import cached_property from itertools import chain, repeat -from typing import TYPE_CHECKING, Union from ._keto_enol import * -if TYPE_CHECKING: - from chython import MoleculeContainer - - # atomic number constants C = 6 @@ -34,7 +29,7 @@ class KetoEnol: __slots__ = () - def _enumerate_keto_enol_tautomers(self: Union['MoleculeContainer', 'KetoEnol'], partial=False): + def _enumerate_keto_enol_tautomers(self, partial=False): for fix, ket in self.__enumerate_bonds(partial): if ket: a = fix[-1][1] @@ -64,7 +59,7 @@ def _sugar_groups(self): ek.append((e, k)) return ek - def __enumerate_bonds(self: 'MoleculeContainer', partial): + def __enumerate_bonds(self, partial): atoms = self._atoms bonds = self._bonds rings = self.atoms_rings_sizes diff --git a/chython/algorithms/tautomers/test/__init__.py b/chython/algorithms/tautomers/test/__init__.py index 601bac80..ee0a0a95 100644 --- a/chython/algorithms/tautomers/test/__init__.py +++ b/chython/algorithms/tautomers/test/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/tautomers/test/test_tautomers.py b/chython/algorithms/tautomers/test/test_tautomers.py index 33b42692..a325e8bf 100644 --- a/chython/algorithms/tautomers/test/test_tautomers.py +++ b/chython/algorithms/tautomers/test/test_tautomers.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/test/__init__.py b/chython/algorithms/test/__init__.py index 031c963a..aafc6f13 100644 --- a/chython/algorithms/test/__init__.py +++ b/chython/algorithms/test/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/test/test_isomorphism.py b/chython/algorithms/test/test_isomorphism.py index 4cb2c141..f92ea340 100644 --- a/chython/algorithms/test/test_isomorphism.py +++ b/chython/algorithms/test/test_isomorphism.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # Copyright 2025 Tagir Akhmetshin # This file is part of chython. # diff --git a/chython/algorithms/test/test_smiles.py b/chython/algorithms/test/test_smiles.py index b61606ae..33a31653 100644 --- a/chython/algorithms/test/test_smiles.py +++ b/chython/algorithms/test/test_smiles.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # Copyright 2025 Tagir Akhmetshin # This file is part of chython. # diff --git a/chython/algorithms/x3dom.py b/chython/algorithms/x3dom.py index 73779280..b5ded735 100644 --- a/chython/algorithms/x3dom.py +++ b/chython/algorithms/x3dom.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2024 Ramil Nugmanov +# Copyright 2020-2026 Ramil Nugmanov # Copyright 2020 Dinar Batyrshin # This file is part of chython. # @@ -18,14 +18,9 @@ # along with this program; if not, see . 
# from math import acos, sqrt -from typing import TYPE_CHECKING, Union from .depict import _render_config -if TYPE_CHECKING: - from chython import MoleculeContainer - - def plane_normal(nmx, nmy, nmz, nox, noy, noz): # return normal to plane of two vectors nm and no # m <--- n @@ -136,7 +131,7 @@ def _render_dashes(nx, ny, nz, nmx, nmy, nmz, nm_ln, r_angle=None): class X3domMolecule: __slots__ = () - def depict3d(self: Union['MoleculeContainer', 'X3domMolecule'], index: int = 0) -> str: + def depict3d(self, index: int = 0) -> str: """Get X3DOM XML string. :param index: index of conformer @@ -166,7 +161,7 @@ def view3d(self, index: int = 0, width='600px', height='400px'): """ return JupyterWidget(self.depict3d(index), width, height) - def __render_atoms(self: 'MoleculeContainer', xyz): + def __render_atoms(self, xyz): font = _render_config['font_size'] carbon = _render_config['carbon'] radius = _render_config['atom_radius'] @@ -206,7 +201,7 @@ def __render_atoms(self: 'MoleculeContainer', xyz): " \n \n") return ''.join(atoms) - def __render_bonds(self: 'MoleculeContainer', xyz): + def __render_bonds(self, xyz): bonds = self._bonds bond_color = _render_config['bond_color'] diff --git a/chython/containers/__init__.py b/chython/containers/__init__.py index ff3dd48c..a7fee7e9 100644 --- a/chython/containers/__init__.py +++ b/chython/containers/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2025 Ramil Nugmanov +# Copyright 2017-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/containers/_pack_v2.pyx b/chython/containers/_pack_v2.pyx index f216d299..b7d15e93 100644 --- a/chython/containers/_pack_v2.pyx +++ b/chython/containers/_pack_v2.pyx @@ -1,6 +1,6 @@ -# -*- coding: utf-8 -*- +# cython: freethreading_compatible=True # -# Copyright 2022-2025 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/containers/_unpack_v0v2.pyx b/chython/containers/_unpack_v0v2.pyx index fd20fc5a..167f8581 100644 --- a/chython/containers/_unpack_v0v2.pyx +++ b/chython/containers/_unpack_v0v2.pyx @@ -1,7 +1,7 @@ -# -*- coding: utf-8 -*- # cython: language_level=3 +# cython: freethreading_compatible=True # -# Copyright 2021-2025 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -271,12 +271,12 @@ common_isotopes[:] = [0, -15, -12, -9, -7, -5, -4, -2, 0, 3, 4, 7, 8, 11, 12, 15 222, 221, 228, 227, 231, 231, 235, 236, 241, 242, 243, 244, 245, 254, 253, 254, 254, 262, 265, 265, 269, 262, 273, 273, 277, 281, 278] -cdef list elements -elements = [None, H, He, Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, Si, P, S, Cl, Ar, K, Ca, Sc, Ti, V, Cr, Mn, Fe, Co, +cdef tuple elements +elements = (None, H, He, Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, Si, P, S, Cl, Ar, K, Ca, Sc, Ti, V, Cr, Mn, Fe, Co, Ni, Cu, Zn, Ga, Ge, As, Se, Br, Kr, Rb, Sr, Y, Zr, Nb, Mo, Tc, Ru, Rh, Pd, Ag, Cd, In, Sn, Sb, Te, I, Xe, Cs, Ba, La, Ce, Pr, Nd, Pm, Sm, Eu, Gd, Tb, Dy, Ho, Er, Tm, Yb, Lu, Hf, Ta, W, Re, Os, Ir, Pt, Au, Hg, Tl, Pb, Bi, Po, At, Rn, Fr, Ra, Ac, Th, Pa, U, Np, Pu, Am, Cm, Bk, Cf, Es, Fm, Md, No, Lr, Rf, Db, Sg, - Bh, Hs, Mt, Ds, Rg, Cn, Nh, Fl, Mc, Lv, Ts, Og] + Bh, Hs, Mt, Ds, Rg, Cn, Nh, Fl, Mc, Lv, Ts, Og) cdef double double_from_bytes(unsigned char a, unsigned char b): diff --git a/chython/containers/bonds.py b/chython/containers/bonds.py index 727b15d7..3571041c 100644 --- a/chython/containers/bonds.py +++ b/chython/containers/bonds.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2025 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/containers/cgr.py b/chython/containers/cgr.py index aca16051..2fe0c4d8 100644 --- a/chython/containers/cgr.py +++ b/chython/containers/cgr.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2025 Ramil Nugmanov +# Copyright 2017-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/containers/chimera.py b/chython/containers/chimera.py index 0200a82c..b4f53e6c 100644 --- a/chython/containers/chimera.py +++ b/chython/containers/chimera.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,7 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from CachedMethods import class_cached_property +from .._java import get_cdk class Chimera: @@ -29,7 +29,8 @@ def to_cdk(self): Due to translation through SMILES string, atom order is not preserved. Use `self.smiles_atoms_order` to map atoms back. """ - parser = self._cdk_engine.smiles.SmilesParser(self._cdk_engine.DefaultChemObjectBuilder.getInstance()) + cdk = get_cdk() + parser = cdk.smiles.SmilesParser(cdk.DefaultChemObjectBuilder.getInstance()) return parser.parseSmiles(str(self)) def to_openbabel(self): @@ -41,8 +42,10 @@ def to_openbabel(self): """ from openbabel import openbabel + conv = openbabel.OBConversion() + conv.SetInFormat('smi') mol = openbabel.OBMol() - assert self._obparser(mol, str(self)), 'OpenBabel failed to parse smiles' + assert conv.ReadString(mol, str(self)), 'OpenBabel failed to parse smiles' return mol def to_indigo(self): @@ -52,41 +55,9 @@ def to_indigo(self): Due to translation through SMILES string, atom order is not preserved. 
Use `self.smiles_atoms_order` to map atoms back. """ - return self._indigo_engine.loadMolecule(str(self)) - - @class_cached_property - def _cdk_engine(self): - try: - from jpype import isJVMStarted, startJVM, JPackage - - if not isJVMStarted(): - from chython import class_paths - - startJVM('--enable-native-access=ALL-UNNAMED', classpath=class_paths) - - return JPackage('org').openscience.cdk - except (ImportError, AttributeError): - raise ImportError('Java/JPype/CDK.jar is not installed or broken. make sure CDK_PATH env variable is set') - - @class_cached_property - def _indigo_engine(self): from indigo import Indigo - return Indigo() - - @class_cached_property - def _obparser(self): - from openbabel import openbabel - - obparser = openbabel.OBConversion() - obparser.SetInFormat('smi') - return obparser.ReadString - - @class_cached_property - def _obgen2d(self): - from openbabel import openbabel - - return openbabel.OBOp.FindType('gen2D').Do + return Indigo().loadMolecule(str(self)) __all__ = ['Chimera'] diff --git a/chython/containers/graph.py b/chython/containers/graph.py index 51fb0412..7c9c5141 100644 --- a/chython/containers/graph.py +++ b/chython/containers/graph.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2024 Ramil Nugmanov +# Copyright 2018-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -28,7 +28,6 @@ class Graph(Generic[Atom, Bond], ABC): __slots__ = ('_atoms', '_bonds', '__dict__') - __class_cache__ = {} _atoms: Dict[int, Atom] _bonds: Dict[int, Dict[int, Bond]] diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 291327df..bd621209 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2025 Ramil Nugmanov +# Copyright 2017-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from CachedMethods import cached_args_method from collections import Counter, defaultdict from functools import cached_property from lazy_object_proxy import Proxy @@ -32,6 +31,7 @@ from ..algorithms.calculate2d import Calculate2DMolecule from ..algorithms.conformers import Conformers from ..algorithms.depict import DepictMolecule +from ..algorithms.groups import FunctionalGroups from ..algorithms.isomorphism import MoleculeIsomorphism from ..algorithms.fingerprints import Fingerprints from ..algorithms.mcs import MCS @@ -66,7 +66,7 @@ def _rotable_rules(): class MoleculeContainer(MoleculeStereo, Graph[Element, Bond], Morgan, Rings, MoleculeIsomorphism, Aromatize, StandardizeMolecule, MoleculeSmiles, DepictMolecule, Calculate2DMolecule, - Conformers, Fingerprints, Tautomers, RDkit, Chimera, MCS, X3domMolecule): + Conformers, Fingerprints, Tautomers, RDkit, Chimera, MCS, X3domMolecule, FunctionalGroups): __slots__ = ('_meta', '_name', '_conformers', '_changed', '_backup') def __init__(self): @@ -116,7 +116,6 @@ def environment(self, atom: int, include_bond: bool = True, include_atom: bool = return tuple(self._bonds[atom].items()) return tuple(self._bonds[atom]) - @cached_args_method def adjacency_matrix(self, set_bonds=False, /): """ Adjacency matrix of Graph. 
@@ -369,20 +368,24 @@ def substructure(self, atoms: Iterable[int], *, recalculate_hydrogens=True) -> ' """ if not atoms: raise ValueError('empty atoms list not allowed') - if set(atoms) - self._atoms.keys(): + atoms_set = set(atoms) + if atoms_set - self._atoms.keys(): raise ValueError('invalid atom numbers') - atoms = tuple(n for n in self if n in atoms) # save original order + atoms = tuple(n for n in self if n in atoms_set) # save original order sub = object.__new__(self.__class__) sub._name = sub._meta = sub._changed = sub._backup = None - sub._atoms = {n: self._atoms[n].copy(hydrogens=not recalculate_hydrogens, stereo=True) for n in atoms} + sub._atoms = {n: self._atoms[n].copy(hydrogens=True, stereo=True) for n in atoms} sub._bonds = sb = {} for n in atoms: sb[n] = sbn = {} for m, bond in self._bonds[n].items(): if m in sb: # bond partially exists. need back-connection. sbn[m] = sb[m][n] - elif m in atoms: + elif m in atoms_set: sbn[m] = bond.copy(stereo=True) + if recalculate_hydrogens: + # Only recalculate H on atoms that lost neighbors + sub._changed = {n for n in atoms if not self._bonds[n].keys() <= atoms_set} sub.fix_structure(recalculate_hydrogens=recalculate_hydrogens) sub.fix_stereo() return sub @@ -621,8 +624,12 @@ def fix_structure(self, recalculate_hydrogens=True): self.calc_labels() # refresh all labels if recalculate_hydrogens: - for n in (self._changed or self._atoms): - self.calc_implicit(n) # fix Hs count + if not self._changed: + for n in self._atoms: + self.calc_implicit(n) + else: + for n in self._changed.intersection(self._atoms): + self.calc_implicit(n) # fix Hs count self._changed = None def calc_labels(self): diff --git a/chython/containers/query.py b/chython/containers/query.py index 391bd452..341a53f9 100644 --- a/chython/containers/query.py +++ b/chython/containers/query.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2025 Ramil Nugmanov +# Copyright 2018-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -62,5 +62,10 @@ def union(self, other: 'QueryContainer', *, remap: bool = False, copy: bool = Tr raise TypeError('QueryContainer expected') return super().union(other, remap=remap, copy=copy) + def copy(self): + copy = super().copy() + copy._smarts = self._smarts + return copy + __all__ = ['QueryContainer'] diff --git a/chython/containers/rdkit.py b/chython/containers/rdkit.py index 106d6889..f43c6aa6 100644 --- a/chython/containers/rdkit.py +++ b/chython/containers/rdkit.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2025 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/containers/reaction.py b/chython/containers/reaction.py index 92c3f3cb..f41296e1 100644 --- a/chython/containers/reaction.py +++ b/chython/containers/reaction.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2025 Ramil Nugmanov +# Copyright 2017-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from CachedMethods import cached_method from functools import reduce from itertools import chain from math import ceil @@ -29,6 +28,7 @@ from ..algorithms.depict import DepictReaction from ..algorithms.mapping import Mapping from ..algorithms.standardize import StandardizeReaction +from .._functions import cached_method class ReactionContainer(StandardizeReaction, Mapping, Calculate2DReaction, DepictReaction): diff --git a/chython/containers/test/__init__.py b/chython/containers/test/__init__.py index 031c963a..aafc6f13 100644 --- a/chython/containers/test/__init__.py +++ b/chython/containers/test/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/containers/test/test_molecule.py b/chython/containers/test/test_molecule.py new file mode 100644 index 00000000..f620cec5 --- /dev/null +++ b/chython/containers/test/test_molecule.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +from chython import smiles, MoleculeContainer + + +def test_smiles_parse_ethanol(): + mol = smiles('CCO') + assert len(mol) == 3 + assert str(mol) == 'CCO' + + +def test_smiles_parse_benzene(): + mol = smiles('c1ccccc1') + assert len(mol) == 6 + assert str(mol) == 'c1ccccc1' + + +def test_molecule_equality(): + mol1 = smiles('CCO') + mol2 = smiles('OCC') + assert mol1 == mol2 + assert hash(mol1) == hash(mol2) + + +def test_molecule_inequality(): + mol1 = smiles('CCO') + mol2 = smiles('CC=O') + assert mol1 != mol2 + + +def test_brutto(): + mol = smiles('CC(=O)Oc1ccccc1C(=O)O') # aspirin + assert mol.brutto == {'C': 9, 'H': 8, 'O': 4} + + +def test_molecular_mass(): + mol = smiles('O') # water + mass = mol.molecular_mass + assert 16 < mass < 20 + + +def test_rings(): + mol = smiles('c1ccc2ccccc2c1') # naphthalene + assert mol.rings_count == 2 + assert len(mol.sssr) == 2 + + +def test_aromatic_rings(): + 
mol = smiles('c1ccccc1') + assert len(mol.aromatic_rings) == 1 + + +def test_connected_components(): + mol = smiles('[Cl-].[Na+]') + assert mol.connected_components_count == 2 + parts = mol.split() + assert len(parts) == 2 + + +def test_add_atom_bond(): + mol = MoleculeContainer() + n1 = mol.add_atom('C') + n2 = mol.add_atom('C') + n3 = mol.add_atom('O') + mol.add_bond(n1, n2, 1) + mol.add_bond(n2, n3, 1) + assert str(mol) == 'CCO' + + +def test_implicit_hydrogens(): + mol = smiles('C') # methane + atom = mol.atom(next(iter(mol))) + assert atom.implicit_hydrogens == 4 + + +def test_copy(): + mol = smiles('CCO') + mol2 = mol.copy() + assert str(mol) == str(mol2) + assert mol is not mol2 + + +def test_substructure(): + mol = smiles('Cc1ccccc1') # toluene + atoms = list(mol) + ring_atoms = atoms[1:] + sub = mol.substructure(ring_atoms) + assert len(sub) == 6 + + +def test_union(): + mol1 = smiles('C') + mol2 = smiles('O') + merged = mol1.union(mol2, remap=True) + assert len(merged) == 2 + + +def test_explicify_implicify(): + mol = smiles('C') + n_before = len(mol) + mol.explicify_hydrogens() + assert len(mol) == 5 # C + 4H + mol.implicify_hydrogens() + assert len(mol) == n_before + + +def test_pickle(): + from pickle import loads, dumps + mol = smiles('c1ccccc1') + data = dumps(mol) + mol2 = loads(data) + assert str(mol) == str(mol2) + + +def test_format_specifiers(): + mol = smiles('[CH3:1][OH:2]') + s_map = format(mol, 'm') + assert ':1]' in s_map or ':2]' in s_map diff --git a/chython/containers/test/test_pack.py b/chython/containers/test/test_pack.py new file mode 100644 index 00000000..f8ac38da --- /dev/null +++ b/chython/containers/test/test_pack.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +from chython import smiles, MoleculeContainer, ReactionContainer, unpack + + +def test_molecule_pack_unpack(): + mol = smiles('c1ccccc1') + data = mol.pack() + mol2 = MoleculeContainer.unpack(data) + assert str(mol) == str(mol2) + + +def test_molecule_pack_uncompressed(): + 
mol = smiles('CCO') + data = mol.pack(compressed=False) + mol2 = unpack(data, compressed=False) + assert str(mol) == str(mol2) + + +def test_molecule_bytes(): + mol = smiles('[Cu+2]') + data = bytes(mol) + mol2 = unpack(data) + assert str(mol) == str(mol2) + + +def test_pack_preserves_coordinates(): + mol = smiles('CCO') + mol.clean2d() + data = mol.pack() + mol2 = MoleculeContainer.unpack(data) + for n in mol: + assert abs(mol.atom(n).x - mol2.atom(n).x) < 0.01 + assert abs(mol.atom(n).y - mol2.atom(n).y) < 0.01 + + +def test_pack_preserves_charge(): + mol = smiles('[NH4+]') + data = mol.pack() + mol2 = unpack(data) + assert str(mol) == str(mol2) + + +def test_pack_preserves_isotope(): + mol = smiles('[2H]C([2H])([2H])[2H]') + data = mol.pack() + mol2 = unpack(data) + assert str(mol) == str(mol2) + + +def test_reaction_pack_unpack(): + rxn = smiles('[CH3:1][OH:2]>>[CH3:1][NH2:3]') + data = rxn.pack() + rxn2 = ReactionContainer.unpack(data) + assert str(rxn) == str(rxn2) + + +def test_pack_len(): + mol = smiles('CCCCC') + data = mol.pack(compressed=False) + assert MoleculeContainer.pack_len(data, compressed=False) == 5 diff --git a/chython/containers/test/test_rdkit.py b/chython/containers/test/test_rdkit.py index 9c8a35b4..ce51b68a 100644 --- a/chython/containers/test/test_rdkit.py +++ b/chython/containers/test/test_rdkit.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/containers/test/test_reaction.py b/chython/containers/test/test_reaction.py new file mode 100644 index 00000000..520ba462 --- /dev/null +++ b/chython/containers/test/test_reaction.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +from chython import smiles + + +def test_reaction_parse(): + rxn = smiles('[CH3:1][OH:2]>>[CH3:1][NH2:3]') + assert len(rxn.reactants) == 1 + assert len(rxn.products) == 1 + + +def test_reaction_components(): + rxn = smiles('CCO.CC(=O)O>>CCOC(=O)C.O') + assert len(rxn.reactants) == 2 + assert len(rxn.products) == 2 + + +def test_reaction_equality(): + rxn1 = smiles('[CH3:1][OH:2]>>[CH3:1][NH2:3]') + rxn2 = smiles('[CH3:1][OH:2]>>[CH3:1][NH2:3]') + assert rxn1 == rxn2 + + +def test_reaction_canonicalize(): + rxn = smiles('CCO.CC(=O)O>>CCOC(=O)C.O') + rxn.canonicalize() + assert str(rxn) + + +def test_reaction_molecules(): + rxn = smiles('[CH3:1][OH:2]>>[CH3:1][NH2:3]') + mols = list(rxn.molecules()) + assert len(mols) == 2 + + +def test_reaction_pickle(): + from pickle import loads, dumps + rxn = smiles('[CH3:1][OH:2]>>[CH3:1][NH2:3]') + data = dumps(rxn) + rxn2 = loads(data) + assert str(rxn) == str(rxn2) diff --git a/chython/exceptions.py b/chython/exceptions.py index 6f47d503..7bf6dac4 100644 --- a/chython/exceptions.py +++ b/chython/exceptions.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2024 Ramil Nugmanov +# Copyright 2017-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/files/MRVrw.py b/chython/files/MRVrw.py index 543f33dd..f341c432 100644 --- a/chython/files/MRVrw.py +++ b/chython/files/MRVrw.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2024 Ramil Nugmanov +# Copyright 2017-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -17,11 +17,10 @@ # along with this program; if not, see . # from collections import defaultdict -from io import StringIO, BytesIO, TextIOWrapper, BufferedIOBase, BufferedReader from itertools import count, islice, chain -from lxml.etree import iterparse, QName, tostring from pathlib import Path from typing import Union, List, Iterator, Dict, Optional +from xml.etree.ElementTree import iterparse, tostring from ._convert import create_molecule, create_reaction from ._mapping import postprocess_parsed_molecule, postprocess_parsed_reaction from .mdl import postprocess_molecule @@ -33,6 +32,12 @@ 'Any': 8, 'any': 8, 'A': 4, 'a': 4, '1': 1, '2': 2, '3': 3} +def _local_name(tag): + if tag[0] == '{': + return tag.split('}', 1)[1] + return tag + + def xml_dict(parent_element, stop_list=None): stop_list = set() if stop_list is None else set(stop_list) out = {} @@ -46,9 +51,12 @@ def xml_dict(parent_element, stop_list=None): if len(parent_element): elements_grouped = defaultdict(list) for element in parent_element: - name = QName(element).localname + name = _local_name(element.tag) if name in stop_list: - text.append(tostring(element, encoding=str, with_tail=False)) + tail = element.tail + element.tail = None + text.append(tostring(element, encoding='unicode')) + element.tail = tail else: elements_grouped[name].append(element) @@ -97,18 +105,18 @@ def __init__(self, file, *, ignore: bool = True, remap: bool = False, elif isinstance(file, Path): self.__file = file.open('rb') self.__is_buffer = False - elif isinstance(file, (BytesIO, BufferedReader, BufferedIOBase)): + elif hasattr(file, 'read'): self.__file = file self.__is_buffer = True else: - raise TypeError('invalid file. BytesIO, BufferedReader and BufferedIOBase subclasses expected') + raise TypeError('invalid file. 
file-like object or path to file expected') self.__ignore = ignore self.__remap = remap self.__calc_cis_trans = calc_cis_trans self.__ignore_stereo = ignore_stereo self.__ignore_bad_isotopes = ignore_bad_isotopes self.__tell = 0 - self.__xml = iterparse(self.__file, tag='{*}MChemicalStruct') + self.__xml = iterparse(self.__file, events=('end',)) self.__buffer = None def read(self, amount: Optional[int] = None) -> List[Union[ReactionContainer, MoleculeContainer]]: @@ -171,8 +179,8 @@ def read_structure(self, *, current: bool = True): rxn = create_reaction(tmp, ignore_bad_isotopes=self.__ignore_bad_isotopes, _m_cls=self.molecule_cls, _r_cls=self.reaction_cls) if not self.__ignore_stereo: - for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): - postprocess_molecule(mol, tmp, calc_cis_trans=self.__calc_cis_trans) + for mol, parsed in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): + postprocess_molecule(mol, parsed, calc_cis_trans=self.__calc_cis_trans) if meta: rxn.meta.update(meta) return rxn @@ -211,6 +219,7 @@ def read_metadata(self, *, current: bool = True) -> Dict[str, str]: if 'chython_unparsed_metadata' not in meta: meta['chython_unparsed_metadata'] = [] meta['chython_unparsed_metadata'].append(x) + return meta else: return {} @@ -250,13 +259,14 @@ def __next__(self) -> Union[ReactionContainer, MoleculeContainer]: def _read_block(self, *, current: bool = True) -> dict: if not current or not self.__buffer: self.__buffer = None - try: - e = next(self.__xml)[1] - except StopIteration: + for event, e in self.__xml: + if _local_name(e.tag) == 'MChemicalStruct': + self.__buffer = xml_dict(e) + self.__tell += 1 + e.clear() + break + else: raise EOFError - self.__buffer = xml_dict(e) - self.__tell += 1 - e.clear() return self.__buffer @@ -383,12 +393,11 @@ def __init__(self, file, mapping: bool = True): elif isinstance(file, Path): self.__file = file.open('w') self.__is_buffer = False - elif 
isinstance(file, (TextIOWrapper, StringIO)): + elif hasattr(file, 'write'): self.__file = file self.__is_buffer = True else: - raise TypeError('invalid file. ' - 'TextIOWrapper, StringIO, BytesIO, BufferedReader and BufferedIOBase subclasses possible') + raise TypeError('invalid file. file-like object or path to file expected') self.__writable = True self.__finalized = False self.__mapping = mapping diff --git a/chython/files/PDBrw.py b/chython/files/PDBrw.py index 01ad869f..1641c691 100644 --- a/chython/files/PDBrw.py +++ b/chython/files/PDBrw.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2024 Ramil Nugmanov +# Copyright 2020-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,8 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from fileinput import FileInput -from io import StringIO, TextIOWrapper from itertools import islice from pathlib import Path from typing import Optional, Sequence, Iterator, List @@ -64,11 +62,11 @@ def __init__(self, file, *, buffer_size=10000, ignore: bool = True, element_name elif isinstance(file, Path): self.__file = file.open() self.__is_buffer = False - elif isinstance(file, (TextIOWrapper, StringIO, FileInput)): + elif hasattr(file, '__iter__') and hasattr(file, 'read'): self.__file = file self.__is_buffer = True else: - raise TypeError('invalid file. TextIOWrapper, StringIO subclasses expected') + raise TypeError('invalid file. file-like object or path to file expected') self.__radius_multiplier = radius_multiplier self.__ignore = ignore diff --git a/chython/files/RDFrw.py b/chython/files/RDFrw.py index 9e8a20f2..79ae6194 100644 --- a/chython/files/RDFrw.py +++ b/chython/files/RDFrw.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2024 Ramil Nugmanov +# Copyright 2014-2026 Ramil Nugmanov # Copyright 2019 Dinar Batyrshin # This file is part of chython. 
# @@ -196,7 +196,12 @@ def _read_metadata(self, *, current: bool = True): return data[self.__m_start:] -class _RDFWrite: +class RDFWrite(MOLWrite): + """ + MDL RDF files writer. works similar to opened for writing file object. support `with` context manager. + on initialization accept opened for writing in text mode file, string path to file, + pathlib.Path object or another buffered writer object + """ def __init__(self, file, *, append: bool = False, mapping: bool = True): """ :param append: append to existing file (True) or rewrite it (False). For buffered writer object append = False @@ -208,20 +213,10 @@ def __init__(self, file, *, append: bool = False, mapping: bool = True): self.write = self.__write def __write(self, data): - """ - write single molecule or reaction into file - """ del self.write self._file.write(strftime('$RDFILE 1\n$DATM %m/%d/%y %H:%M\n')) self.write(data) - -class RDFWrite(_RDFWrite, MOLWrite): - """ - MDL RDF files writer. works similar to opened for writing file object. support `with` context manager. - on initialization accept opened for writing in text mode file, string path to file, - pathlib.Path object or another buffered writer object - """ def write(self, data: Union[ReactionContainer, MoleculeContainer]): file = self._file if isinstance(data, ReactionContainer): @@ -240,12 +235,28 @@ def write(self, data: Union[ReactionContainer, MoleculeContainer]): file.write(f'$DTYPE {k}\n$DATUM {v}\n') -class ERDFWrite(_RDFWrite, EMOLWrite): +class ERDFWrite(EMOLWrite): """ MDL V3000 RDF files writer. works similar to opened for writing file object. support `with` context manager. on initialization accept opened for writing in text mode file, string path to file, pathlib.Path object or another buffered writer object """ + def __init__(self, file, *, append: bool = False, mapping: bool = True, absolute: bool = False): + """ + :param append: append to existing file (True) or rewrite it (False). 
For buffered writer object append = False + will write RDF header and append = True will omit the header. + :param mapping: write atom mapping. + :param absolute: explicitly write MDLV30/STEABS collection for stereocenters without extended stereo groups. + """ + super().__init__(file, append=append, mapping=mapping, absolute=absolute) + if not append or not (self._is_buffer or self._file.tell() != 0): + self.write = self.__write + + def __write(self, data): + del self.write + self._file.write(strftime('$RDFILE 1\n$DATM %m/%d/%y %H:%M\n')) + self.write(data) + def write(self, data: Union[ReactionContainer, MoleculeContainer]): file = self._file if isinstance(data, ReactionContainer): diff --git a/chython/files/SDFrw.py b/chython/files/SDFrw.py index 232f3fe6..552b702c 100644 --- a/chython/files/SDFrw.py +++ b/chython/files/SDFrw.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2024 Ramil Nugmanov +# Copyright 2014-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/files/__init__.py b/chython/files/__init__.py index 1265b583..02ad2c2b 100644 --- a/chython/files/__init__.py +++ b/chython/files/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2025 Ramil Nugmanov +# Copyright 2014-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/files/_convert.py b/chython/files/_convert.py index e25a93cd..6b98e1bd 100644 --- a/chython/files/_convert.py +++ b/chython/files/_convert.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023, 2024 Ramil Nugmanov +# Copyright 2023-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/files/_mapping.py b/chython/files/_mapping.py index 57bbea86..5d6b5e7a 100644 --- a/chython/files/_mapping.py +++ b/chython/files/_mapping.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2025 Ramil Nugmanov +# Copyright 2014-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/files/_xyz.pyx b/chython/files/_xyz.pyx index 559815d7..703c5776 100644 --- a/chython/files/_xyz.pyx +++ b/chython/files/_xyz.pyx @@ -1,6 +1,6 @@ -# -*- coding: utf-8 -*- +# cython: freethreading_compatible=True # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/files/daylight/__init__.py b/chython/files/daylight/__init__.py index f220c01d..ca454664 100644 --- a/chython/files/daylight/__init__.py +++ b/chython/files/daylight/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2023 Ramil Nugmanov +# Copyright 2018-2026 Ramil Nugmanov # Copyright 2019 Artem Mukanov # This file is part of chython. # diff --git a/chython/files/daylight/parser.py b/chython/files/daylight/parser.py index f45d020c..d806c928 100644 --- a/chython/files/daylight/parser.py +++ b/chython/files/daylight/parser.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022, 2023 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/files/daylight/smarts.py b/chython/files/daylight/smarts.py index 40c3cd58..cfbafdd5 100644 --- a/chython/files/daylight/smarts.py +++ b/chython/files/daylight/smarts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2025 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/files/daylight/smiles.py b/chython/files/daylight/smiles.py index 7cae7c20..3565b6cb 100644 --- a/chython/files/daylight/smiles.py +++ b/chython/files/daylight/smiles.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2025 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/files/daylight/test/__init__.py b/chython/files/daylight/test/__init__.py index 031c963a..aafc6f13 100644 --- a/chython/files/daylight/test/__init__.py +++ b/chython/files/daylight/test/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/files/daylight/test/test_daylight_smarts.py b/chython/files/daylight/test/test_daylight_smarts.py index cbca38f2..aa465345 100644 --- a/chython/files/daylight/test/test_daylight_smarts.py +++ b/chython/files/daylight/test/test_daylight_smarts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # Copyright 2025 Tagir Akhmetshin # This file is part of chython. # diff --git a/chython/files/daylight/test/test_daylight_smiles.py b/chython/files/daylight/test/test_daylight_smiles.py index d60f3abc..5081abae 100644 --- a/chython/files/daylight/test/test_daylight_smiles.py +++ b/chython/files/daylight/test/test_daylight_smiles.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # Copyright 2025 Tagir Akhmetshin # This file is part of chython. 
# diff --git a/chython/files/daylight/test/test_parser.py b/chython/files/daylight/test/test_parser.py index f2f48388..602fe071 100644 --- a/chython/files/daylight/test/test_parser.py +++ b/chython/files/daylight/test/test_parser.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # Copyright 2025 Tagir Akhmetshin # This file is part of chython. # diff --git a/chython/files/daylight/test/test_tokenize.py b/chython/files/daylight/test/test_tokenize.py index 3532813a..aa636671 100644 --- a/chython/files/daylight/test/test_tokenize.py +++ b/chython/files/daylight/test/test_tokenize.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # Copyright 2025 Tagir Akhmetshin # This file is part of chython. # diff --git a/chython/files/daylight/tokenize.py b/chython/files/daylight/tokenize.py index fe626b9f..0397c073 100644 --- a/chython/files/daylight/tokenize.py +++ b/chython/files/daylight/tokenize.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2025 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/files/libinchi/__init__.py b/chython/files/libinchi/__init__.py index a3cf7a72..324bfca5 100644 --- a/chython/files/libinchi/__init__.py +++ b/chython/files/libinchi/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023 Ramil Nugmanov +# Copyright 2023-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/files/libinchi/wrapper.py b/chython/files/libinchi/wrapper.py index fb269dcc..f398dfe6 100644 --- a/chython/files/libinchi/wrapper.py +++ b/chython/files/libinchi/wrapper.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2024 Ramil Nugmanov +# Copyright 2018-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -17,8 +17,10 @@ # along with this program; if not, see . # from ctypes import c_char, c_double, c_short, c_long, c_char_p, c_byte, POINTER, Structure, cdll, byref +from importlib.resources import files, as_file from itertools import count from sysconfig import get_platform +from threading import Lock from warnings import warn from .._convert import create_molecule from ...containers import MoleculeContainer @@ -27,12 +29,6 @@ from ...periodictable import H as _H -try: - from importlib.resources import files, as_file -except ImportError: # python3.8 - from importlib_resources import files, as_file - - H = 1 @@ -44,54 +40,56 @@ def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> Mo raise ImportError('libINCHI not found') structure = INCHIStructure() - if lib.GetStructFromINCHI(byref(InputINCHI(data)), byref(structure)): - lib.FreeStructFromINCHI(byref(structure)) - raise ValueError('invalid INCHI') - - atoms, bonds = [], [] - protium = {} - deuterium = {} - tritium = {} - seen = set() - for n in range(structure.num_atoms): - seen.add(n) - atom = structure.atom[n] - - atoms.append({'element': atom.atomic_symbol, 'charge': atom.charge, 'x': atom.x, 'y': atom.y, - 'z': atom.z, 'isotope': atom.isotope, 'is_radical': atom.is_radical, - 'implicit_hydrogens': atom.implicit_hydrogens, 'delta_isotope': atom.delta_isotope}) - if atom.implicit_protium: - protium[n] = atom.implicit_protium - if atom.implicit_deuterium: - deuterium[n] = atom.implicit_deuterium - if 
atom.implicit_tritium: - tritium[n] = atom.implicit_tritium - - for k in range(atom.num_bonds): - m = atom.neighbor[k] - if m in seen: - continue - order = atom.bond_type[k] - if order: - bonds.append((n, m, order)) - - stereo_atoms = [] - stereo_allenes = [] - stereo_cumulenes = [] - for i in range(structure.num_stereo0D): - stereo = structure.stereo0D[i] - sign = stereo.sign - if sign is not None: - if stereo.is_tetrahedral: - stereo_atoms.append((stereo.central_atom, stereo.neighbors, sign)) - elif stereo.is_allene: - nn, *_, nm = stereo.neighbors - stereo_allenes.append((stereo.central_atom, nn, nm, sign)) - elif stereo.is_cumulene: - nn, n, m, nm = stereo.neighbors - stereo_cumulenes.append((n, m, nn, nm, sign)) - - lib.FreeStructFromINCHI(byref(structure)) + with _lib_lock: + if lib.GetStructFromINCHI(byref(InputINCHI(data)), byref(structure)): + lib.FreeStructFromINCHI(byref(structure)) + raise ValueError('invalid INCHI') + + try: + atoms, bonds = [], [] + protium = {} + deuterium = {} + tritium = {} + seen = set() + for n in range(structure.num_atoms): + seen.add(n) + atom = structure.atom[n] + + atoms.append({'element': atom.atomic_symbol, 'charge': atom.charge, 'x': atom.x, 'y': atom.y, + 'z': atom.z, 'isotope': atom.isotope, 'is_radical': atom.is_radical, + 'implicit_hydrogens': atom.implicit_hydrogens, 'delta_isotope': atom.delta_isotope}) + if atom.implicit_protium: + protium[n] = atom.implicit_protium + if atom.implicit_deuterium: + deuterium[n] = atom.implicit_deuterium + if atom.implicit_tritium: + tritium[n] = atom.implicit_tritium + + for k in range(atom.num_bonds): + m = atom.neighbor[k] + if m in seen: + continue + order = atom.bond_type[k] + if order: + bonds.append((n, m, order)) + + stereo_atoms = [] + stereo_allenes = [] + stereo_cumulenes = [] + for i in range(structure.num_stereo0D): + stereo = structure.stereo0D[i] + sign = stereo.sign + if sign is not None: + if stereo.is_tetrahedral: + stereo_atoms.append((stereo.central_atom, 
stereo.neighbors, sign)) + elif stereo.is_allene: + nn, *_, nm = stereo.neighbors + stereo_allenes.append((stereo.central_atom, nn, nm, sign)) + elif stereo.is_cumulene: + nn, n, m, nm = stereo.neighbors + stereo_cumulenes.append((n, m, nn, nm, sign)) + finally: + lib.FreeStructFromINCHI(byref(structure)) tmp = {'atoms': atoms, 'bonds': bonds, 'stereo_atoms': stereo_atoms, 'stereo_allenes': stereo_allenes, 'stereo_cumulenes': stereo_cumulenes, 'mapping': list(range(1, len(atoms) + 1)), @@ -522,6 +520,7 @@ class INCHIStructure(Structure): lib = None +_lib_lock = Lock() platform = get_platform() if platform == 'win-amd64': diff --git a/chython/files/mdl/__init__.py b/chython/files/mdl/__init__.py index 2310481a..61cbd587 100644 --- a/chython/files/mdl/__init__.py +++ b/chython/files/mdl/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2024 Ramil Nugmanov +# Copyright 2017-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/files/mdl/emol.py b/chython/files/mdl/emol.py index e8390a9c..636aa260 100644 --- a/chython/files/mdl/emol.py +++ b/chython/files/mdl/emol.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2024 Ramil Nugmanov +# Copyright 2020-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,9 +16,14 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# +from re import compile from ...exceptions import EmptyMolecule +_stereo_collection = compile(r'MDLV30/STE(ABS|RAC|REL)(\d*)\s+ATOMS=\((.+)\)') +_v3000_token_re = compile(r'(?:[^\s"(]|"[^"]*"|\([^)]*\))+') + + def parse_mol_v3000(data, *, _header=True): if _header: title = data[0].strip() or None @@ -26,11 +31,17 @@ def parse_mol_v3000(data, *, _header=True): else: title = None - atom_count, bonds_count, *kvs = data[1][13:].split() - atom_count = int(atom_count) + try: + counts_tokens = data[1][13:].split() + atom_count = int(counts_tokens[0]) + except (IndexError, ValueError): + raise ValueError(f'V3000: cannot parse counts line: {data[1]!r:.80}') if not atom_count: raise EmptyMolecule - bonds_count = int(bonds_count) + try: + bonds_count = int(counts_tokens[1]) + except (IndexError, ValueError): + raise ValueError(f'V3000: cannot parse bond count from counts line: {data[1]!r:.80}') log = [] atoms = [] @@ -38,66 +49,89 @@ def parse_mol_v3000(data, *, _header=True): stereo = [] meta = {} atom_map = {} - star_points = [] + star_points = set() - for kv in kvs: + for kv in counts_tokens[2:]: if '=' in kv: k, v = kv.split('=', 1) if k and v: meta[k] = v - # concatenate line breaks + # concatenate line continuations using list+join tmp = [] - keep = None + parts = [] for line in data[3:]: if line.endswith('-\n'): - line = line[7:-2] # skip `M V30 ` and `-\n` - if keep: - keep += line - else: - keep = line.lstrip() + parts.append(line[7:-2]) # skip `M V30 ` prefix and `-\n` suffix else: - line = line[7:] # skip `M V30 ` - if keep: - tmp.append(keep + line.rstrip()) - keep = None + content = line[7:].rstrip() + if parts: + parts.append(content) + tmp.append(''.join(parts)) + parts = [] else: - tmp.append(line.strip()) + tmp.append(content.lstrip()) data = tmp + if len(data) < atom_count: + raise ValueError(f'V3000: expected {atom_count} atom lines but only {len(data)} lines available') + + # parse atom block + _split = _v3000_token_re.findall + for i, line in 
enumerate(data[:atom_count], 1): + try: + tokens = _split(line) + n, a, x, y, z, m = tokens[0], tokens[1], tokens[2], tokens[3], tokens[4], tokens[5] + kvs = tokens[6:] + except (IndexError, ValueError): + raise ValueError(f'V3000 atom line {i}: cannot parse: {line!r:.80}') - for line in data[:atom_count]: - n, a, x, y, z, m, *kvs = split(line) if a.startswith(('[', 'NOT')): - raise ValueError('list of atoms not supported') + raise ValueError(f'V3000 atom line {i}: list of atoms not supported') elif a == '*': - star_points.append(n) + star_points.add(n) continue elif a == 'R#': - raise ValueError('R-groups not supported') + raise ValueError(f'V3000 atom line {i}: R-groups not supported') - i = None - c = 0 - r = False + isotope = None + charge = 0 + is_radical = False for kv in kvs: k, v = kv.split('=', 1) if k == 'CHG': - c = int(v) + charge = int(v) elif k == 'MASS': - i = int(v) + isotope = int(v) elif k == 'RAD': - r = True + is_radical = True if a == 'D': - if i: - raise ValueError('isotope on deuterium atom') + if isotope: + raise ValueError(f'V3000 atom line {i}: isotope on deuterium atom') a = 'H' - i = 2 + isotope = 2 + + try: + fx, fy, fz = float(x), float(y), float(z) + except ValueError: + raise ValueError(f'V3000 atom line {i}: cannot parse coordinates from {x!r}, {y!r}, {z!r}') atom_map[n] = len(atoms) - atoms.append({'element': a, 'isotope': i, 'charge': c, 'is_radical': r, - 'x': float(x), 'y': float(y), 'z': float(z), 'parsed_mapping': int(m)}) + atoms.append({'element': a, 'isotope': isotope, 'charge': charge, 'is_radical': is_radical, + 'x': fx, 'y': fy, 'z': fz, 'parsed_mapping': int(m)}) + + # parse bond block + bond_start = 2 + atom_count + bond_end = bond_start + bonds_count + if bond_end > len(data): + raise ValueError(f'V3000: expected {bonds_count} bond lines but only {len(data) - bond_start} available') + for i, line in enumerate(data[bond_start:bond_end], 1): + try: + tokens = _split(line) + _, t, a1, a2 = tokens[0], tokens[1], 
tokens[2], tokens[3] + kvs = tokens[4:] + except (IndexError, ValueError): + raise ValueError(f'V3000 bond line {i}: cannot parse: {line!r:.80}') - for line in data[2 + atom_count: 2 + atom_count + bonds_count]: - _, t, a1, a2, *kvs = split(line) if a1 in star_points: if a2 in star_points: log.append('invalid bond ignored: star-point to star-point') @@ -105,24 +139,26 @@ def parse_mol_v3000(data, *, _header=True): try: star = atom_map[a2] except KeyError: - raise ValueError('invalid atoms number') + raise ValueError(f'V3000 bond line {i}: invalid atom number {a2}') endpoints = None elif a2 in star_points: try: star = atom_map[a1] except KeyError: - raise ValueError('invalid atoms number') + raise ValueError(f'V3000 bond line {i}: invalid atom number {a1}') endpoints = None else: star = None try: - t = int(t) - if t in (9, 10): # added ad-hoc for bond type 9 - t = 8 + bt = int(t) + if bt in (9, 10): + bt = 8 log.append('coordinate bond replaced to special') - bonds.append((atom_map[a1], atom_map[a2], t)) + bonds.append((atom_map[a1], atom_map[a2], bt)) except KeyError: - raise ValueError('invalid atoms numbers') + raise ValueError(f'V3000 bond line {i}: invalid atom numbers {a1}, {a2}') + except ValueError: + raise ValueError(f'V3000 bond line {i}: invalid bond type {t!r}') for kv in kvs: k, v = kv.split('=') @@ -136,74 +172,80 @@ def parse_mol_v3000(data, *, _header=True): elif k == 'ENDPTS': endpoints = v[1:-1].split() if len(endpoints) != int(endpoints[0]) + 1: - raise ValueError('invalid ENDPTS block') + raise ValueError(f'V3000 bond line {i}: invalid ENDPTS block') if star is not None: if endpoints: # noqa - for m in endpoints[1:]: # noqa + for ep in endpoints[1:]: # noqa try: - bonds.append((star, atom_map[m], 8)) + bonds.append((star, atom_map[ep], 8)) except KeyError: - raise ValueError('invalid atoms numbers in ENDPTS block') + raise ValueError(f'V3000 bond line {i}: invalid atom number {ep} in ENDPTS block') else: log.append('Bond ignored. 
Star atom not allowed as endpoint') - drop = True - for line in data[3 + atom_count + bonds_count:]: + # parse remaining blocks (SGROUP, COLLECTION, etc.) + in_sgroup = False + in_collection = False + remaining_start = 3 + atom_count + bonds_count + for line in data[remaining_start:]: if line.startswith('END CTAB'): break - elif drop: - if line.startswith('BEGIN SGROUP'): - drop = False - continue + elif line.startswith('BEGIN SGROUP'): + in_sgroup = True elif line.startswith('END SGROUP'): - break - - _, _type, i, *kvs = split(line) - if _type.startswith('DAT'): - a = f = d = None - for kv in kvs: - k, v = kv.split('=', 1) - if k == 'ATOMS': - a = tuple(atom_map[x] for x in v[1:-1].split()[1:] if x not in star_points) - elif k == 'FIELDNAME': - f = v.strip('"') - if k == 'FIELDDATA': - d = v.strip('"') - if a and f and d: - if f == 'MRV_IMPLICIT_H': - atoms[a[0]]['implicit_hydrogens'] = int(d[6:]) - else: - log.append(f'ignored SGROUP DAT {i}: {a}\t{f}\t{d}') - elif _type.startswith('SRU'): - raise ValueError('Polymers not supported') + in_sgroup = False + elif line.startswith('BEGIN COLLECTION'): + in_collection = True + elif line.startswith('END COLLECTION'): + in_collection = False + elif in_sgroup: + tokens = _split(line) + try: + _, _type, idx = tokens[0], tokens[1], tokens[2] + kvs = tokens[3:] + except IndexError: + log.append(f'V3000 SGROUP: cannot parse line: {line!r:.80}') + continue + if _type.startswith('DAT'): + a = f = d = None + for kv in kvs: + k, v = kv.split('=', 1) + if k == 'ATOMS': + a = tuple(atom_map[x] for x in v[1:-1].split()[1:] if x not in star_points) + elif k == 'FIELDNAME': + f = v.strip('"') + if k == 'FIELDDATA': + d = v.strip('"') + if a and f and d: + if f == 'MRV_IMPLICIT_H': + atoms[a[0]]['implicit_hydrogens'] = int(d[6:]) + else: + log.append(f'ignored SGROUP DAT {idx}: {a}\t{f}\t{d}') + elif _type.startswith('SRU'): + raise ValueError('Polymers not supported') + elif in_collection: + m = _stereo_collection.search(line) + if m: 
+ stype, gid, atom_list = m.group(1), m.group(2), m.group(3) + atom_indices = atom_list.split() + atom_indices = atom_indices[1:] # skip count + if stype == 'RAC': # AND group = positive + sg = int(gid) if gid else 1 + for a in atom_indices: + try: + atoms[atom_map[a]]['extended_stereo'] = sg + except KeyError: + log.append(f'invalid atom in STERAC collection: {a}') + elif stype == 'REL': # OR group = negative + sg = -(int(gid) if gid else 1) + for a in atom_indices: + try: + atoms[atom_map[a]]['extended_stereo'] = sg + except KeyError: + log.append(f'invalid atom in STEREL collection: {a}') + # STEABS is the default (no extended_stereo needed) return {'title': title, 'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'meta': meta, 'log': log} -def split(line): # todo optimize - collect = [] - tmp = [] - until = None - for s in line: - if until: - tmp.append(s) - if s == until: - until = None - elif s == '(': - tmp.append('(') - until = ')' - elif s == '"': - tmp.append(s) - until = '"' - elif s == ' ': - if tmp: - collect.append(''.join(tmp)) - tmp = [] - else: - tmp.append(s) - if tmp: - collect.append(''.join(tmp)) - return collect - - __all__ = ['parse_mol_v3000'] diff --git a/chython/files/mdl/erxn.py b/chython/files/mdl/erxn.py index d088cabe..0724fba3 100644 --- a/chython/files/mdl/erxn.py +++ b/chython/files/mdl/erxn.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2024 Ramil Nugmanov +# Copyright 2020-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/files/mdl/mol.py b/chython/files/mdl/mol.py index 88b21373..51a9c4a7 100644 --- a/chython/files/mdl/mol.py +++ b/chython/files/mdl/mol.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2024 Ramil Nugmanov +# Copyright 2020-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -26,11 +26,25 @@ def parse_mol_v2000(data): line = data[3] - atoms_count = int(line[0:3]) - bonds_count = int(line[3:6]) + try: + atoms_count = int(line[0:3]) + except (ValueError, IndexError): + raise InvalidV2000(f'V2000 counts line: cannot parse atom count from {line!r:.80}') + try: + bonds_count = int(line[3:6]) + except (ValueError, IndexError): + raise InvalidV2000(f'V2000 counts line: cannot parse bond count from {line!r:.80}') + if not atoms_count: raise EmptyMolecule + available_lines = len(data) - 4 + if available_lines < atoms_count: + raise InvalidV2000(f'V2000: expected {atoms_count} atom lines but only {available_lines} lines available') + if available_lines < atoms_count + bonds_count: + raise InvalidV2000(f'V2000: expected {bonds_count} bond lines but only {available_lines - atoms_count} ' + f'lines available after atom block') + log = [] title = data[0].strip() or None atoms = [] @@ -38,35 +52,54 @@ def parse_mol_v2000(data): stereo = [] dat = {} - for line in data[4: 4 + atoms_count]: + # parse atom block + _cm = _charge_map + for i, line in enumerate(data[4: 4 + atoms_count], 1): try: - charge = _charge_map[line[36:39]] + charge = _cm[line[36:39]] except KeyError: - raise InvalidCharge + raise InvalidCharge(f'V2000 atom line {i}: invalid charge field {line[36:39]!r} in: {line!r:.80}') + element = line[31:34].strip() isotope = line[34:36] - delta_isotope = None if element in 'AL': - raise ValueError('queries not supported') + raise ValueError(f'V2000 atom line {i}: queries not supported') elif element == 'D': element = 'H' if isotope != ' 0': - raise ValueError('isotope on deuterium atom') + raise ValueError(f'V2000 atom line {i}: isotope on deuterium atom') isotope = 2 + delta_isotope = None elif isotope != ' 0': delta_isotope = int(isotope) isotope = None else: isotope = None + delta_isotope = None + + try: + x = float(line[0:10]) + y = float(line[10:20]) + z = 
float(line[20:30]) + except ValueError: + raise InvalidV2000(f'V2000 atom line {i}: cannot parse coordinates from {line[0:30]!r}') mapping = line[60:63] atoms.append({'element': element, 'charge': charge, 'isotope': isotope, - 'parsed_mapping': int(mapping) if mapping else 0, 'x': float(line[0:10]), 'y': float(line[10:20]), - 'z': float(line[20:30]), 'delta_isotope': delta_isotope}) + 'parsed_mapping': int(mapping) if mapping.strip() else 0, + 'x': x, 'y': y, 'z': z, 'delta_isotope': delta_isotope}) - for line in data[4 + atoms_count: 4 + atoms_count + bonds_count]: - a1, a2 = int(line[0:3]) - 1, int(line[3:6]) - 1 + # parse bond block + for i, line in enumerate(data[4 + atoms_count: 4 + atoms_count + bonds_count], 1): + try: + a1 = int(line[0:3]) - 1 + a2 = int(line[3:6]) - 1 + except ValueError: + raise InvalidV2000(f'V2000 bond line {i}: cannot parse atom indices from {line[0:6]!r}') + if a1 < 0 or a1 >= atoms_count or a2 < 0 or a2 >= atoms_count: + raise InvalidV2000(f'V2000 bond line {i}: atom index out of range ' + f'(a1={a1 + 1}, a2={a2 + 1}, atoms={atoms_count})') s = line[9:12] if s == ' 1': stereo.append((a1, a2, 1)) @@ -75,11 +108,12 @@ def parse_mol_v2000(data): elif s != ' 0': log.append(f'unsupported or invalid stereo: {line}') b = int(line[6:9]) - if b == 9: # added ad-hoc for bond type 9 + if b == 9: b = 8 log.append(f'coordinate bond replaced with special: {line}') bonds.append((a1, a2, b)) + # parse properties block for line in data[4 + atoms_count + bonds_count:]: if line.startswith('M END'): break @@ -87,11 +121,19 @@ def parse_mol_v2000(data): raise ValueError('list of atoms not supported') elif line.startswith(('M ISO', 'M RAD', 'M CHG')): _type = _ctf_data[line[3]] - for i in range(int(line[6:9])): + try: + count = int(line[6:9]) + except ValueError: + raise InvalidV2000(f'V2000 properties: cannot parse entry count from {line!r:.80}') + for i in range(count): i8 = i * 8 - atom = int(line[10 + i8:13 + i8]) - if not atom or atom > len(atoms): - 
raise InvalidV2000('invalid atoms number') + try: + atom = int(line[10 + i8:13 + i8]) + except ValueError: + raise InvalidV2000(f'V2000 properties: cannot parse atom number in {line!r:.80}') + if not atom or atom > atoms_count: + raise InvalidV2000(f'V2000 properties: atom number {atom} out of range [1, {atoms_count}] ' + f'in: {line!r:.80}') atom = atoms[atom - 1] atom[_type] = int(line[14 + i8:17 + i8]) @@ -122,7 +164,7 @@ def parse_mol_v2000(data): log.append(f'ignored line: {line}') for a in atoms: - if 'is_radical' in a: # int to bool + if 'is_radical' in a: a['is_radical'] = True for x in dat.values(): try: diff --git a/chython/files/mdl/read.py b/chython/files/mdl/read.py index efcbf02b..058b9b1b 100644 --- a/chython/files/mdl/read.py +++ b/chython/files/mdl/read.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -18,8 +18,6 @@ # from abc import ABCMeta, abstractmethod from base64 import urlsafe_b64encode -from fileinput import FileInput -from io import StringIO, TextIOWrapper from itertools import islice from os.path import abspath, join from pathlib import Path @@ -50,11 +48,11 @@ def __init__(self, file, buffer_size=1000, indexable=False, ignore=True, remap=F elif isinstance(file, Path): self._file = file.open() self._is_buffer = False - elif isinstance(file, (TextIOWrapper, StringIO, FileInput)): + elif hasattr(file, '__iter__') and hasattr(file, 'read'): self._file = file self._is_buffer = True else: - raise TypeError('invalid file. TextIOWrapper, StringIO or FileInput subclasses or path to file expected') + raise TypeError('invalid file. 
file-like object or path to file expected') self._shifts = None self._tell = 0 self._buffer_size = buffer_size diff --git a/chython/files/mdl/rxn.py b/chython/files/mdl/rxn.py index 56977fe1..c32c568a 100644 --- a/chython/files/mdl/rxn.py +++ b/chython/files/mdl/rxn.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2024 Ramil Nugmanov +# Copyright 2020-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/files/mdl/stereo.py b/chython/files/mdl/stereo.py index 212cb77d..a9ad48d5 100644 --- a/chython/files/mdl/stereo.py +++ b/chython/files/mdl/stereo.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2024 Ramil Nugmanov +# Copyright 2020-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/files/mdl/write.py b/chython/files/mdl/write.py index a998251e..e20282ef 100644 --- a/chython/files/mdl/write.py +++ b/chython/files/mdl/write.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2024 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -17,7 +17,6 @@ # along with this program; if not, see . # from collections import defaultdict -from io import StringIO, TextIOWrapper from pathlib import Path from ...containers import MoleculeContainer @@ -39,11 +38,11 @@ def __init__(self, file, *, mapping: bool = True, append: bool = False): elif isinstance(file, Path): self._file = file.open('a' if append else 'w') self._is_buffer = False - elif isinstance(file, (TextIOWrapper, StringIO)): + elif hasattr(file, 'write'): self._file = file self._is_buffer = True else: - raise TypeError('invalid file. TextIOWrapper, StringIO subclasses or path to file expected') + raise TypeError('invalid file. 
file-like object or path to file expected') def close(self, force=False): """ @@ -68,6 +67,17 @@ def __write_closed(_): class EMOLWrite(IO): + def __init__(self, file, *, mapping: bool = True, append: bool = False, absolute: bool = False): + """ + :param mapping: write atom mapping. + :param append: open file path in append mode. + :param absolute: explicitly write MDLV30/STEABS collection for stereocenters without extended stereo groups. + By default chython treats any set chirality flag as absolute if not in an AND/OR group, + but some tools require the ABS collection to be explicitly present in the file. + """ + self._absolute = absolute + super().__init__(file, mapping=mapping, append=append) + def _write_molecule(self, g, write3d=None): if not isinstance(g, MoleculeContainer): raise TypeError('MoleculeContainer expected') @@ -111,7 +121,33 @@ def _write_molecule(self, g, write3d=None): if m not in wedge[n]: i += 1 file.write(f'M V30 {i} {b.order} {mapping[n]} {mapping[m]}\n') - file.write('M V30 END BOND\nM V30 END CTAB\n') + file.write('M V30 END BOND\n') + + # enhanced stereo collection + rac = defaultdict(list) # AND groups: positive extended_stereo + rel = defaultdict(list) # OR groups: negative extended_stereo + ast = [] # absolute stereo + for m, a in g.atoms(): + if (es := a.extended_stereo) is not None: + if es > 0: + rac[es].append(mapping[m]) + elif es < 0: + rel[-es].append(mapping[m]) + elif a.stereo is not None: + ast.append(mapping[m]) + if rac or rel or (self._absolute and ast): + file.write('M V30 BEGIN COLLECTION\n') + if self._absolute and ast: + file.write(f'M V30 MDLV30/STEABS ATOMS=({len(ast)} {" ".join(str(x) for x in ast)})\n') + for gid in sorted(rac): + al = rac[gid] + file.write(f'M V30 MDLV30/STERAC{gid} ATOMS=({len(al)} {" ".join(str(x) for x in al)})\n') + for gid in sorted(rel): + al = rel[gid] + file.write(f'M V30 MDLV30/STEREL{gid} ATOMS=({len(al)} {" ".join(str(x) for x in al)})\n') + file.write('M V30 END COLLECTION\n') + + 
file.write('M V30 END CTAB\n') class MOLWrite(IO): diff --git a/chython/files/opsin.py b/chython/files/opsin.py index c2aa320e..4c0f1da1 100644 --- a/chython/files/opsin.py +++ b/chython/files/opsin.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -19,26 +19,11 @@ from .daylight import smiles -_nametostruct = _opsin = None - - def opsin(string): - """Parse IUPAC name into MoleculeContainer. - """ - global _opsin, _nametostruct - - if _opsin is None: - from jpype import JPackage, isJVMStarted, startJVM - - if not isJVMStarted(): - from chython import class_paths - - startJVM('--enable-native-access=ALL-UNNAMED', classpath=class_paths) - - _opsin = JPackage('uk').ac.cam.ch.wwmm.opsin - _nametostruct = _opsin.NameToStructure.getInstance() + """Parse IUPAC name into MoleculeContainer.""" + from .._java import get_opsin - result = _nametostruct.parseChemicalName(string) + result = get_opsin().parseChemicalName(string) if str(result.getStatus()) == 'FAILURE': raise ValueError(f'Failed to convert `{string}`: {result.getMessage()}') return smiles(str(result.getSmiles())) diff --git a/chython/files/test/__init__.py b/chython/files/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/chython/files/test/test_inchi.py b/chython/files/test/test_inchi.py new file mode 100644 index 00000000..93366899 --- /dev/null +++ b/chython/files/test/test_inchi.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +import pytest + +try: + from chython import inchi + HAS_INCHI = True +except ImportError: + HAS_INCHI = False + + +@pytest.mark.skipif(not HAS_INCHI, reason='libinchi not available') +def test_inchi_parse_ethanol(): + mol = inchi('InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3') + assert len(mol) == 3 + assert 'O' in {a.atomic_symbol for _, a in mol.atoms()} + + +@pytest.mark.skipif(not HAS_INCHI, 
reason='libinchi not available') +def test_inchi_parse_benzene(): + mol = inchi('InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H') + assert len(mol) == 6 + + +@pytest.mark.skipif(not HAS_INCHI, reason='libinchi not available') +def test_inchi_parse_charged(): + mol = inchi('InChI=1S/Na.H2O/h;1H2/q+1;/p-1') + assert mol is not None diff --git a/chython/files/test/test_mrv.py b/chython/files/test/test_mrv.py new file mode 100644 index 00000000..1d924d84 --- /dev/null +++ b/chython/files/test/test_mrv.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +from io import BytesIO, StringIO +from chython import smiles +from chython.files import MRVRead, MRVWrite + + +def test_mrv_read_molecule(): + with MRVRead('test/implicit.mrv') as f: + mol = next(f) + assert mol is not None + assert len(mol) > 0 + assert 'N' in {a.atomic_symbol for _, a in mol.atoms()} + + +def test_mrv_read_all(): + with MRVRead('test/implicit.mrv') as f: + mols = f.read() + assert len(mols) == 1 + + +def test_mrv_write_read_roundtrip(): + mol = smiles('c1ccccc1') + mol.clean2d() + + buf = StringIO() + with MRVWrite(buf) as w: + w.write(mol) + + xml_bytes = buf.getvalue().encode() + with MRVRead(BytesIO(xml_bytes)) as r: + mol2 = next(r) + + assert str(mol) == str(mol2) + + +def test_mrv_write_reaction(): + rxn = smiles('[CH3:1][OH:2]>>[CH3:1][NH2:3]') + for m in rxn.molecules(): + m.clean2d() + + buf = StringIO() + with MRVWrite(buf) as w: + w.write(rxn) + + assert '' in buf.getvalue() + assert 'reactantList' in buf.getvalue() + assert 'productList' in buf.getvalue() diff --git a/chython/files/test/test_sdf_rdf.py b/chython/files/test/test_sdf_rdf.py new file mode 100644 index 00000000..a054c40c --- /dev/null +++ b/chython/files/test/test_sdf_rdf.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +from io import StringIO +from chython import smiles +from chython.files import SDFRead, SDFWrite, RDFRead, RDFWrite + + +def test_sdf_read(): + with SDFRead('test/implicit.sdf') as f: + mols = f.read() + assert len(mols) > 0 + for 
mol in mols: + assert len(mol) > 0 + + +def test_sdf_read_stereo(): + with SDFRead('test/stereo.sdf') as f: + mols = f.read(amount=10) + assert len(mols) == 10 + + +def test_sdf_write_read_roundtrip(): + mol = smiles('CCO') + mol.clean2d() + mol.meta['name'] = 'ethanol' + + buf = StringIO() + with SDFWrite(buf) as w: + w.write(mol) + + buf.seek(0) + with SDFRead(buf) as r: + mol2 = next(r) + + assert str(mol) == str(mol2) + assert mol2.meta.get('name') == 'ethanol' + + +def test_rdf_read(): + from chython import ReactionContainer + with RDFRead('test/MR.rdf') as f: + records = f.read() + assert len(records) > 0 + rxns = [r for r in records if isinstance(r, ReactionContainer)] + assert len(rxns) > 0 + for rxn in rxns: + assert len(rxn.reactants) > 0 + assert len(rxn.products) > 0 + + +def test_rdf_write_read_roundtrip(): + rxn = smiles('[CH3:1][OH:2]>>[CH3:1][NH2:3]') + for m in rxn.molecules(): + m.clean2d() + + buf = StringIO() + with RDFWrite(buf) as w: + w.write(rxn) + + buf.seek(0) + with RDFRead(buf) as r: + rxn2 = next(r) + + assert len(rxn2.reactants) == 1 + assert len(rxn2.products) == 1 diff --git a/chython/files/test/test_xyz.py b/chython/files/test/test_xyz.py new file mode 100644 index 00000000..e68d4eba --- /dev/null +++ b/chython/files/test/test_xyz.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +from chython import xyz + + +def test_xyz_water(): + mol = xyz((('O', 0., 0., 0.), ('H', 1., 0., 0.), ('H', 0., 1., 0.))) + assert len(mol) == 3 + assert 'O' in {a.atomic_symbol for _, a in mol.atoms()} + + +def test_xyz_methane(): + mol = xyz(( + ('C', 0., 0., 0.), + ('H', 1.09, 0., 0.), + ('H', -0.36, 1.03, 0.), + ('H', -0.36, -0.51, 0.89), + ('H', -0.36, -0.51, -0.89), + )) + assert len(mol) == 5 + assert mol.atoms_count == 5 diff --git a/chython/files/xyz.py b/chython/files/xyz.py index a77a8489..6afaedf3 100644 --- a/chython/files/xyz.py +++ b/chython/files/xyz.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2024 Ramil Nugmanov +# Copyright 
2020-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/periodictable/__init__.py b/chython/periodictable/__init__.py index d494564e..8c14308f 100644 --- a/chython/periodictable/__init__.py +++ b/chython/periodictable/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2024 Ramil Nugmanov +# Copyright 2018-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/periodictable/base/__init__.py b/chython/periodictable/base/__init__.py index 75806828..b713d7e3 100644 --- a/chython/periodictable/base/__init__.py +++ b/chython/periodictable/base/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/periodictable/base/dynamic.py b/chython/periodictable/base/dynamic.py index c7af1a7a..b174f945 100644 --- a/chython/periodictable/base/dynamic.py +++ b/chython/periodictable/base/dynamic.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2024 Ramil Nugmanov +# Copyright 2020-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index 51772ca0..ab0af866 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2025 Ramil Nugmanov +# Copyright 2020-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -17,18 +17,80 @@ # along with this program; if not, see . 
# from abc import ABC, abstractmethod -from CachedMethods import class_cached_property from collections import defaultdict +from functools import cache from typing import Dict, List, Optional, Set, Tuple, Type from .vector import Vector from ...exceptions import ValenceError +@cache +def _compiled_valence_rules(cls): + elements_classes = {x.__name__: x.atomic_number.fget(None) for x in Element.__subclasses__()} + common_valences = cls._common_valences.fget(None) + + rules = defaultdict(list) + if common_valences[0] and cls.atomic_number.fget(None) != 1: # atom has implicit hydrogens by default except H. + # only first common valence represents implicit H. + valence = common_valences[0] + for h in range(valence + 1): + rules[(0, False, valence - h)].append((set(), {}, h)) # any atoms and bonds possible + + for valence in common_valences[1:]: + rules[(0, False, valence)].append((set(), {}, 0)) + else: + for valence in common_valences: + rules[(0, False, valence)].append((set(), {}, 0)) # any atoms and bonds possible + + for charge, is_radical, implicit, environment in cls._valences_exceptions.fget(None): + explicit = sum(x for x, _ in environment) + explicit_dict = defaultdict(int) + explicit_set = set() + for b, e in environment: + be = (b, elements_classes[e]) + explicit_set.add(be) + explicit_dict[be] += 1 + explicit_dict = dict(explicit_dict) + + if implicit: + valence = explicit + implicit + for h in range(implicit + 1): + rules[(charge, is_radical, valence - h)].append((explicit_set, explicit_dict, h)) + else: + rules[(charge, is_radical, explicit)].append((explicit_set, explicit_dict, 0)) + return dict(rules) + + +@cache +def _compiled_saturation_rules(cls): + elements_classes = {x.__name__: x.atomic_number.fget(None) for x in Element.__subclasses__()} + + rules = [] + for valence in cls._common_valences.fget(None): + rules.append((0, False, valence, 0, None)) # any atoms and bonds possible + + for charge, is_radical, implicit, environment in 
cls._valences_exceptions.fget(None): + if not environment: + rules.append((charge, is_radical, implicit, 0, None)) + else: + explicit_dict = defaultdict(int) + explicit = 0 + for b, e in environment: + explicit_dict[(b, elements_classes[e])] += 1 + explicit += b + rules.append((charge, is_radical, implicit + explicit, implicit, dict(explicit_dict))) + return rules + + +@cache +def _elements_map(): + return {x.atomic_number.fget(None): x for x in Element.__subclasses__()} + + class Element(ABC): __slots__ = ('_isotope', '_charge', '_is_radical', '_xy', '_implicit_hydrogens', '_explicit_hydrogens', '_stereo', '_parsed_mapping', '_neighbors', '_heteroatoms', '_hybridization', '_ring_sizes', '_in_ring', '_extended_stereo') - __class_cache__ = {} def __init__(self, isotope: Optional[int] = None, *, charge: int = 0, is_radical: bool = False, x: float = 0., y: float = 0., @@ -325,12 +387,7 @@ def from_atomic_number(cls, number: int) -> Type['Element']: get Element class by its number """ try: - elements = cls.__class_cache__['elements'] - except KeyError: - elements = {x.atomic_number.fget(None): x for x in Element.__subclasses__()} - cls.__class_cache__['elements'] = elements - try: - return elements[number] + return _elements_map()[number] except KeyError: raise ValueError(f'Element with number "{number}" not found') @@ -355,10 +412,14 @@ def valence_rules(self, valence: int) -> \ valence rules for element with specific charge/radical state """ try: - return self._compiled_valence_rules[(self.charge, self.is_radical, valence)] + return _compiled_valence_rules(self.__class__)[(self.charge, self.is_radical, valence)] except KeyError: raise ValenceError + @property + def saturation_rules(self) -> List[Tuple[int, bool, int, int, Optional[Dict[Tuple[int, int], int]]]]: + return _compiled_saturation_rules(self.__class__) + @property @abstractmethod def _common_valences(self) -> Tuple[int, ...]: @@ -386,80 +447,5 @@ def _valences_exceptions(self) -> Tuple[Tuple[int, bool, int, 
Tuple[Tuple[int, s (-1, False, 0, ((1, 'O'),)) and (-1, False, 0, ((1, 'N'),)) """ - @class_cached_property - def _compiled_charge_radical(self) -> Set[Tuple[int, bool]]: - """ - exceptions in charges, radical state - examples: - (-1, False) - anion, not radical - (0, True) - neutral radical - """ - return {(c, r) for c, r, *_ in self._valences_exceptions} - - @class_cached_property - def _compiled_valence_rules(self) -> \ - Dict[Tuple[int, bool, int], List[Tuple[Set[Tuple[int, int]], Dict[Tuple[int, int], int], int]]]: - """ - dictionary with key = (charge, is_radical, sum_of_bonds) and - value = list of possible neighbors and implicit H count - """ - elements_classes = {x.__name__: x.atomic_number.fget(None) for x in Element.__subclasses__()} - - rules = defaultdict(list) - if self._common_valences[0] and self.atomic_number != 1: # atom has implicit hydrogens by default except H. - # only first common valence represents implicit H. - valence = self._common_valences[0] - for h in range(valence + 1): - rules[(0, False, valence - h)].append((set(), {}, h)) # any atoms and bonds possible - - for valence in self._common_valences[1:]: - rules[(0, False, valence)].append((set(), {}, 0)) - else: - for valence in self._common_valences: - rules[(0, False, valence)].append((set(), {}, 0)) # any atoms and bonds possible - - for charge, is_radical, implicit, environment in self._valences_exceptions: - explicit = sum(x for x, _ in environment) - explicit_dict = defaultdict(int) - explicit_set = set() - for b, e in environment: - be = (b, elements_classes[e]) - explicit_set.add(be) - explicit_dict[be] += 1 - explicit_dict = dict(explicit_dict) - - if implicit: - valence = explicit + implicit - - for h in range(implicit + 1): - rules[(charge, is_radical, valence - h)].append((explicit_set, explicit_dict, h)) - else: - rules[(charge, is_radical, explicit)].append((explicit_set, explicit_dict, 0)) - return dict(rules) - - @class_cached_property - def 
_compiled_saturation_rules(self) -> List[Tuple[int, bool, int, int, Optional[Dict[Tuple[int, int], int]]]]: - """ - dictionary with key = (charge, is_radical, sum_of_bonds) and - value = list of possible neighbors - """ - elements_classes = {x.__name__: x.atomic_number.fget(None) for x in Element.__subclasses__()} - - rules = [] - for valence in self._common_valences: - rules.append((0, False, valence, 0, None)) # any atoms and bonds possible - - for charge, is_radical, implicit, environment in self._valences_exceptions: - if not environment: - rules.append((charge, is_radical, implicit, 0, None)) - else: - explicit_dict = defaultdict(int) - explicit = 0 - for b, e in environment: - explicit_dict[(b, elements_classes[e])] += 1 - explicit += b - rules.append((charge, is_radical, implicit + explicit, implicit, dict(explicit_dict))) - return rules - __all__ = ['Element'] diff --git a/chython/periodictable/base/groups.py b/chython/periodictable/base/groups.py index 75809c61..993095f7 100644 --- a/chython/periodictable/base/groups.py +++ b/chython/periodictable/base/groups.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/periodictable/base/periods.py b/chython/periodictable/base/periods.py index f05e6d08..22623dcf 100644 --- a/chython/periodictable/base/periods.py +++ b/chython/periodictable/base/periods.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py index def97785..3b936861 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2025 Ramil Nugmanov +# Copyright 2020-2026 Ramil Nugmanov # Copyright 2021 Dmitrij Zanadvornykh # This file is part of chython. # diff --git a/chython/periodictable/base/vector.py b/chython/periodictable/base/vector.py index c23d2773..5cf23a78 100644 --- a/chython/periodictable/base/vector.py +++ b/chython/periodictable/base/vector.py @@ -3,7 +3,7 @@ # Copyright 2024 Denis Lipatov # Copyright 2024 Vyacheslav Grigorev # Copyright 2024 Timur Gimadiev -# Copyright 2024, 2025 Ramil Nugmanov +# Copyright 2024-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/periodictable/groupI.py b/chython/periodictable/groupI.py index df3631f2..3ff498ef 100644 --- a/chython/periodictable/groupI.py +++ b/chython/periodictable/groupI.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/periodictable/groupII.py b/chython/periodictable/groupII.py index 8b6337d0..d13195e5 100644 --- a/chython/periodictable/groupII.py +++ b/chython/periodictable/groupII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. 
# diff --git a/chython/periodictable/groupIII.py b/chython/periodictable/groupIII.py index ca11c5f1..0208ea2a 100644 --- a/chython/periodictable/groupIII.py +++ b/chython/periodictable/groupIII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # diff --git a/chython/periodictable/groupIV.py b/chython/periodictable/groupIV.py index 70c626b8..0725c9d5 100644 --- a/chython/periodictable/groupIV.py +++ b/chython/periodictable/groupIV.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # diff --git a/chython/periodictable/groupIX.py b/chython/periodictable/groupIX.py index b1fe8055..b3b61041 100644 --- a/chython/periodictable/groupIX.py +++ b/chython/periodictable/groupIX.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Tansu Nasyrova # This file is part of chython. diff --git a/chython/periodictable/groupV.py b/chython/periodictable/groupV.py index 67e56d7d..b816ce79 100644 --- a/chython/periodictable/groupV.py +++ b/chython/periodictable/groupV.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Alexander Nikanshin <17071996sasha@gmail.com> # Copyright 2019 Tagir Akhmetshin # This file is part of chython. 
diff --git a/chython/periodictable/groupVI.py b/chython/periodictable/groupVI.py index 0511d734..c5609aeb 100644 --- a/chython/periodictable/groupVI.py +++ b/chython/periodictable/groupVI.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Dayana Bashirova # This file is part of chython. diff --git a/chython/periodictable/groupVII.py b/chython/periodictable/groupVII.py index f754b97e..d782237f 100644 --- a/chython/periodictable/groupVII.py +++ b/chython/periodictable/groupVII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Alexander Nikanshin <17071996sasha@gmail.com> # This file is part of chython. diff --git a/chython/periodictable/groupVIII.py b/chython/periodictable/groupVIII.py index 15056c3f..8b99f824 100644 --- a/chython/periodictable/groupVIII.py +++ b/chython/periodictable/groupVIII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # diff --git a/chython/periodictable/groupX.py b/chython/periodictable/groupX.py index 8c8b2c08..75627f8d 100644 --- a/chython/periodictable/groupX.py +++ b/chython/periodictable/groupX.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Dayana Bashirova # This file is part of chython. 
diff --git a/chython/periodictable/groupXI.py b/chython/periodictable/groupXI.py index 1c80d3d5..2852503b 100644 --- a/chython/periodictable/groupXI.py +++ b/chython/periodictable/groupXI.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Alexander Nikanshin <17071996sasha@gmail.com> # Copyright 2019 Tagir Akhmetshin # This file is part of chython. diff --git a/chython/periodictable/groupXII.py b/chython/periodictable/groupXII.py index 2b59c90b..f700c272 100644 --- a/chython/periodictable/groupXII.py +++ b/chython/periodictable/groupXII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Dayana Bashirova # This file is part of chython. # diff --git a/chython/periodictable/groupXIII.py b/chython/periodictable/groupXIII.py index e89d745f..f88a9537 100644 --- a/chython/periodictable/groupXIII.py +++ b/chython/periodictable/groupXIII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Tansu Nasyrova # This file is part of chython. diff --git a/chython/periodictable/groupXIV.py b/chython/periodictable/groupXIV.py index 43cca943..ab67496f 100644 --- a/chython/periodictable/groupXIV.py +++ b/chython/periodictable/groupXIV.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Dayana Bashirova # Copyright 2019 Tansu Nasyrova # This file is part of chython. 
diff --git a/chython/periodictable/groupXV.py b/chython/periodictable/groupXV.py index 5f031016..a78dd1e6 100644 --- a/chython/periodictable/groupXV.py +++ b/chython/periodictable/groupXV.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Alexander Nikanshin <17071996sasha@gmail.com> # Copyright 2019 Tagir Akhmetshin # This file is part of chython. diff --git a/chython/periodictable/groupXVI.py b/chython/periodictable/groupXVI.py index 0c782531..43f25341 100644 --- a/chython/periodictable/groupXVI.py +++ b/chython/periodictable/groupXVI.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Dayana Bashirova # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Tansu Nasyrova diff --git a/chython/periodictable/groupXVII.py b/chython/periodictable/groupXVII.py index 3be4f6a7..daf2cc43 100644 --- a/chython/periodictable/groupXVII.py +++ b/chython/periodictable/groupXVII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Alexander Nikanshin <17071996sasha@gmail.com> # Copyright 2019 Tagir Akhmetshin # This file is part of chython. diff --git a/chython/periodictable/groupXVIII.py b/chython/periodictable/groupXVIII.py index b8137593..c41ba36a 100644 --- a/chython/periodictable/groupXVIII.py +++ b/chython/periodictable/groupXVIII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. 
# diff --git a/chython/periodictable/test/__init__.py b/chython/periodictable/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/chython/periodictable/test/test_element.py b/chython/periodictable/test/test_element.py new file mode 100644 index 00000000..fb5ee047 --- /dev/null +++ b/chython/periodictable/test/test_element.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +import pytest +from chython.periodictable import C, N, O, H, S, F, Cl, Br +from chython.periodictable.base.element import Element, _compiled_valence_rules, _compiled_saturation_rules + + +def test_element_from_symbol(): + cls = Element.from_symbol('C') + assert cls is C + + +def test_element_from_atomic_number(): + cls = Element.from_atomic_number(6) + assert cls is C + cls = Element.from_atomic_number(7) + assert cls is N + + +def test_element_from_atomic_number_invalid(): + with pytest.raises(ValueError): + Element.from_atomic_number(999) + + +def test_carbon_valence_rules(): + c = C() + rules = c.valence_rules(4) + assert len(rules) > 0 + # Carbon with 4 bonds should have 0 implicit H + assert any(h == 0 for _, _, h in rules) + + +def test_carbon_valence_rules_partial(): + c = C() + rules = c.valence_rules(3) + # Carbon with 3 bonds should have 1 implicit H + assert any(h == 1 for _, _, h in rules) + + +def test_nitrogen_charged_valence(): + n = N(charge=1) + rules = n.valence_rules(4) + assert len(rules) > 0 + + +def test_invalid_valence(): + from chython.exceptions import ValenceError + c = C() + with pytest.raises(ValenceError): + c.valence_rules(99) + + +def test_saturation_rules(): + rules = _compiled_saturation_rules(C) + assert len(rules) > 0 + # Common valence 4 should be there + assert any(v == 4 for _, _, v, _, _ in rules) + + +def test_element_properties(): + c = C() + assert c.atomic_symbol == 'C' + assert c.atomic_number == 6 + assert c.charge == 0 + assert c.is_radical is False + + +def test_element_charge(): + n = N(charge=-1) + assert n.charge == -1 + + +def 
test_element_isotope(): + c = C(isotope=13) + assert c.isotope == 13 + + +def test_element_invalid_isotope(): + with pytest.raises(ValueError): + C(isotope=999) + + +def test_element_copy(): + c = C(isotope=13, charge=1) + c2 = c.copy() + assert c2.isotope == 13 + assert c2.charge == 1 + assert c2 is not c + + +def test_compiled_valence_rules_cached(): + # Call twice, should return same object (cached) + r1 = _compiled_valence_rules(C) + r2 = _compiled_valence_rules(C) + assert r1 is r2 + + +def test_different_elements_different_rules(): + r_c = _compiled_valence_rules(C) + r_n = _compiled_valence_rules(N) + assert r_c is not r_n diff --git a/chython/reactor/__init__.py b/chython/reactor/__init__.py index 0f39d3d9..a20c5fdf 100644 --- a/chython/reactor/__init__.py +++ b/chython/reactor/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021, 2022 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/reactor/deprotection.py b/chython/reactor/deprotection.py deleted file mode 100644 index 22962538..00000000 --- a/chython/reactor/deprotection.py +++ /dev/null @@ -1,522 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2022-2025 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# -from .. 
import smarts, MoleculeContainer -from .transformer import Transformer - -""" -Predefined transformers for most common protection groups cleavage. -""" - -_hydroxyl_thiocarbamate = ( # NaIO4 or H2O2/NaOH - ('[O;D2:1]-;!@[C;x3;z2](=[S;D1])[N;D3;x0]([C;D1])[C;D1]', '[A:1]', # rule - 'CC(C)OC(=S)N(C)C', 'CC(C)O'), # test -) - -_hydroxyl_fmoc = ( # Et3N pKa ~ 10 - ('[O;D2:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D2;x1;z1][C;D3;z1;x0;r5]1[C;a;r6]:2:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D3]:2-[C;a;r6]:3:[C;D2]:[C;D2]:[C;D2]:[C;D2]:C1:3', - '[A:1]', - 'CC(C)OC(=O)OCC1C2=CC=CC=C2C2=C1C=CC=C2', 'CC(C)O'), -) - -_hydroxyl_troc = ( # [Zn] - ('[O;D2:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D2][C;D4;x3]([Cl;D1])([Cl;D1])[Cl;D1]', '[A:1]', - 'CC(C)OC(=O)OCC(Cl)(Cl)Cl', 'CC(C)O'), -) - -_hydroxyl_teoc = ( # [F-] - ('[O;D2:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D2;z1;x1][C;D2;x1;z1][Si;D4;z1;x0]([C;D1])([C;D1])[C;D1]', '[A:1]', - 'CC(C)OC(=O)OCC[Si](C)(C)C', 'CC(C)O'), -) - -_hydroxyl_alloc = ( # [Pd] + NuH - ('[O;D2:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D2;z1;x1][C;D2;x0;z2]=[C;D1]', '[A:1]', - 'CC(C)OC(=O)OCC=C', 'CC(C)O'), -) - -_hydroxyl_allyl = ( # basic or Metal isomerization + hydrolysis - ('[O;D2:1]-;!@[C;D2;z1;x1][C;D2;x0;z2]=[C;D1]', '[A:1]', - 'CC(C)OCC=C', 'CC(C)O'), -) - -_hydroxyl_tms = ( # [F-] ion substitution - ('[O;D2:1]-;!@[Si;D4;z1;x1]([C;D1])([C;D1])[C;D1]', '[A:1]', - 'CC(C)O[Si](C)(C)C', 'CC(C)O', 'CC(C)O[SiH](C)C', 'CC(C)O[Si](C)(C)OC', 'CC(C)O[Si](C)(C)CC'), -) - -_hydroxyl_tes = ( - ('[O;D2:1]-;!@[Si;D4;z1;x1]([C;D2;x1;z1][C;D1])([C;D2;x1;z1][C;D1])[C;D2;x1;z1][C;D1]', '[A:1]', - 'CC(C)O[Si](CC)(CC)CC', 'CC(C)O', 'CC(C)O[SiH](C)C', 'CC(C)O[Si](C)(C)OC'), -) - -_hydroxyl_tbs = ( # TBS / TBDMS - ('[O;D2:1]-;!@[Si;D4;z1;x1]([C;D1])([C;D1])[C;D4;x1;z1]([C;D1])([C;D1])[C;D1]', '[A:1]', - 'CC(C)O[Si](C)(C)C(C)(C)C', 'CC(C)O', 'CC(C)O[SiH](C)C', 'CC(C)O[Si](C)(C)OC'), -) - -_hydroxyl_tips = ( - 
('[O;D2:1]-;!@[Si;D4;z1;x1]([C;D3;z1;x1]([C;D1])[C;D1])([C;D3;z1;x1]([C;D1])[C;D1])[C;D3;z1;x1]([C;D1])[C;D1]', '[A:1]', - 'CC(C)O[Si](C(C)C)(C(C)C)C(C)C', 'CC(C)O', 'CC(C)O[SiH](C)C', 'CC(C)O[Si](C)(C)OC'), -) - -_hydroxyl_tbdps = ( - ('[O;D2:1]-;!@[Si;D4;z1;x1]([C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)([C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)[C;D4;x1;z1]([C;D1])([C;D1])[C;D1]', '[A:1]', - 'CC(C)O[Si](c1ccccc1)(c1ccccc1)C(C)(C)C', 'CC(C)O', 'CC(C)O[SiH](C)C', 'CC(C)O[Si](c1ccc(C)cc1)(c1ccccc1)C(C)(C)C'), -) - -_hydroxyl_benzyl = ( # [H], ... - ('[O;D2:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)OCc1ccccc1', 'CC(C)O', 'CC(C)OCc1cccc(C)c1', 'CC(C)OC(C)c1ccccc1'), # test + decoys -) - -_hydroxyl_o_nitrobenzyl = ( # UV-light - ('[O;D2:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D3;x1]([N+](=O)[O-]):[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)OCc1c(N(=O)=O)cccc1', 'CC(C)O', 'CC(C)OC(OC)c1c(N(=O)=O)cccc1'), -) - -_hydroxyl_methoxy_benzyl = ( # PMB or MPM - ('[O;D2:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)OCc1ccc(OC)cc1', 'CC(C)O', 'CC(C)OCc1ccc(OCC)cc1', 'CC(C)OCc1cc(OC)ccc1'), -) - -_hydroxyl_dimethoxybenzyl = ( - ('[O;D2:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)OCc1c(OC)cc(OC)cc1', 'CC(C)O'), -) - -_hydroxyl_naphthyl = ( # Nap - ('[O;D2:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D2]:[C;D3]:2:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D3]:2:[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)OCC1=CC2=C(C=CC=C2)C=C1', 'CC(C)O', 'CC(C)OCc1ccccc1'), -) - -_hydroxyl_bom = ( # like Bn - ('[O;D2:1]-;!@[C;D2;x2;z1][O;D2;x0][C;D2;z1;x1]-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)OCOCc1ccccc1', 'CC(C)O'), -) - -_hydroxyl_piv = ( - ('[O;D2:1]-;!@[C;z2;x2](=O)-[C;D4;x0;z1]([C;D1])([C;D1])[C;D1]', '[A:1]', - 'CC(C)OC(=O)C(C)(C)C', 'CC(C)O'), -) - -_hydroxyl_methoxy_benzoate = ( # pMeO-Bz - 
('[O;D2:1]-;!@[C;z2;x2](=O)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1', '[A:1]', - 'COC1=CC=C(C=C1)C(=O)OC(C)C', 'CC(C)O', 'C1=CC=C(C=C1)C(=O)OC(C)C'), -) - -_hydroxyl_benzoate = ( # Bz - ('[O;D2:1]-;!@[C;z2;x2](=O)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'C1=CC=C(C=C1)C(=O)OC(C)C', 'CC(C)O', 'COC1=CC=C(C=C1)C(=O)OC(C)C'), -) - -_hydroxyl_acyl = ( # Ac - ('[O;D2:1]-;!@[C;z2;x2](=O)-[C;D1]', '[A:1]', - 'CC(C)OC(=O)C', 'CC(C)O', 'CC(C)OC(=O)CC'), -) - -_hydroxyl_tfa = ( - ('[O;D2:1]-;!@[C;z2;x2](=O)-[C;D4;z1;x3](F)(F)F', '[A:1]', - 'CC(C)OC(=O)C(F)(F)F', 'CC(C)O'), -) - -_hydroxyl_mom = ( - ('[O;D2:1]-;!@[C;D2;x2;z1][O;D2;x0][C;D1]', '[A:1]', - 'CC(C)OCOC', 'CC(C)O', 'CC(C)OC(C)OC'), -) - -_hydroxyl_mem = ( - ('[O;D2:1]-;!@[C;D2;x2;z1][O;D2;x0][C;D2;z1;x1][C;D2;z1;x1][O;D2;x0][C;D1]', '[A:1]', - 'COCCOCOC(C)C', 'CC(C)O'), -) - -_hydroxyl_thp = ( - ('[O;D2:1]-;!@[C;D3;x2;z1;r6]1[O;D2][C;D2][C;D2][C;D2][C;D2]1', '[A:1]', - 'CC(C)OC1CCCCO1', 'CC(C)O'), -) - -_hydroxyl_ee = ( - ('[O;D2:1]-;!@[C;D3;x2;z1]([O;D2;x0][C;D2;x1;z1][C;D1])[C;D1]', '[A:1]', - 'CC(C)OC(C)OCC', 'CC(C)O', 'CC(C)OC(CC)OCC'), -) - -_hydroxyl_mop = ( - ('[O;D2:1]-;!@[C;D4;x2;z1]([O;D2;x0][C;D1])([C;D1])[C;D1]', '[A:1]', - 'CC(C)OC(C)(C)OC', 'CC(C)O'), -) - -_hydroxyl_sem = ( - ('[O;D2:1]-;!@[C;D2;x2;z1][O;D2;x0][C;D2;z1;x1][C;D2;z1;x1][Si;D4;z1;x0]([C;D1])([C;D1])[C;D1]', '[A:1]', - 'CC(C)OCOCC[Si](C)(C)C', 'CC(C)O'), -) - -_hydroxyl_tritil = ( - ('[O;D2:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)OC(c1ccccc1)(c1ccccc1)c1ccccc1', 'CC(C)O', 'COc1ccc(cc1)C(OC(C)C)(c1ccccc1)c1ccc(OC)cc1'), -) - -_hydroxyl_dimetoxy_tritil = ( - 
('[O;D2:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'COc1ccc(cc1)C(OC(C)C)(c1ccccc1)c1ccc(OC)cc1', 'CC(C)O', 'CC(C)OC(c1ccccc1)(c1ccccc1)c1ccccc1'), -) - -_hydroxyl_chloro_tritil = ( - ('[O;D2:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D3;x1]([Cl;D1]):[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)OC(c1c(Cl)cccc1)(c1ccccc1)c1ccccc1', 'CC(C)O'), -) - -_hydroxyl_mmt = ( - ('[O;D2:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'c1ccc(cc1)C(OC(C)C)(c1ccccc1)c1ccc(OC)cc1', 'CC(C)O', 'CC(C)OC(c1ccccc1)(c1ccccc1)c1ccccc1'), -) - -_hydroxyl_tbu = ( - ('[O;D2:1]-;!@[C;D4;x1;z1]([C;D1])([C;D1])[C;D1]', '[A:1]', - 'CC(C)OC(C)(C)C', 'CC(C)O'), -) - -_hydroxyl_methyl = ( - ('[O;D2:1]-;!@[C;D1]', '[A:1]', - 'CC(C)OC', 'CC(C)O', 'CC(C)OCC'), -) - -_hydroxyl_mpe = ( - ('[O;D2:1]-;!@[C;D4;x1;z1]([C;D1])([C;D2;x0;z1][C;D1])[C;D2;x0;z1][C;D1]', '[A:1]', - 'CC(C)OC(CC)(CC)C', 'CC(C)O'), -) - -_hydroxyl_trifluoroethyl = ( - ('[O;D2:1]-;!@[C;D2;x1;z1][C;D4;x3;z1](F)(F)F', '[A:1]', - 'CC(C)OCC(F)(F)F', 'CC(C)O'), -) - - -_hydroxyl_dmab = ( - ('[O;D2:1]-;!@[C;D2;x1;z1]-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1](:[C;D2]:[C;D2]:1)-[N;D2;x0;z1]-[C;z2;x1;D3]([C;D2;x0;z1][C;D3;x0;z1]([C;D1])[C;D1])=[C;D3;r6;x0;z2]1[C;x1;z2;D3](=O)[C;D2][C;D4;x0;z1]([C;D1])([C;D1])[C;D2][C;D3;x1;z2]1=O', '[A:1]', - 'CC(C)CC(NC1=CC=C(COC(C)=O)C=C1)=C1C(=O)CC(C)(C)CC1=O', 'CC(O)=O'), - 
('[O;D2:1]-;!@[C;D2;x1;z1]-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1](:[C;D2]:[C;D2]:1)-[N;D2;x0;z2]=[C;x1;D3]([C;D2;x0;z1][C;D3;x0;z1]([C;D1])[C;D1])-[C;D3;r6;x0;z1]1[C;x1;z2;D3](=O)[C;D2][C;D4;x0;z1]([C;D1])([C;D1])[C;D2][C;D3;x1;z2]1=O', '[A:1]', - 'CC(C)CC(=NC1=CC=C(COC(C)=O)C=C1)C1C(=O)CC(C)(C)CC1=O', 'CC(O)=O'), -) - -_diol_acetone = ( - # 1,2 - ('[O;D2;x0;r5:1]1-;@[C;D4;x2;z1]([C;D1])([C;D1])-[O;D2;x0:2][C;M]!#[C;M]1', '[A:1].[A:2]', - 'CC1COC(C)(C)O1', 'CC(O)CO'), - # 1,3 - ('[O;D2;x0;r6:1]1-;@[C;D4;x2;z1]([C;D1])([C;D1])-[O;D2;x0:2][C;M][C;M]!#[C;M]1', '[A:1].[A:2]', - 'CC1CCOC(C)(C)O1', 'CC(O)CCO'), -) - -_hydroxyl_amine_acetone = ( - ('[O;D2;x0;r5:1]1-;@[C;D4;x2;z1]([C;D1])([C;D1])-[N;z1:2][C;M]!#[C;M]1', '[A:1].[A:2]', - 'CC1CN(C(C)=O)C(C)(C)O1', 'CC(O)CNC(C)=O'), -) - -_diol_formalin = ( - # 1,2 - ('[O;D2;x0;r5:1]1-;@[C;D2;x2;z1]-[O;D2;x0:2][C;M]!#[C;M]1', '[A:1].[A:2]', - 'CC1COCO1', 'CC(O)CO'), - # 1,3 - ('[O;D2;x0;r6:1]1-;@[C;D2;x2;z1]-[O;D2;x0:2][C;M][C;M]!#[C;M]1', '[A:1].[A:2]', - 'CC1CCOCO1', 'CC(O)CCO'), -) - -_diol_cyclopentanone = ( - # 1,2 - ('[O;D2;x0;r5:1]1-;@[C;D4;x2;z1]2([C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1]2)-[O;D2;x0:2][C;M]!#[C;M]1', '[A:1].[A:2]', - 'CC1COC2(CCCC2)O1', 'CC(O)CO'), - # 1,3 - ('[O;D2;x0;r6:1]1-;@[C;D4;x2;z1]2([C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1]2)-[O;D2;x0:2][C;M][C;M]!#[C;M]1', '[A:1].[A:2]', - 'CC1CCOC2(CCCC2)O1', 'CC(O)CCO'), -) - -_diol_cyclohexanone = ( - # 1,2 - ('[O;D2;x0;r5:1]1-;@[C;D4;x2;z1]2([C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1]2)-[O;D2;x0:2][C;M]!#[C;M]1', '[A:1].[A:2]', - 'CC1COC2(CCCCC2)O1', 'CC(O)CO'), - # 1,3 - ('[O;D2;x0;r6:1]1-;@[C;D4;x2;z1]2([C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1][C;D2;x0;z1]2)-[O;D2;x0:2][C;M][C;M]!#[C;M]1', '[A:1].[A:2]', - 'CC1CCOC2(CCCCC2)O1', 'CC(O)CCO'), -) - -_diol_diacetal = ( - # 1,2 - ('[O;D2;x0;r6:1]1-;@[C;D4;x2;z1]([O;D2;x0][C;D1])([C;D1])[C;D4;x2;z1]([O;D2;x0][C;D1])([C;D1])-[O;D2;x0:2][C;M]!#[C;M]1', '[A:1].[A:2]', - 
'COC1(C)OCC(C)OC1(C)OC', 'CC(O)CO'), - # 1,3 - ('[O;D2;x0;r7:1]1-;@[C;D4;x2;z1]([O;D2;x0][C;D1])([C;D1])[C;D4;x2;z1]([O;D2;x0][C;D1])([C;D1])-[O;D2;x0:2][C;M][C;M]!#[C;M]1', '[A:1].[A:2]', - 'COC1(C)OCCC(C)OC1(C)OC', 'CC(O)CCO'), -) - -_diol_benzylidene = ( - # 1,2 - ('[O;D2;x0;r5:1]1-;@[C;D3;x2;z1]([C;a;r6]:2:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:2)-[O;D2;x0:2][C;M]!#[C;M]1', '[A:1].[A:2]', - 'CC1COC(O1)c1ccccc1', 'CC(O)CO'), - # 1,3 - ('[O;D2;x0;r6:1]1-;@[C;D3;x2;z1]([C;a;r6]:2:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:2)-[O;D2;x0:2][C;M][C;M]!#[C;M]1', '[A:1].[A:2]', - 'CC1CCOC(O1)c1ccccc1', 'CC(O)CCO'), -) - -_carbonyl_dithiolane = ( # MeI - S methylation + hydrolysis - ('[C;D3,D4;z1;x2;r5:1]1[S;D2;x0;z1][C;D2;x1;z1][C;D2;x1;z1][S;D2;x0;z1]1', '[A:1]=[O;M]', - 'CC1SCCS1', 'CC=O'), -) - -_carbonyl_dithiane = ( # MeI - S methylation + hydrolysis - ('[C;D3,D4;z1;x2;r6:1]1[S;D2;x0;z1][C;D2;x1;z1][C;D2;x0;z1][C;D2;x1;z1][S;D2;x0;z1]1', '[A:1]=[O;M]', - 'CC1SCCCS1', 'CC=O'), -) - -_carbonyl_dimethylsulfide = ( # MeI - S methylation + hydrolysis - ('[C;D3,D4;z1;x2:1](-;!@[S;D2][C;D1])-;!@[S;D2][C;D1]', '[A:1]=[O;M]'), -) - -_carbonyl_dioxolane = ( - ('[C;D3,D4;z1;x2;r5:1]1[O;D2;x0][C;D2;x1;z1][C;D2;x1;z1][O;D2;x0]1', '[A:1]=[O;M]', - 'CC1OCCO1', 'CC=O'), -) - -_carbonyl_dioxane = ( - ('[C;D3,D4;z1;x2;r6:1]1[O;D2;x0][C;D2;x1;z1][C;D2;x0;z1][C;D2;x1;z1][O;D2;x0]1', '[A:1]=[O;M]', - 'CC1OCCCO1', 'CC=O'), -) - -_carbonyl_dimethoxy = ( - ('[C;D3,D4;z1;x2:1](-;!@[O;D2;x0][C;D1])-;!@[O;D2;x0][C;D1]', '[A:1]=[O;M]', - 'COC(C)OC', 'CC=O'), -) - -_carboxyl_trioxabicyclooctane = ( # [H+]. Note! a second step of basic hydrolysis is required. 
- ('[C;D4;x3;r6:1]12-;@[O;D2][C;D2;x1;z1][C;D4;x0;z1]([C;D1])([C;D2;x1;z1][O;D2]1)[C;D2;x1;z1][O;D2]2', '[A:1](=[O;M])[O;M]', - 'CC(C)C12OCC(C)(CO1)CO2', 'CC(C)C(O)=O', 'CC(C)C12OCC(CC)(CO1)CO2', 'CC(C)C12OC(C)C(C)(CO1)CO2'), -) - -_amine_methylcarbamate = ( # PrSLi or [OH-] - ('[N;D2,D3:1]-;!@[C;z2;x3](=O)[O;D2;x0][C;D1]', '[A:1]', - 'c1ccccc1NC(=O)OC', 'c1ccccc1N', 'c1ccccc1NC(=O)OCC'), -) - -_amine_ethylcarbamate = ( - ('[N;D2,D3:1]-;!@[C;z2;x3](=O)[O;D2;x0][C;D2;x1;z1][C;D1]', '[A:1]', - 'c1ccccc1NC(=O)OCC', 'c1ccccc1N'), -) - -_amine_alloc = ( # [Pd] - ('[N;D2,D3:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D2;z1;x1][C;D2;x0;z2]=[C;D1]', '[A:1]', - 'c1ccccc1NC(=O)OCC=C', 'c1ccccc1N', 'c1ccccc1NC(=O)OCC=CC'), -) - -_amine_teoc = ( # [F-] - ('[N;D2,D3:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D2;z1;x1][C;D2;x1;z1][Si;D4;z1;x0]([C;D1])([C;D1])[C;D1]', '[A:1]', - 'c1ccccc1NC(=O)OCC[Si](C)(C)C', 'c1ccccc1N'), -) - -_amine_sem = ( - ('[N;D2,D3:1]-;!@[C;D2;x2;z1][O;D2;x0]-[C;D2;z1;x1][C;D2;x1;z1][Si;D4;z1;x0]([C;D1])([C;D1])[C;D1]', '[A:1]', - 'CN(C)COCC[Si](C)(C)C', 'CNC'), -) - -_amine_troc = ( # [Zn] - ('[N;D2,D3:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D2][C;D4;x3]([Cl;D1])([Cl;D1])[Cl;D1]', '[A:1]', - 'c1ccccc1NC(=O)OCC(Cl)(Cl)Cl', 'c1ccccc1N', 'c1ccccc1NC(=O)OC(C)C(Cl)(Cl)Cl'), -) - -_amine_cbz = ( # [Pd] or Na/NH3 - ('[N;D2,D3:1]-;!@[C;z2;x3](=O)-[O;D2;x0][C;D2;x1;z1][C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'c1ccccc1NC(=O)OCc2ccccc2', 'c1ccccc1N', 'c1ccccc1NC(=O)OC(C)c2ccccc2'), -) - -_amine_chloro_cbz = ( - ('[N;D2,D3:1]-;!@[C;z2;x3](=O)-[O;D2;x0][C;D2;x1;z1][C;a;r6]:1:[C;D3;x1]([Cl;D1]):[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'Clc1ccccc1COC(=O)Nc1ccccc1', 'c1ccccc1N', 'c1ccccc1NC(=O)OC(C)c2ccccc2'), -) - -_amine_nosyl = ( # NS. 
With SH-CH2-CH2-OH - ('[N;D2,D3:1]-;!@[S;D4;x3](=O)(=O)-[C;a;r6]:1:[C;D3;x1]([N+](=O)[O-]):[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - '[O-][N+](=O)c1ccccc1S(=O)(=O)Nc1ccccc1', 'c1ccccc1N'), -) - -_amine_boc = ( - ('[N;D2,D3:1]-;!@[C;z2;x3](=O)-[O;D2;x0]-[C;D4;x1;z1]([C;D1])([C;D1])[C;D1]', '[A:1]', - 'c1ccccc1NC(=O)OC(C)(C)C', 'c1ccccc1N'), -) - -_amine_tfa = ( - ('[N;D2,D3:1]-;!@[C;z2;x2](=O)-[C;D4;z1;x3](F)(F)F', '[A:1]', - 'CNC(=O)C(F)(F)F', 'CN'), -) - -_amine_fmoc = ( - ('[N;D2,D3:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D2;x1;z1][C;D3;z1;x0;r5]1[C;a;r6]:2:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D3]:2-[C;a;r6]:3:[C;D2]:[C;D2]:[C;D2]:[C;D2]:C1:3', '[A:1]', - 'O=C(Nc1ccccc1)OCC1c2ccccc2-c2ccccc12', 'c1ccccc1N'), -) - -_amine_pbf = ( - ('[N;D2,D3:1]-;!@[S;D4;x3](=O)(=O)-[C;a;r6]:1:[C;D3;x0]([C;D1]):[C;D3;x0]([C;D1]):[C;D3;x1]:2-[O;D2;x0;r5][C;D4;x1]([C;D1])([C;D1])[C;D2;x0;z1][C;D3]:2:[C;D3;x0]([C;D1]):1', '[A:1]', - 'CN(C)S(=O)(=O)c1c(C)c2CC(C)(C)Oc2c(C)c1C', 'CNC'), -) - -_amine_mtr = ( - ('[N;D2,D3:1]-;!@[S;D4;x3](=O)(=O)-[C;a;r6]:1:[C;D3;x0]([C;D1]):[C;D3;x0]([C;D1]):[C;D3;x1](-;!@[O;D2;x0][C;D1]):[C;D2]:[C;D3;x0]([C;D1]):1', '[A:1]', - 'COC1=C(C)C(C)=C(C(C)=C1)S(=O)(=O)N(C)C', 'CNC'), -) - -_amine_dde = ( - ('[N;D2,D3:1]-;!@[C;z2;x1;D3]([C;D1])=[C;D3;r6;x0;z2]1[C;x1;z2;D3](=O)[C;D2][C;D4;x0;z1]([C;D1])([C;D1])[C;D2][C;D3;x1;z2]1=O', '[A:1]', - 'CC(C)NC(C)=C1C(=O)CC(C)(C)CC1=O', 'CC(C)N'), - ('[N;D2:1]=;!@[C;x1;D3]([C;D1])-[C;D3;r6;x0;z1]1[C;x1;z2;D3](=O)[C;D2][C;D4;x0;z1]([C;D1])([C;D1])[C;D2][C;D3;x1;z2]1=O', '[A:1]', - 'CC(C)N=C(C)C1C(=O)CC(C)(C)CC1=O', 'CC(C)N'), -) - -_amine_ivdde = ( - ('[N;D2,D3:1]-;!@[C;z2;x1;D3]([C;D2;x0;z1][C;D3;x0;z1]([C;D1])[C;D1])=[C;D3;r6;x0;z2]1[C;x1;z2;D3](=O)[C;D2][C;D4;x0;z1]([C;D1])([C;D1])[C;D2][C;D3;x1;z2]1=O', '[A:1]', - 'CC(C)CC(NC(C)C)=C1C(=O)CC(C)(C)CC1=O', 'CC(C)N'), - ('[N;D2:1]=;!@[C;x1;D3]([C;D2;x0;z1][C;D3;x0;z1]([C;D1])[C;D1])-[C;D3;r6;x0;z1]1[C;x1;z2;D3](=O)[C;D2][C;D4;x0;z1]([C;D1])([C;D1])[C;D2][C;D3;x1;z2]1=O', '[A:1]', - 
'CC(C)CC(=NC(C)C)C1C(=O)CC(C)(C)CC1=O', 'CC(C)N'), -) - -_amine_benzyl = ( # [H], ... - ('[N;D2,D3:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)NCc1ccccc1', 'CC(C)N'), -) - -_amine_methoxy_benzyl = ( # PMB or MPM - ('[N;D2,D3:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)NCc1ccc(OC)cc1', 'CC(C)N'), -) - -_amine_dimethoxybenzyl = ( - ('[N;D2,D3:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)NCc1c(OC)cc(OC)cc1', 'CC(C)N'), -) - -_amine_mtt = ( - ('[N;D2,D3:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x0]([C;D1]):[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)NC(c1ccccc1)(c1ccccc1)c1ccc(C)cc1', 'CC(C)N'), -) - -_amine_bhoc = ( - ('[N;D2,D3:1]-;!@[C;z2;x3](=O)[O;D2;x0]-[C;D3;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'CNC(=O)OC(c1ccccc1)c1ccccc1', 'CN'), -) - -_amine_tritil = ( - ('[N;D2,D3:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)NC(c1ccccc1)(c1ccccc1)c1ccccc1', 'CC(C)N'), -) - -_amine_chloro_tritil = ( - ('[N;D2,D3:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D3;x1]([Cl;D1]):[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)NC(c1c(Cl)cccc1)(c1ccccc1)c1ccccc1', 'CC(C)N'), -) - -_thiol_tritil = ( - ('[S;D2;x0;z1:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)SC(c1ccccc1)(c1ccccc1)c1ccccc1', 'CC(C)S'), -) - -_thiol_mmt = ( - 
('[S;D2;x0;z1:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'c1ccc(cc1)C(SC(C)C)(c1ccccc1)c1ccc(OC)cc1', 'CC(C)S'), -) - -_thiol_dimetoxy_tritil = ( - ('[S;D2;x0;z1:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'COc1ccc(cc1)C(SC(C)C)(c1ccccc1)c1ccc(OC)cc1', 'CC(C)S'), -) - -_thiol_chloro_tritil = ( - ('[S;D2;x0;z1:1]-;!@[C;D4;z1;x1](-[C;a;r6]:1:[C;D3;x1]([Cl;D1]):[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)(-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1)-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)SC(c1c(Cl)cccc1)(c1ccccc1)c1ccccc1', 'CC(C)S'), -) - -_thiol_benzyl = ( - ('[S;D2;x0;z1:1]-;!@[C;D2;z1;x1]-[C;a;r6]:1:[C;D2]:[C;D2]:[C;D2]:[C;D2]:[C;D2]:1', '[A:1]', - 'CC(C)SCc1ccccc1', 'CC(C)S'), -) - -_thiol_tbu = ( - ('[S;D2;x0;z1:1]-;!@[C;D4;x1;z1]([C;D1])([C;D1])[C;D1]', '[A:1]', - 'CC(C)SC(C)(C)C', 'CC(C)S'), -) - -_thiol_stbu = ( - ('[S;D2;x1;z1:1]-;!@[S;D2;z1;x1]-[C;D4;x1;z1]([C;D1])([C;D1])[C;D1]', '[A:1]', - 'CC(C)SSC(C)(C)C', 'CC(C)S'), -) - -_thiol_strimethoxyphenyl = ( - ('[S;D2;x1;z1:1]-;!@[S;D2;z1;x1]-[C;a;r6]:1:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D3;x1]:1[O;D2;x0][C;D1]', '[A:1]', - 'COc1cc(OC)c(SSC(C)C)c(OC)c1', 'CC(C)S'), -) - -_thiol_amine_dimethoxybenzyl = ( - ('[S;D2;r5;x0;z1:1]1[C;M][C;M][N;z1:2]-[C;D3;x2;z1]1-[C;a;r6]:1:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D3;x1]([O;D2;x0][C;D1]):[C;D2]:[C;D2]:1', '[A:1].[A:2]', - 'COC1=CC=C(C2NC(C)CS2)C(OC)=C1', 'NC(C)CS'), -) - - -################# -# Magic Factory # -################# - -_groups = [k[1:] for k, v in globals().items() if k.startswith('_') and isinstance(v, tuple) and v] -__all__ = ['apply_all'] + _groups -_cache = {} - - -def 
_prepare_reactor(rules, name): - rxn = [Transformer(smarts(r), smarts(p)) for r, p, *_ in rules] - - def w(molecule: MoleculeContainer, /) -> MoleculeContainer: - """ - Remove protective groups from the given molecule if applicable. - """ - for r in rxn: - while True: - try: - molecule = next(r(molecule)) - except StopIteration: - break - return molecule - - w.__module__ = __name__ - w.__qualname__ = w.__name__ = name - return w - - -def apply_all(molecule: MoleculeContainer, /) -> MoleculeContainer: - """ - Remove all found protective groups from the given molecule. - """ - for name in _groups: - molecule = __getattr__(name)(molecule) - return molecule - - -def __getattr__(name): - try: - return _cache[name] - except KeyError: - if name in _groups: - _cache[name] = t = _prepare_reactor(globals()[f'_{name}'], name) - return t - raise AttributeError - - -def __dir__(): - return __all__ diff --git a/chython/reactor/groups.py b/chython/reactor/groups.py deleted file mode 100644 index 9294b1e6..00000000 --- a/chython/reactor/groups.py +++ /dev/null @@ -1,128 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2024 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -from ..files import smarts - - -_groups = { - 'Alkene': '[C;z2;x0]=[C;x0;z2]', - 'Alkene hetero': '[C;z2;x1,x2]=[C;z2]', - 'Alkene terminal': '[C;z2;x0;D1]=[C;x0;z2]', - 'Alkene hetero terminal': '[C;z2;x0;D1]=[C;x1,x2;z2]', - 'Alkyne': '[C;z3;x0]#[C;x0]', - 'Alkyne hetero': '[C;z3;x1]#[C]', - 'Alkyne terminal': '[C;z3;x0;D1]#[C;x0]', - 'Alkyne hetero terminal': '[C;z3;x0;D1]#[C;x1]', - - 'Alkyl Halide': '[F,Cl,Br,I;D1][C;x1;z1]', - 'Cyclopropyl Halide': '[F,Cl,Br,I;D1][C;x1;z1;r3]', - - 'Aryl Halide': '[F,Cl,Br,I;D1]-[C;a]', - 'Aryl Fluoride': '[F;D1]-[C;a]', - 'Aryl Chloride': '[Cl;D1]-[C;a]', - 'Aryl Bromide': '[Br;D1]-[C;a]', - 'Aryl Iodide': '[I;D1]-[C;a]', - - 'Aryl Halide SNAr alpha': '[F,Cl,Br,I;D1][C;a]:N', - 'Aryl Halide SNAr gamma': '[F,Cl,Br,I;D1][C;a]:C:C:N', - - 'Aryl Fluoride SNAr alpha': '[F;D1][C;a]:N', - 'Aryl Fluoride SNAr gamma': '[F;D1][C;a]:C:C:N', - 'Aryl Chloride SNAr alpha': '[Cl;D1][C;a]:N', - 'Aryl Chloride SNAr gamma': '[Cl;D1][C;a]:C:C:N', - 'Aryl Bromide SNAr alpha': '[Br;D1][C;a]:N', - 'Aryl Bromide SNAr gamma': '[Br;D1][C;a]:C:C:N', - 'Aryl Iodide SNAr alpha': '[I;D1][C;a]:N', - 'Aryl Iodide SNAr gamma': '[I;D1][C;a]:C:C:N', - - 'Alcohol aliphatic': '[O;D1;x0;z1][C;x1;z1]', - 'Alcohol primary or secondary aliphatic': '[O;D1;x0;z1][C;D1,D2,D3;x1;z1]', - 'Alcohol tertiary aliphatic': '[O;D1;x0;z1][C;D4;x1;z1]', - - 'Alcohol aromatic': '[O;D1;x0;z1][C;a]', - - 'Aldehyde': '[O;z2;x0]=[C;D1,D2;x1;z2]', - 'Aldehyde aliphatic': '[O;z2;x0]=[C;D2;x1;z2][C;z1]', - 'Aldehyde aromatic': '[O;z2;x0]=[C;D2;x1;z2][C;a]', - 'Ketone': '[O;z2;x0]=[C;D3;x1;z2]', - - 'Carboxylic Acid': '[O;D1;z1;x0][C;D3;x2;z2]=O', - 'Carboxylic Acid aliphatic': '[O;D1;z1;x0][C;D3;x2;z2](=O)[C;z1]', - 'Carboxylic Acid aromatic': '[O;D1;z1;x0][C;D3;x2;z2](=O)[C;a]', - - 'Carboxylic Acid Ester': '[O;z2;x0]=[C;D3;x2;z2][O;D2;x0]', - 'Carboxylic Acid Halide': '[F,Cl,Br,I;D1][C;D3;x2;z2]=O', - - 'Amine primary': '[N;D1;x0;z1][C;z1,z4;x1]', - 'Amine primary aliphatic': 
'[N;D1;x0;z1][C;z1;x1]', - 'Amine primary aromatic': '[N;D1;x0;z1][C;a]', - 'Amine secondary': '[N;D2;x0;z1]([C;z1,z4;x1])[C;z1,z4;x1]', - 'Amine secondary aliphatic': '[N;D2;x0;z1]([C;z1;x1])[C;z1;x1]', - 'Amine secondary aromatic': '[N;D2;x0;z1]([C;a])[C;z1,z4;x1]', - 'Amine tertiary': '[N;D3;x0;z1]([C;z1,z4;x1])([C;z1,z4;x1])[C;z1,z4;x1]', - 'Amine cyclic': '[N;D2;x0;z1;r4,r5,r6,r7,r8]([C;z1;x1])[C;z1;x1]', - - 'Alpha-aminoacid': '[N;D1;x0;z1][C;z1;x1][C;D3;z2;x2](=O)[O;D1]', - 'Beta-aminoacid': '[N;D1;x0;z1][C;z1;x1][C;z1][C;D3;z2;x2](=O)[O;D1]', - 'Gamma-aminoacid': '[N;D1;x0;z1][C;z1;x1][C;z1][C;z1][C;D3;z2;x2](=O)[O;D1]', - - 'Alpha-aminoacid N-protected': '[N;D2;x0;z1]([C;z1;x1][C;D3;z2;x2](=O)[O;D1])[C;D3;z2;x3](=O)[O;D2;x0]C', - 'Beta-aminoacid N-protected': '[N;D2;x0;z1]([C;z1;x1][C;z1][C;D3;z2;x2](=O)[O;D1])[C;D3;z2;x3](=O)[O;D2;x0]C', - 'Gamma-aminoacid N-protected': '[N;D2;x0;z1]([C;z1;x1][C;z1][C;z1][C;D3;z2;x2](=O)[O;D1])[C;D3;z2;x3](=O)[O;D2;x0]C', - - 'Alpha-aminoacid O-protected': '[N;D1;x0;z1][C;z1;x1][C;D3;z2;x2](=O)[O;D2;x0][C;z1;x1]', - 'Beta-aminoacid O-protected': '[N;D1;x0;z1][C;z1;x1][C;z1][C;D3;z2;x2](=O)[O;D2;x0][C;z1;x1]', - 'Gamma-aminoacid O-protected': '[N;D1;x0;z1][C;z1;x1][C;z1][C;z1][C;D3;z2;x2](=O)[O;D2;x0][C;z1;x1]', - - 'Aryl Sulfone': '[S;D4;z3;x2](=O)(=O)(-[C;a])-[C;x1]', - 'Azide': '[N-;D1;x1;z2]=[N+;D2;x2;z3]=[N;D2;x1;z2]', - - 'Boronic Acid': '[B;D3;x2;z1]([O;D1])[O;D1]', - 'Boronic Acid Ester': '[B;D3;x2;z1]([O;D2;x1])([O;D2;x1])-;!@C', - 'Boronic Acid Ester aliphatic': '[B;D3;x2;z1]([O;D2;x1])([O;D2;x1])-;!@[C;x1;z1]', - 'Boronic Acid Ester aromatic': '[B;D3;x2;z1]([O;D2;x1])([O;D2;x1])-;!@[C;a]', - 'Trifluoroborate': '[B;D4;x3;z1;-](F)(F)(F)', - - 'Thiol aliphatic': '[S;D1;x0;z1][C;x1;z1]', - 'Thiol aromatic': '[S;D1;x0;z1][C;a]', - - 'Hydrazine aromatic': '[N;D1;x1;z1][N;D2;x1;z1][C;a]', - 'Hydrazine aliphatic': '[N;D1;x1;z1][N;D2;x1;z1][C;x1;z1]', - - 'Isocyanate': '[O;D1;z2;x0]=[C;D2;x2;z3]=[N;D2;x0;z2]', - 
'Isothiocyanate': '[S;D1;z2;x0]=[C;D2;x2;z3]=[N;D2;x0;z2]', - 'Nitrile': '[N;D1;z3;x0]#[C;D2;x1]', - 'Isonitrile': '[C-;D1;x1;z3]#[N+;D2;x0]', - 'Sulfonyl Halide': '[S;D4;z3;x3]([F,Cl,Br,I;D1])(=O)=O', - 'Lactam': '[O;D1;x0;z2]=[C;D3;x2](-;@[N;x0;z1]-;@[C;x1;z1])[C;x0;z1]', - 'Cyclic Anhydride': '[O;D1;x0;z2]=[C;D3;x2;z2;r4,r5,r6,r7,r8][O;D2;x0][C;D3;x2;z2]=O' -} - -_smarts = {n.lower().replace(' ', '_'): smarts(s) for n, s in _groups.items()} -__all__ = list(_smarts) - - -def __getattr__(name): - try: - return _smarts[name] - except KeyError: - raise AttributeError - - -def __dir__(): - return __all__ diff --git a/chython/reactor/reactions/__init__.py b/chython/reactor/reactions/__init__.py deleted file mode 100644 index 3ee46ec3..00000000 --- a/chython/reactor/reactions/__init__.py +++ /dev/null @@ -1,158 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2022-2024 Ramil Nugmanov -# Copyright 2023 Timur Gimadiev -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -from collections import deque -from itertools import product -from typing import Iterator, Optional, List -from ._amidation import template as amidation_template -from ._amine_isocyanate import template as amine_isocyanate_template -from ._buchwald_hartwig import template as buchwald_hartwig_template -from ._esterification import template as esterification_template -from ._macmillan import template as macmillan_template -from ._reductive_amination import template as reductive_amination_template -from ._sonogashira import template as songashira_template -from ._sulfonamidation import template as sulfonamidation_template -from ._suzuki_miyaura import template as suzuki_miyaura_template -from ._xec_sp2_sp3 import template as xec_template -from ..reactor import Reactor, fix_mapping_overlap -from ... import smarts, ReactionContainer, MoleculeContainer - -""" -Predefined reactors for common reactions. -""" - - -################# -# Magic Factory # -################# - -__all__ = ['PreparedReactor', 'prepare_reactor'] -__all__.extend(k[:-9] for k, v in globals().items() if k.endswith('_template') and isinstance(v, dict) and v) -_cache = {} - - -class PreparedReactor: - """ - Prepared reactors with predefined sets of templates. 
- """ - def __init__(self, rules, name): - self.name = name - self.rules = rules - - self.rxn_ms = [] - self.rxn_os = [] - self.alerts = [] - - self.global_alerts = [smarts(x) for x in rules['alerts']] - - for c in rules['templates']: - alerts = [smarts(x) for x in c['alerts']] - p = smarts(c['product']) - for rs in product(*([smarts(x) for x in c[x]] for x in 'ABCD' if x in c)): - self.rxn_ms.append(Reactor(rs, [p], one_shot=False, automorphism_filter=False)) # noqa - self.rxn_os.append(Reactor(rs, [p], one_shot=True, automorphism_filter=False)) # noqa - self.alerts.append(alerts) - - def __repr__(self): - return f'{__name__}.{self.name}' - - def __str__(self): - return f'Reactor<{self.rules["name"]}>' - - def __call__(self, *molecules: MoleculeContainer, one_shot=True, check_alerts: bool = True, - excess: Optional[List[int]] = None) -> Iterator[ReactionContainer]: - """ - :param molecules: Reactants molecules. - :param one_shot: Generate only single stage products. Otherwise, all possible combinations, including products. - :param check_alerts: Check structural alerts of reactants. - :param excess: Molecules indices which can be involved in multistep synthesis. All by default. 
- """ - if not molecules: - raise ValueError('empty molecule list') - if check_alerts and any(a < m for a, m in product(self.global_alerts, molecules)): - return - - molecules = fix_mapping_overlap(molecules) - seen = set() - if one_shot: - for rx, al in zip(self.rxn_os, self.alerts): - if check_alerts and any(a < m for a, m in product(al, molecules)): - continue - for r in rx(*molecules): - if str(r) in seen: - continue - seen.add(str(r)) - yield r - return - - excess = molecules if excess is None else [molecules[x] for x in excess] - stack = deque([]) - for i, (rx, al) in enumerate(zip(self.rxn_ms, self.alerts)): - if check_alerts and any(a < m for a, m in product(al, molecules)): - continue - x = self.rxn_ms.copy() - del x[i] - stack.appendleft((rx, molecules, x)) - - while stack: - rx, rct, nxt_rxn = stack.pop() - for r in rx(*rct): - if str(r) in seen: - continue - seen.add(str(r)) - - r = ReactionContainer([x.copy() for x in molecules], r.products) - yield r - - x = excess.copy() - for p in reversed(r.products): - x.insert(0, p.copy()) - x = fix_mapping_overlap(x) - if excess is not molecules: - # expected that product can react with all excess molecules simultaneously. - # e.g. 
multicomponent reaction (Ugi) - for m, nrx in enumerate(nxt_rxn): - z = nxt_rxn.copy() - del z[m] - stack.append((nrx, x.copy(), z)) - else: # drop one of the reactants - for n in range(len(r.products), len(x)): - y = x.copy() - del y[n] - for m, nrx in enumerate(nxt_rxn): - z = nxt_rxn.copy() - del z[m] - stack.append((nrx, y, z)) - - -prepare_reactor = PreparedReactor # backward compatibility - - -def __getattr__(name): - try: - return _cache[name] - except KeyError: - if name in __all__: - _cache[name] = t = PreparedReactor(globals()[f'{name}_template'], name) - return t - raise AttributeError - - -def __dir__(): - return __all__ diff --git a/chython/reactor/reactions/_amidation.py b/chython/reactor/reactions/_amidation.py deleted file mode 100644 index 6072072e..00000000 --- a/chython/reactor/reactions/_amidation.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2022-2024 Ramil Nugmanov -# Copyright 2023 Timur Gimadiev -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# - - -template = { - 'name': 'Amidation Reaction', - 'description': 'Amides formation from acids and amines', - 'templates': [ - { - 'A': [ - # [H,R]COOH - '[O;x0;z2;M]=[C;x2:1][O;D1:2]' - ], - 'B': [ - # Ar-NH2 - '[N;D1;x0;z1:3][C;a;M]', - # Alk-NH2 - '[N;D1;x0;z1:3][C;z1;x1;M]', - # Ar-NH-Ar - '[N;D2;x0;z1:3]([C;a;M])[C;a;M]', - # Alk-NH-Ar - '[N;D2;x0;z1:3]([C;a;M])[C;z1;x1;M]', - # Alk2NH - '[N;D2;x0;z1:3]([C;z1;x1;M])[C;z1;x1;M]', - # N1COCCC1 - '[N;D2;x0;z1;r5,r6,r7,r8:3]([C;z1;x2;M]-;@[O;M])[C;z1;x1;M]', - # CNO[R,H] - '[N;D2;x1;z1:3]([O;x1;z1;M])[C;z1;x1;M]', - # C[NH]NAc - '[N;D2;x1;z1:3]([N;D2;z1;x1;M][C;x2;z2;M]=[O;M])[C;z1;x1;M]' - ], - 'product': '[A:1]-[A:3]', - 'alerts': [], - 'ufe': { - 'A': 2, # use existing terminal atom - 'B': '[A:3][At;M]' # add temporary terminal atom - } - } - ], - 'alerts': ['[O;D1;x0;z1][C;z1;x1]', '[O;D1;z1][C,N;a]'] # global untolerant groups -} diff --git a/chython/reactor/reactions/_amine_isocyanate.py b/chython/reactor/reactions/_amine_isocyanate.py deleted file mode 100644 index 48111013..00000000 --- a/chython/reactor/reactions/_amine_isocyanate.py +++ /dev/null @@ -1,52 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2022-2024 Ramil Nugmanov -# Copyright 2023 Timur Gimadiev -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# - - -template = { - 'name': 'Amine with isocyanate reaction', - 'description': 'Amine with isocyanate reaction, C-N coupling reaction', - 'templates': [ - { - 'A': [ - # RN=C=O - '[C;D2;x2;z3:2](=[O;M])=[N;D2;x0;z2:1]' - ], - 'B': [ - # Ar-NH2 - '[N;D1;x0;z1:3][C;a;M]', - # Alk-NH2 - '[N;D1;x0;z1:3][C;z1;x1;M]', - # Ar-NH-Ar - '[N;D2;x0;z1:3]([C;a;M])[C;a;M]', - # Alk-NH-Ar - '[N;D2;x0;z1:3]([C;a;M])[C;z1;x1;M]', - # Alk2NH - '[N;D2;x0;z1:3]([C;z1;x1;M])[C;z1;x1;M]' - ], - 'product': '[A:1][A:2]-[A:3]', - 'alerts': [], - 'ufe': { - 'A': '[A:1][A:2][At;M]', - 'B': '[A:3][At;M]' - } - } - ], - 'alerts': [] -} diff --git a/chython/reactor/reactions/_buchwald_hartwig.py b/chython/reactor/reactions/_buchwald_hartwig.py deleted file mode 100644 index 552a7d6d..00000000 --- a/chython/reactor/reactions/_buchwald_hartwig.py +++ /dev/null @@ -1,50 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2022-2024 Ramil Nugmanov -# Copyright 2023 Timur Gimadiev -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# - - -template = { - 'name': 'Buchwald-Hartwig reaction', - 'description': 'Buchwald-Hartwig amination reaction, C-N coupling reaction', - 'templates': [ - { - 'A': [ - # Hal-Ar - '[Cl,Br,I;D1:1]-[C;a:2]' - ], - 'B': [ - # Ar-NH2 - '[N;D1;x0;z1:3][C;a;M]', - # Alk-NH2 - '[N;D1;x0;z1:3][C;z1;x1;M]', - # Alk-NH-Ar - '[N;D2;x0;z1:3]([C;a;M])[C;z1;x1;M]', - # Alk2NH - '[N;D2;x0;z1:3]([C;z1;x1;M])[C;z1;x1;M]' - ], - 'product': '[A:2]-[A:3]', - 'alerts': [], - 'ufe': { - 'A': 1, - 'B': '[A:3][At;M]' - } - } - ], - 'alerts': [] -} diff --git a/chython/reactor/reactions/_esterification.py b/chython/reactor/reactions/_esterification.py deleted file mode 100644 index a31cfa88..00000000 --- a/chython/reactor/reactions/_esterification.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2022-2024 Ramil Nugmanov -# Copyright 2023 Timur Gimadiev -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# - - -template = { - 'name': 'Fischer esterification', - 'description': 'Esters formation from alcohols and acids', - 'templates': [ - # reactants sets fully mixable - { - 'A': [ - # C(=O)O - '[O;D1;x0;z1:2]-[C;x2;z2:1]=[O;M]', - ], - 'B': [ - # CO - '[O;D1;x0;z1:3]-[C;x1;z1;M]' - ], - 'product': '[A:1]-[A:3]', - # condition-specific untolerant groups - 'alerts': [ - '[S;D1;x0;z1][C;x1;z1]', # thiol - '[O,S;D1;z1][A;a]' # [thia]phenol - ], - 'ufe': { - 'A': 2, - 'B': '[A:3][At;M]' - } - } - ], - 'alerts': [] # global untolerant groups -} diff --git a/chython/reactor/reactions/_macmillan.py b/chython/reactor/reactions/_macmillan.py deleted file mode 100644 index 4670aa3b..00000000 --- a/chython/reactor/reactions/_macmillan.py +++ /dev/null @@ -1,43 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2024 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# - - -template = { - 'name': 'Macmillan', - 'description': 'Deoxygenative C-C coupling reaction', - 'templates': [ - { - 'A': [ - # Hal-Ar - '[Cl,Br,I;D1:1]-[C;a:2]' - ], - 'B': [ - # CO - '[O;D1;x0;z1:3]-[C;x1;z1:4]' - ], - 'product': '[A:2]-[A:4]', - 'alerts': [], - 'ufe': { - 'A': 1, - 'B': 3 - } - } - ], - 'alerts': [] -} diff --git a/chython/reactor/reactions/_reductive_amination.py b/chython/reactor/reactions/_reductive_amination.py deleted file mode 100644 index c4b2dcf9..00000000 --- a/chython/reactor/reactions/_reductive_amination.py +++ /dev/null @@ -1,50 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2022-2024 Ramil Nugmanov -# Copyright 2023 Timur Gimadiev -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# - - -template = { - 'name': 'Amine carbonyl reductive amination reaction', - 'description': 'Amines formation from carbonyls and amines', - 'templates': [ - { - 'A': [ - # O=CR2 - '[O;x0;z2:2]=[C;x1:1]' - ], - 'B': [ - # Ar-NH2 - '[N;D1;x0;z1:3][C;a;M]', - # Alk-NH2 - '[N;D1;x0;z1:3][C;z1;x1;M]', - # Alk-NH-Ar - '[N;D2;x0;z1:3]([C;a;M])[C;z1;x1;M]', - # Alk2NH - '[N;D2;x0;z1:3]([C;z1;x1;M])[C;z1;x1;M]' - ], - 'product': '[A:1]-[A:3]', - 'alerts': [], - 'ufe': { - 'A': 2, - 'B': '[A:3][At;M]' - } - } - ], - 'alerts': [] -} diff --git a/chython/reactor/reactions/_sonogashira.py b/chython/reactor/reactions/_sonogashira.py deleted file mode 100644 index a031da89..00000000 --- a/chython/reactor/reactions/_sonogashira.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2022-2024 Ramil Nugmanov -# Copyright 2023 Timur Gimadiev -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# - - -template = { - 'name': 'Sonogashira reaction', - 'description': 'Sonogashira reaction, C-C coupling reaction. 
It employs a palladium catalyst as well as copper' - 'co-catalyst', - 'templates': [ - { - 'A': [ - # HC#C-R - '[C;D1;x0;z3:1]#[C;D2;x0;M]' - ], - 'B': [ - # Ar-Hal - '[Cl,Br,I;D1:3]-[C;a:2]', - # C=C-Hal - '[Cl,Br,I;D1:3]-[C;x1;z2:2]=[C;x0;z2;M]', - # R-C(=O)-Hal - '[Cl,Br,I;D1:3]-[C;x2;z2:2]=[O;M]' - ], - 'product': '[A:1]-[A:2]', - 'alerts': [], - 'ufe': { - 'A': '[A:1][At;M]', - 'B': 3 - } - } - ], - 'alerts': [] -} diff --git a/chython/reactor/reactions/_sulfonamidation.py b/chython/reactor/reactions/_sulfonamidation.py deleted file mode 100644 index 9f1abfea..00000000 --- a/chython/reactor/reactions/_sulfonamidation.py +++ /dev/null @@ -1,52 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2022-2024 Ramil Nugmanov -# Copyright 2023 Timur Gimadiev -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# - - -template = { - 'name': 'Sulfoamination reaction', - 'description': 'Sulfoamination reaction, S-N coupling reaction', - 'templates': [ - { - 'A': [ - # RS(=O)(=O)X - '[S;D4;x3;z3:1]([O,F,Cl,Br,I;D1:2])(=[O;M])(=[O;M])[C;M]' - ], - 'B': [ - # Ar-NH2 - '[N;D1;x0;z1:3][C;a;M]', - # Alk-NH2 - '[N;D1;x0;z1:3][C;z1;x1;M]', - # Ar-NH-Ar - '[N;D2;x0;z1:3]([C;a;M])[C;a;M]', - # Alk-NH-Ar - '[N;D2;x0;z1:3]([C;a;M])[C;z1;x1;M]', - # Alk2NH - '[N;D2;x0;z1:3]([C;z1;x1;M])[C;z1;x1;M]' - ], - 'product': '[A:1]-[A:3]', - 'alerts': [], - 'ufe': { - 'A': 2, - 'B': '[A:3][At;M]' - } - }, - ], - 'alerts': [] -} diff --git a/chython/reactor/reactions/_suzuki_miyaura.py b/chython/reactor/reactions/_suzuki_miyaura.py deleted file mode 100644 index 57535a1d..00000000 --- a/chython/reactor/reactions/_suzuki_miyaura.py +++ /dev/null @@ -1,88 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2022-2024 Ramil Nugmanov -# Copyright 2023 Timur Gimadiev -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# - - -template = { - 'name': 'Suzuki-Miyaura reaction', - 'description': 'Suzuki-Miyaura C-C coupling reaction', - 'templates': [ - { - 'A': [ - # X-Ar - '[Cl,Br,I;D1:1]-[C;a:2]' - ], - 'B': [ - # Ar-B - '[B;D3;x2;z1:4]([O:5])([O:6])-[C;a:3]', - # C=C-B, [N,O]C=C-B, C=C([N,O])-B - '[B;D3;x2;z1:4]([O:5])([O:6])-[C;x1,x2;z2:3]=[C;x0,x1;z2;M]', - # B-C#C - '[B;D3;x2;z1:4]([O:5])([O:6])-[C;D2;x1;z3:3]', - # B-C(alk) - '[B;D3;x2;z1:4]([O:5])([O:6])-[C;x1,x2;z1:3]' - ], - 'product': '[A:2]-[A:3]', - 'alerts': [], - 'ufe': { - 'A': 1, - 'B': '[A:3][At;M]' - } - }, - { - 'A': [ - # X-C=C - '[Cl,Br,I;D1:1]-[C;x1,x2;z2:2]=[C;x0,x1;z2;M]' - ], - 'B': [ - # Ar-B - '[B;D3;x2;z1:4]([O:5])([O:6])-[C;a:3]', - # C=C-B, [N,O]C=C-B, C=C([N,O])-B - '[B;D3;x2;z1:4]([O:5])([O:6])-[C;x1,x2;z2:3]=[C;x0,x1;z2;M]', - # B-C(alk) - '[B;D3;x2;z1:4]([O:5])([O:6])-[C;x1,x2;z1:3]' - ], - 'product': '[A:2]-[A:3]', - 'alerts': [], - 'ufe': { - 'A': 1, - 'B': '[A:3][At;M]' - } - }, - { - 'A': [ - # X-C(alk) - '[Cl,Br;D1:1]-[C;x1,x2;z1:2]' - ], - 'B': [ - # Ar-B - '[B;D3;x2;z1:4]([O:5])([O:6])-[C;a:3]', - # C=C-B, [N,O]C=C-B, C=C([N,O])-B - '[B;D3;x2;z1:4]([O:5])([O:6])-[C;x1,x2;z2:3]=[C;x0,x1;z2;M]' - ], - 'product': '[A:2]-[A:3]', - 'alerts': [], - 'ufe': { - 'A': 1, - 'B': '[A:3][At;M]' - } - } - ], - 'alerts': [] -} diff --git a/chython/reactor/reactions/_xec_sp2_sp3.py b/chython/reactor/reactions/_xec_sp2_sp3.py deleted file mode 100644 index 3c225fae..00000000 --- a/chython/reactor/reactions/_xec_sp2_sp3.py +++ /dev/null @@ -1,47 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2025 Kostia Chernichenko -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# - - -template = { - 'name': 'XEC', - 'description': 'Cross-electrophile C-sp2-X C-sp3-X coupling reaction', - 'templates': [ - { - 'A': [ - # Hal-Ar - '[Cl,Br,I;D1:1]-[C;a:2]', - # Hal-pseudoaromatic, more specifically C5, C6 vinylic - '[Cl,Br,I;D1:1]-[C;z2;r5,r6:2]', - # Ar triflate - '[C;a:2]-[O;D2;x1:1]-[S;x3;D4:10](=[O:11])(=[O:12])-[C;D4:13](-[F;D1:14])(-[F;D1:15])-[F;D1:16]' - ], - 'B': [ - # sp3-C-X - '[Cl,Br,I;D1:3]-[C;z1:4]' - ], - 'product': '[A:2]-[A:4]', - 'alerts': [], - 'ufe': { - 'A': '[A:2][At;M]', - 'B': 3 - } - } - ], - 'alerts': [] -} diff --git a/chython/reactor/reactions/ufe.py b/chython/reactor/reactions/ufe.py deleted file mode 100644 index 2bdfc95b..00000000 --- a/chython/reactor/reactions/ufe.py +++ /dev/null @@ -1,99 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2024 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -from typing import Iterator -from ._amidation import template as amidation_template -from ._amine_isocyanate import template as amine_isocyanate_template -from ._buchwald_hartwig import template as buchwald_hartwig_template -from ._esterification import template as esterification_template -from ._macmillan import template as macmillan_template -from ._reductive_amination import template as reductive_amination_template -from ._sonogashira import template as sonogashira_template -from ._sulfonamidation import template as sulfonamidation_template -from ._suzuki_miyaura import template as suzuki_miyaura_template -from ..transformer import Transformer -from ... import MoleculeContainer, smarts -from ...periodictable import At - - -__all__ = ['PreparedUFE'] -__all__.extend(k[:-9] for k, v in globals().items() if k.endswith('_template') and isinstance(v, dict) and v) -_cache = {} - - -class TransformerWrapper: - def __init__(self, query, transformation, name): - if isinstance(transformation, str): - self.transformer = Transformer(smarts(query), smarts(transformation), copy_metadata=True, - fix_aromatic_rings=False, fix_tautomers=False) - else: - self.query = smarts(query) - self.mapping = transformation - self.transformer = None - self.name = name - - def __call__(self, molecule: MoleculeContainer) -> Iterator[MoleculeContainer]: - if self.transformer is None: - for mapping in self.query.get_mapping(molecule): - n = mapping[self.mapping] - copy = molecule.copy() - copy._atoms[n].__class__ = At # ad-hoc for masking leaving group - copy._hydrogens[n] = 0 - copy.meta[self.name] = n - yield copy - else: - for copy in self.transformer(molecule): - copy.meta[self.name] = max(copy) - yield copy - - -class PreparedUFE: - def __init__(self, rules, name): - self.name = name - self.rules = rules - self.transformations = [] - - for n, rule in enumerate(rules['templates']): - for g in 'AB': - for s in rule[g]: - t = TransformerWrapper(s, rule['ufe'][g], f'{name}_{g}{n}') - 
self.transformations.append(t) - - def __call__(self, molecule: MoleculeContainer) -> Iterator[MoleculeContainer]: - for transformer in self.transformations: - yield from transformer(molecule) - - def __repr__(self): - return f'{__name__}.{self.name}' - - def __str__(self): - return f'UFE<{self.rules["name"]}>' - - -def __getattr__(name): - try: - return _cache[name] - except KeyError: - if name in __all__: - _cache[name] = t = PreparedUFE(globals()[f'{name}_template'], name) - return t - raise AttributeError - - -def __dir__(): - return __all__ diff --git a/chython/reactor/reactor.py b/chython/reactor/reactor.py index 89a8ba89..dd8aa2c0 100644 --- a/chython/reactor/reactor.py +++ b/chython/reactor/reactor.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2025 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # Copyright 2019 Adelia Fatykhova # This file is part of chython. # @@ -141,7 +141,11 @@ def _single_stage(self, chosen, ignored) -> Iterator[List[MoleculeContainer]]: if united_chosen is None: united_chosen = reduce(or_, chosen) max_ignored_number = max(ignored, default=0) - new = self._patcher(united_chosen, mapping) + try: + new = self._patcher(united_chosen, mapping) + except Exception: + logger.info('invalid product structure, skipping') + continue collision = set(new).intersection(ignored) if collision: new.remap(dict(zip(collision, count(max(max_ignored_number, max(new)) + 1)))) diff --git a/chython/reactor/retro/__init__.py b/chython/reactor/retro/__init__.py deleted file mode 100644 index caa804af..00000000 --- a/chython/reactor/retro/__init__.py +++ /dev/null @@ -1,77 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2024 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# -from chython import Reactor, ReactionContainer, smarts -from typing import Iterator -from ._amidation import template as amidation_template -from ._aryl_amination import template as aryl_amination_template -from ._mitsunobu import template as mitsunobu_template -from ._sonogashira import template as sonogashira_template -from ._suzuki_miyaura import template as suzuki_miyaura_template - - -__all__ = ['PreparedReactor'] -__all__.extend(k[:-9] for k, v in globals().items() if k.endswith('_template') and isinstance(v, dict) and v) -_cache = {} - - -class PreparedReactor: - """ - Prepared retrosynthetic reactors with predefined sets of templates. 
- """ - def __init__(self, rules, name): - self.name = name - self.rules = rules - - self.rxn = rxn = [] - for tmp in rules['templates']: - p = smarts(tmp['product']) - rs = [smarts(x) for x in tmp['reactants']] - rxn.append(Reactor([p], rs, automorphism_filter=False)) # noqa - - def __repr__(self): - return f'{__name__}.{self.name}' - - def __str__(self): - return f'RetroReactor<{self.rules["name"]}>' - - def __call__(self, molecule) -> Iterator[ReactionContainer]: - """ - :param molecule: Product molecule - """ - seen = set() - for rx in self.rxn: - for r in rx(molecule): - if str(r) in seen: - continue - seen.add(str(r)) - yield r - - -def __getattr__(name): - try: - return _cache[name] - except KeyError: - if name in __all__: - _cache[name] = t = PreparedReactor(globals()[f'{name}_template'], name) - return t - raise AttributeError - - -def __dir__(): - return __all__ diff --git a/chython/reactor/retro/_amidation.py b/chython/reactor/retro/_amidation.py deleted file mode 100644 index 323bf018..00000000 --- a/chython/reactor/retro/_amidation.py +++ /dev/null @@ -1,39 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2024 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# - -template = { - 'name': 'Amidation reaction', - 'description': 'Amide Coupling with Amines and Acids/Halo-Anhydrides', - 'templates': [ - { - 'product': '[N;D2,D3;z1:1]-;!@[C;x2;z2:2]=[O;M]', # any SP3 nitrogen with carboxy - 'reactants': [ - '[A:1]', - '[A:2]-[O;M]' - ] - }, - { - 'product': '[N;D2;z2;x0:1]-;!@[C;x2;z2:2]=[O;M]', # C=N-C(=O)R - 'reactants': [ - '[A:1]', - '[A:2]-[O;M]' - ] - } - ] -} diff --git a/chython/reactor/retro/_mitsunobu.py b/chython/reactor/retro/_mitsunobu.py deleted file mode 100644 index 51ba667b..00000000 --- a/chython/reactor/retro/_mitsunobu.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2024 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# - -template = { - 'name': 'Mitsunobu reaction', - 'description': 'Phenol-Alcohol Phenol-Acid Acid-Alcohol couplings', - 'templates': [ - { - # Ph-O-Alk - 'product': '[O;D2;x0;z1:1](-;!@[C;a;M])[C;z1;x1:2]', - 'reactants': [ - '[A:1]', - '[A:2]-[O;M]' - ] - }, - { - # Ac-O-Alk - 'product': '[O;D2;x0;z1:1](-;!@[C;x2;z2;M]=[O;M])[C;z1;x1:2]', - 'reactants': [ - '[A:1]', - '[A:2]-[O;M]' - ] - }, - { - # Ph-O-Ac - 'product': '[O;D2;x0;z1:1](-;!@[C;D3;x2;z2:2]=[O;M])[C;a;M]', - 'reactants': [ - '[A:1]', - '[A:2]-[O;M]' - ] - } - ] -} diff --git a/chython/reactor/retro/_sonogashira.py b/chython/reactor/retro/_sonogashira.py deleted file mode 100644 index 01ab553e..00000000 --- a/chython/reactor/retro/_sonogashira.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2024 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# - -template = { - 'name': 'Sonogashira reaction', - 'description': 'Alkyne Ar-X CSP2-X Ac-X couplings', - 'templates': [ - { - # Ar - 'product': '[C;D2;x0;z3:1](-;!@[C;a:2])#[C;D2;x0;M]', - 'reactants': [ - '[A:1]', - '[A:2]-[Br;M]' - ] - }, - { - # Ac - 'product': '[C;D2;x0;z3:1](-;!@[C;D3;x1;z2:2]=[O;M])#[C;D2;x0;M]', - 'reactants': [ - '[A:1]', - '[A:2]-[Cl;M]' - ] - }, - { - # CSP2 - 'product': '[C;D2;x0;z3:1](-;!@[C;x0;z2:2])#[C;D2;x0;M]', - 'reactants': [ - '[A:1]', - '[A:2]-[Br;M]' - ] - } - ] -} diff --git a/chython/reactor/retro/_suzuki_miyaura.py b/chython/reactor/retro/_suzuki_miyaura.py deleted file mode 100644 index fb88bc18..00000000 --- a/chython/reactor/retro/_suzuki_miyaura.py +++ /dev/null @@ -1,54 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2024 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# - -template = { - 'name': 'Suzuki-Miyaura reaction', - 'description': 'Ar-Ar Ar-CSP2 CSP2-CSP2 couplings', - 'templates': [ - { - 'product': '[C;a;D3:1]-;!@[C;a:2]', - 'reactants': [ - '[A:1]-[B;M]([O;M])[O;M]', - '[A:2]-[Br;M]' - ] - }, - { - 'product': '[C;a;D3:1]-;!@[C;z2:2]=[C;M]', - 'reactants': [ - '[A:1]-[B;M]([O;M])[O;M]', - '[A:2]-[Br;M]' - ] - }, - { - # reverse - 'product': '[C;a;D3:1]-;!@[C;z2:2]=[C;M]', - 'reactants': [ - '[A:1]-[Br;M]', - '[A:2]-[B;M]([O;M])[O;M]' - ] - }, - { - 'product': '[C;D2,D3;z2:1](-;!@[C;z2:2]=[C;M])=[C;M]', - 'reactants': [ - '[A:1]-[B;M]([O;M])[O;M]', - '[A:2]-[Br;M]' - ] - } - ] -} diff --git a/chython/reactor/scaffold.py b/chython/reactor/scaffold.py deleted file mode 100644 index 312a48fc..00000000 --- a/chython/reactor/scaffold.py +++ /dev/null @@ -1,122 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2024 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# -from .. import smarts, MoleculeContainer -from .transformer import Transformer - -""" -Predefined transformers for common reactive groups cleavage. 
-""" - -_alcohol = ( - ('[O;D1;z1;x0][C;z1;x1:1]', '[A:1]', # rule - 'CCO', 'CC', # match test - 'c1ccccc1O'), # false-match test -) - -_phenol = ( - ('[O;D1;z1;x0][C;a:1]', '[A:1]', 'c1ccccc1O', 'c1ccccc1', 'CCO'), -) - -_chloro_aryl = ( - ('[Cl;D1;z1;x0][C;a:1]', '[A:1]', 'c1ccccc1Cl', 'c1ccccc1', 'c1ccccc1Br', 'CCCl'), -) - -_bromo_aryl = ( - ('[Br;D1;z1;x0][C;a:1]', '[A:1]', 'c1ccccc1Br', 'c1ccccc1', 'c1ccccc1I', 'CCBr'), -) - -_iodo_aryl = ( - ('[I;D1;z1;x0][C;a:1]', '[A:1]', 'c1ccccc1I', 'c1ccccc1', 'c1ccccc1Cl', 'CCI'), -) - -_chloro_alkyl = ( - ('[Cl;D1;z1;x0][C;x1;z1:1]', '[A:1]', 'CCCl', 'CC', 'c1ccccc1Cl'), -) - -_bromo_alkyl = ( - ('[Br;D1;z1;x0][C;x1;z1:1]', '[A:1]', 'CCBr', 'CC', 'c1ccccc1Br'), -) - -_iodo_alkyl = ( - ('[I;D1;z1;x0][C;x1;z1:1]', '[A:1]', 'CCI', 'CC', 'c1ccccc1I'), -) - -_carboxy = ( - ('[O;D1;z1;x0][C;D3;!R;x2;z2](=[O;D1])[C:1]', '[A:1]', 'CCC(=O)O', 'CC'), -) - -_chloro_anhydride = ( - ('[Cl;D1;z1;x0][C;D3;!R;x2;z2](=[O;D1])[C:1]', '[A:1]', 'CCC(=O)Cl', 'CC'), -) - -_amine_primary = ( - ('[N;D1;z1;x0][C;x1;z1:1]', '[A:1]', 'CCN', 'CC', 'CNC'), - ('[N;D1;z1;x0][C;a:1]', '[A:1]', 'c1ccccc1N', 'c1ccccc1', 'c1ccccc1NC'), -) - -################# -# Magic Factory # -################# - -_groups = [k[1:] for k, v in globals().items() if k.startswith('_') and isinstance(v, tuple) and v] -__all__ = ['apply_all'] + _groups -_cache = {} - - -def _prepare_reactor(rules, name): - rxn = [Transformer(smarts(r), smarts(p)) for r, p, *_ in rules] - - def w(molecule: MoleculeContainer, /) -> MoleculeContainer: - """ - Remove reactive groups from the given molecule if applicable. - """ - for r in rxn: - while True: - try: - molecule = next(r(molecule)) - except StopIteration: - break - return molecule - - w.__module__ = __name__ - w.__qualname__ = w.__name__ = name - return w - - -def apply_all(molecule: MoleculeContainer, /) -> MoleculeContainer: - """ - Remove all found reactive groups from the given molecule. 
- """ - for name in _groups: - molecule = __getattr__(name)(molecule) - return molecule - - -def __getattr__(name): - try: - return _cache[name] - except KeyError: - if name in _groups: - _cache[name] = t = _prepare_reactor(globals()[f'_{name}'], name) - return t - raise AttributeError - - -def __dir__(): - return __all__ diff --git a/chython/reactor/test/__init__.py b/chython/reactor/test/__init__.py index c8a5a613..e5d5aa63 100644 --- a/chython/reactor/test/__init__.py +++ b/chython/reactor/test/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/reactor/test/test_deprotection.py b/chython/reactor/test/test_deprotection.py deleted file mode 100644 index 1e07b77b..00000000 --- a/chython/reactor/test/test_deprotection.py +++ /dev/null @@ -1,42 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2022-2024 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -from chython import smiles, smarts, Transformer -from chython.reactor import deprotection - - -def test_deprotection(): - for x in dir(deprotection): - if x == 'apply_all': - continue - for r in getattr(deprotection, '_' + x): - if len(r) > 2: # has test - q, p, t, a, *bs = r - t = smiles(t) - t.canonicalize() - q = smarts(q) - a = smiles(a) - a.canonicalize() - # test match - assert q < t, f'{x}: {q} !< {t}' - o = next(Transformer(q, smarts(p))(t)) - assert o == a, f'{x}: {o} != {a}' - for b in bs: - b = smiles(b) - b.canonicalize() - assert not q < b, f'{x}: {q} < {b}' diff --git a/chython/reactor/test/test_reactor.py b/chython/reactor/test/test_reactor.py index 37b4268e..45e43587 100644 --- a/chython/reactor/test/test_reactor.py +++ b/chython/reactor/test/test_reactor.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov +# Copyright 2025, 2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/reactor/test/test_scaffold.py b/chython/reactor/test/test_scaffold.py deleted file mode 100644 index 28da5386..00000000 --- a/chython/reactor/test/test_scaffold.py +++ /dev/null @@ -1,57 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2024 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -from chython import smiles, smarts, Transformer -from chython.reactor import scaffold -from itertools import product - - -def test_scaffold(): - qs = set() - ts = set() - for x in dir(scaffold): - if x == 'apply_all': - continue - for r in getattr(scaffold, '_' + x): - if len(r) > 2: # has test - q, p, t, a, *bs = r - t = smiles(t) - t.canonicalize() - q = smarts(q) - qs.add(q) - ts.add(t) - a = smiles(a) - a.canonicalize() - # test match - assert q < t, f'{x}: {q} !< {t}' - o = next(Transformer(q, smarts(p))(t)) - assert o == a, f'{x}: {o} != {a}' - for b in bs: - b = smiles(b) - b.canonicalize() - assert not q < b, f'{x}: {q} < {b}' - - # test rule-test is unique pair - assert len(qs) == len(ts) - - m = 0 - for q, t in product(qs, ts): - m += q < t - - # test selectivity of rules - assert len(qs) == m diff --git a/chython/reactor/transformer.py b/chython/reactor/transformer.py index cbb3f505..bf51f992 100644 --- a/chython/reactor/transformer.py +++ b/chython/reactor/transformer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2025 Ramil Nugmanov +# Copyright 2014-2026 Ramil Nugmanov # Copyright 2019 Adelia Fatykhova # This file is part of chython. # @@ -17,11 +17,15 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # +from logging import getLogger from typing import Union from .base import BaseReactor from ..containers import QueryContainer, MoleculeContainer +logger = getLogger('chython.reactor') + + class Transformer(BaseReactor): """ Editor for molecules. 
@@ -54,7 +58,11 @@ def __call__(self, structure: MoleculeContainer): raise TypeError('only Molecules possible') for mapping in self._pattern.get_mapping(structure, automorphism_filter=self._automorphism_filter): - transformed = self._patcher(structure, mapping) + try: + transformed = self._patcher(structure, mapping) + except Exception: + logger.info('invalid product structure, skipping') + continue if self._copy_metadata: transformed.meta.update(structure.meta) yield transformed diff --git a/chython/utils/__init__.py b/chython/utils/__init__.py index f8de7a60..027d053f 100644 --- a/chython/utils/__init__.py +++ b/chython/utils/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2025 Ramil Nugmanov +# Copyright 2019-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/utils/free_wilson.py b/chython/utils/free_wilson.py index 9ee415c3..2d648b5c 100644 --- a/chython/utils/free_wilson.py +++ b/chython/utils/free_wilson.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2024 Ramil Nugmanov +# Copyright 2022-2026 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/utils/grid.py b/chython/utils/grid.py index 01b5dd90..72f85d10 100644 --- a/chython/utils/grid.py +++ b/chython/utils/grid.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2025 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # Copyright 2024 Philippe Gantzer # This file is part of chython. # diff --git a/chython/utils/retro.py b/chython/utils/retro.py index cd84b184..92b5e71b 100644 --- a/chython/utils/retro.py +++ b/chython/utils/retro.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2025 Ramil Nugmanov +# Copyright 2021-2026 Ramil Nugmanov # Copyright 2021 Alexander Sizov # This file is part of chython. 
# diff --git a/doc/config.rst b/doc/config.rst index f5e5d4db..a59e6b8e 100644 --- a/doc/config.rst +++ b/doc/config.rst @@ -19,8 +19,8 @@ Configuration Reference chython.conformer_engine = 'rdkit' # default # Options: 'rdkit', 'cdpkit' - # Neural AAM device (set before first reset_mapping call) - chython.torch_device = 'cpu' # default; 'cuda:0' for GPU + # Neural AAM device (legacy, unused since migration to ONNX) + chython.torch_device = 'cpu' # kept for backward compatibility # Java JAR paths (CDK, OPSIN) chython.class_paths = ['/path/to/cdk.jar', '/path/to/opsin.jar'] diff --git a/doc/index.rst b/doc/index.rst index d5b97cfc..862b6524 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -14,7 +14,8 @@ Library for processing molecules and reactions in Python. - Standardize, canonicalize, and enumerate tautomers - Morgan and linear fingerprints with Tanimoto similarity - Atom-to-atom mapping (neural + rule-based) -- Template-based reaction application and deprotection +- Template-based reaction application +- Functional and protective group detection and deprotection - Stereo handling (tetrahedral, allene, cis-trans) - 2D/3D depiction with Jupyter support - RDKit interoperability diff --git a/doc/reactions.rst b/doc/reactions.rst index 4c9bdf7c..e52a323b 100644 --- a/doc/reactions.rst +++ b/doc/reactions.rst @@ -1,7 +1,7 @@ Reactions & Templates ===================== -Parsing, CGR, atom-atom mapping, reaction templates, and deprotection. +Parsing, CGR, atom-atom mapping, reaction templates, functional groups, and deprotection. Parsing Reactions @@ -117,24 +117,36 @@ CGR overlays reactant and product graphs, showing bond changes: Atom-Atom Mapping ----------------- -Neural attention-based mapping (requires ``chytorch-rxnmap`` package): +Neural attention-based mapping (requires ``chython-rxnmap`` package): .. 
code-block:: python - import chython - chython.torch_device = 'cpu' # set before first use; 'cuda:0' for GPU + from chython import smiles rxn = smiles('CCO.CC(=O)O>>CCOC(=O)C.O') + + # Neural attention mapping (ONNX-based, CPU only) + rxn.attention_mapping() + + # With score (float, higher = more confident) + score = rxn.attention_mapping(return_score=True) + + # Keep original reactant atom numbers + rxn.attention_mapping(keep_reactants_numbering=True) + +``attention_mapping`` loads the ONNX model once on first call. + +Utility methods: + +.. code-block:: python + + # Reset mapping: deduplicate atom numbers across components (no model) rxn.reset_mapping() - # Rule-based fix for known mapping mistakes + # Rule-based fix for known mapping mistakes (called automatically by attention_mapping) rxn.fix_mapping() log = rxn.fix_mapping(logging=True) -``reset_mapping`` loads the neural model once on first call. -To use GPU, set ``chython.torch_device`` before the first call. -For multiprocessing, call ``reset_mapping`` only inside workers to avoid a single-GPU bottleneck. - Reactor (Multi-Reactant Templates) ------------------------------------ @@ -186,20 +198,176 @@ Transformer applies a pattern replacement to a single molecule: print(str(result)) -Deprotection ------------- +Predefined Reactors +-------------------- -Built-in templates for ~50+ protective group removals: +The ``@`` operator enumerates possible reactions between molecules using predefined +Reactor templates built on functional group detection. It returns a generator +of ``(reaction_name, ReactionContainer)`` tuples: .. 
code-block:: python + + from chython import smiles + + acid = smiles('CC(=O)O') + amine = smiles('CCN') + + # Two-component reaction + for name, rxn in acid @ amine: + print(name, rxn) + # amidation CCN.O=C(O)C>>CCNC(=O)C + +The operator is symmetric — order does not matter: + +.. code-block:: python + + list(amine @ acid) == list(acid @ amine) # True + +Multi-component reactions use a list: + +.. code-block:: python + + aldehyde = smiles('CC=O') + amine = smiles('CCN') + isocyanide = smiles('[C-]#[N+]C') + + for name, rxn in aldehyde @ [amine, isocyanide]: + print(name, rxn) + # ugi_3cr C(=O)C.CCN.[C-]#[N+]C>>CCNC(C(=O)NC)C + +Each molecule's ``functional_groups`` property is used as a pre-filter to select +applicable Reactor templates. Only matching combinations are attempted. + +Selective reaction application with the ``reaction`` keyword: + +.. code-block:: python + + arx = smiles('Brc1ccccc1') + boronic = smiles('OB(O)c1ccccc1') + + # Only Suzuki coupling (skip other possible reactions) + for name, rxn in arx.react(boronic, reaction='suzuki'): + print(name, rxn) + + +Oxidation, Reduction & Transformation +--------------------------------------- + +Single-molecule transformations are available via dedicated methods: + +.. code-block:: python + + mol = smiles('OCC') + mol.canonicalize() + + # Oxidation products + for name, rxn in mol.oxidize(): + print(name, rxn) + # alcohol_to_aldehyde CCO>>CC=O + + # Reduction products + ketone = smiles('CC(=O)c1ccccc1') + ketone.canonicalize() + for name, rxn in ketone.reduce(): + print(name, rxn) + # ketone_to_alcohol c1ccccc1C(C)=O>>OC(C)c1ccccc1 + + # Functional group interconversions (Appel, borylation, ring closures, etc.)
+ for name, rxn in mol.transform(): + print(name, rxn) + # appel CCO>>CCBr + + # All single-molecule transformations at once (~ operator) + for name, rxn in ~mol: + print(name, rxn) + +All methods accept an optional ``reaction`` keyword to apply selectively: + +.. code-block:: python + + mol = smiles('OC(C)c1ccccc1') + mol.canonicalize() + + # Only oxidize to ketone (skip other possible transformations) + for name, rxn in mol.oxidize(reaction='alcohol_to_ketone'): + print(name, rxn) + + # Only Appel (alcohol → bromide) + for name, rxn in mol.transform(reaction='appel'): + print(name, rxn) + + +Functional & Protective Groups +------------------------------- + +Detect functional groups and their counts: + +.. code-block:: python + + mol = smiles('Clc1ccc(Br)cc1O') + mol.canonicalize() + + mol.functional_groups + # {'aryl_chloride': 1, 'aryl_bromide': 1, 'phenol': 1} + +Detect protective groups: + +.. code-block:: python + + mol = smiles('CC(NC(=O)OC(C)(C)C)CNC(=O)OC(C)(C)C') + mol.canonicalize() + + mol.protective_groups + # {'amine_boc': 2} + +Remove protective groups (in-place): + +.. code-block:: python + + mol = smiles('c1ccccc1NC(=O)OC(C)(C)C') + mol.canonicalize() + + # Remove specific protective group + mol.remove_protection('amine_boc') # returns True if changed + str(mol) # 'c1ccccc1N' + + # Remove all known protective groups + mol2 = smiles('CC(NC(=O)OC(C)(C)C)COC(=O)OCC=C') + mol2.canonicalize() + mol2.remove_protection() # removes all found PGs + + +Reconstruct Mapping +------------------- + +Annotate a reaction by trying to reconstruct the product from reactants using +predefined templates. If successful, sets atom-to-atom mapping and returns +matched reaction labels: + +.. 
code-block:: python + + from chython import smiles + + rxn = smiles('Brc1ccccc1.OB(O)c1ccc(F)cc1>>Fc1ccc(-c2ccccc2)cc1') + rxn.reset_mapping() + + labels = rxn.reconstruct_mapping() + # ['react:suzuki'] + + # Mapping is now set on the product + format(rxn, 'm') # reaction SMILES with atom mapping + +Supports single-product reactions. Tries in order: + +1. Standalone deprotection +2. Standalone protection (reverse) +3. Single-molecule transforms (oxidize/reduce/transform) +4. Deprotection + transform composition +5. Multi-component reactions (subset-based) + +Returns an empty list if no template matches: + +.. code-block:: python - # Remove all known protection groups iteratively - result = apply_all(mol) + rxn = smiles('CC>>CCC') + rxn.reconstruct_mapping() # [] diff --git a/doc/search.rst b/doc/search.rst new file mode 100644 index 00000000..4d969f00 --- /dev/null +++ b/doc/search.rst @@ -0,0 +1,295 @@ +Substructure Search & Fingerprints +=================================== + +Isomorphism, SMARTS queries, query building, and molecular fingerprints. + + +Substructure Check +------------------ + +.. code-block:: python + + from chython import smiles + + benzene = smiles('c1ccccc1') + toluene = smiles('Cc1ccccc1') + + # Operator-based + benzene < toluene # True: benzene is substructure of toluene + benzene <= toluene # True: substructure or equal + benzene < benzene # False: not strict substructure of itself + benzene <= benzene # True: equal + + # Method-based + benzene.is_substructure(toluene) # True + benzene.is_equal(toluene) # False + + +Enumerating Matches +------------------- + +``get_mapping()`` yields all substructure mappings as dicts ``{query_atom: target_atom}``: + +.. 
code-block:: python + + query = smiles('CC') + target = smiles('CCC') + + # First match + mapping = next(query.get_mapping(target)) + + # All matches (automorphism_filter=True by default skips symmetric duplicates) + for mapping in query.get_mapping(target): + print(mapping) + + # All symmetry-equivalent matches + for mapping in query.get_mapping(target, automorphism_filter=False): + print(mapping) + + # Restrict search to specific atoms + for mapping in query.get_mapping(target, searching_scope=[1, 2]): + print(mapping) + + +SMARTS Queries +-------------- + +``smarts()`` returns a ``QueryContainer`` with pattern-matching semantics: + +.. code-block:: python + + from chython import smarts, smiles + + # Carbonyl + query = smarts('[C]=[O]') + query <= smiles('CC(=O)O') # True + + for mapping in query.get_mapping(smiles('CC(=O)O')): + print(mapping) + + # Aromatic nitrogen + smarts('[N;a]') <= smiles('c1ccncc1') # True + + # Element containment shortcut + 'N' in smiles('c1ccncc1') # True + 'Br' in smiles('c1ccncc1') # False + + +SMARTS Language +--------------- + +Chython's SMARTS differs from RDKit/OpenBabel in several ways. + +Atom Primitives +~~~~~~~~~~~~~~~ + +Standard: + +- ``#N`` - atomic number (``#6`` for carbon) +- ``D`` - degree / neighbor count (``D3``) +- ``h`` - implicit hydrogen count (``h1``) +- ``r`` - ring size membership (``r5``, ``r6``) +- ``!R`` - acyclic (not in any ring) +- ``a`` - aromatic +- ``A`` - any element (wildcard) +- Charge: ``+``, ``-``, ``+2``, ``-3`` +- Isotope: ``[14C]`` +- Stereo: ``@``, ``@@`` + +Chython extensions: + +- ``z`` - hybridization: ``z1`` = sp3, ``z2`` = sp2, ``z3`` = sp, ``z4`` = aromatic +- ``x`` - heteroatom neighbor count: ``x0`` = none, ``x2`` = two +- ``M`` - any metal (d-element) + +.. 
code-block:: python + + smarts('[C;z2;x0]') # sp2 carbon, no heteroatom neighbors + smarts('[O;D1;z1;x0][C;D3;x2;z2]=O') # carboxylic acid + smarts('[M]') # any metal + +NOT Supported +~~~~~~~~~~~~~ + +- Recursive SMARTS ``$(...)`` +- Valence ``v`` +- Total connectivity ``X`` +- Ring count ``R`` without size (use ``r`` with explicit sizes or ``!R``) +- Implicit AND ``&`` (use ``;`` instead) + +Logical Operators +~~~~~~~~~~~~~~~~~ + +- ``;`` = AND between primitives: ``[C;D3;r6]`` +- ``,`` = OR within same primitive type: ``[r5,r6]``, ``[C,N]`` + +OR cannot mix different primitive types: ``[D1,h1]`` raises an error. + +.. code-block:: python + + smarts('[C;r5,r6;a]') # aromatic C in 5- or 6-membered ring + smarts('[C,N]') # carbon or nitrogen + smarts('[C;!R]') # acyclic carbon + +Bond Queries +~~~~~~~~~~~~ + +- ``-`` single, ``=`` double, ``#`` triple, ``:`` aromatic, ``~`` any +- OR: ``-,=`` (single or double) +- Negation: ``!:`` (not aromatic) +- Ring bonds: ``-;@`` (single in ring), ``-;!@`` (single not in ring) + +.. code-block:: python + + smarts('[C]-;!@[C]') # non-ring single bond between two carbons + +CXSMARTS Extensions +~~~~~~~~~~~~~~~~~~~ + +Radicals and atom properties via CXSMARTS notation: + +.. code-block:: python + + # Aromatic C in 5/6-ring, non-ring single bond to SP3/SP2 radical C + # with 0-1 hydrogens and no heteroatom neighbors + q = smarts('[C;r5,r6;a]-;!@[C;h0,h1] |^1:1,atomProp:1.hyb.32:1.het.0|') + + +Query Building API +------------------ + +Build queries programmatically with ``QueryContainer``: + +.. 
code-block:: python + + from chython import QueryContainer + from chython.containers.bonds import QueryBond + from chython.periodictable import ListElement + + # Acyclic ketone (thioketone included) + q = QueryContainer() + q.add_atom('C', neighbors=3, hybridization=2, heteroatoms=1, rings_sizes=0, hydrogens=0) + q.add_atom(ListElement(['O', 'S']), n=3) # O or S at atom number 3 + q.add_bond(1, 3, 2) # double bond + print(q) + + q < smiles('CC(=O)O') # True + q < smiles('CC(=S)C') # True + q < smiles('CC=O') # False (C has wrong neighbor count) + + # Ring-ring linker using QueryBond(order, in_ring) + q = QueryContainer() + q.add_atom('C', rings_sizes=6, hybridization=4) + q.add_atom('C', rings_sizes=6, hybridization=4) + q.add_bond(1, 2, QueryBond(1, False)) # single bond, NOT in ring + + q < smiles('c1ccc(cc1)-c1ccccc1') # True (biphenyl) + q < smiles('C1CC(=O)CC1') # False + +Query from existing molecule: + +.. code-block:: python + + mol = smiles('NCC(=O)O') + # Extract query for atoms 3,4,5 with environment constraints + carboxy = mol.substructure([3, 4, 5], as_query=True, + skip_neighbors_marks=False, + skip_hybridizations_marks=False, + skip_hydrogens_marks=False, + skip_rings_sizes_marks=False) + carboxy < smiles('NCC(=O)O') # True (carboxylic acid) + carboxy < smiles('NCC(=O)OC') # False (ester, not acid) + + +Automorphism +------------ + +.. code-block:: python + + mol = smiles('c1ccccc1') + + mol.is_automorphic() # True for benzene + + for mapping in mol.get_automorphism_mapping(): + print(mapping) + + +Maximum Common Substructure (MCS) +---------------------------------- + +.. code-block:: python + + mol1 = smiles('c1ccccc1O') # phenol + mol2 = smiles('c1ccccc1N') # aniline + + for mapping in mol1.get_mcs_mapping(mol2, limit=10000): + print(mapping) # {mol1_atom: mol2_atom} + break # first = largest MCS + + +Morgan Fingerprints +------------------- + +Similar to ECFP / RDKit Morgan fingerprints: + +..
code-block:: python + + from chython import smiles + import numpy as np + + mol = smiles('c1ccccc1O') + + # Binary fingerprint as numpy array (shape: (1024,)) + fp = mol.morgan_fingerprint( + min_radius=1, + max_radius=4, + length=1024, + number_active_bits=2, + ) + + # Bit indices only (more memory efficient) + bits = mol.morgan_bit_set(min_radius=1, max_radius=4, length=1024) + + # Raw hashes (no folding) - useful for exact fragment matching + hashes = mol.morgan_hash_set(min_radius=1, max_radius=4) + + +Linear Fingerprints +------------------- + +Based on linear path fragments (similar to RDKit RDKFingerprint): + +.. code-block:: python + + fp = mol.linear_fingerprint( + min_radius=1, + max_radius=4, + length=1024, + number_active_bits=2, + number_bit_pairs=4, # count-sensitive bits + ) + + bits = mol.linear_bit_set(min_radius=1, max_radius=4, length=1024) + hashes = mol.linear_hash_set(min_radius=1, max_radius=4) + + +Tanimoto Similarity +------------------- + +.. code-block:: python + + import numpy as np + + mol1 = smiles('c1ccccc1O') + mol2 = smiles('c1ccccc1N') + + fp1 = mol1.morgan_fingerprint() + fp2 = mol2.morgan_fingerprint() + + # Via numpy + tanimoto = np.dot(fp1, fp2) / (fp1.sum() + fp2.sum() - np.dot(fp1, fp2)) + + # Via bit sets (faster) + bits1 = mol1.morgan_bit_set() + bits2 = mol2.morgan_bit_set() + tanimoto = len(bits1 & bits2) / len(bits1 | bits2) diff --git a/pyproject.toml b/pyproject.toml index 8c3ffc58..23ef2e17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = 'chython' -version = '2.17' +version = '2.18' description = 'Library for processing molecules and reactions in python way' authors = ['Ramil Nugmanov '] license = 'LGPLv3' @@ -14,7 +14,7 @@ classifiers=[ 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 3 :: Only', - 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.10', 'Topic :: Scientific/Engineering', 'Topic 
:: Scientific/Engineering :: Chemistry', 'Topic :: Scientific/Engineering :: Information Analysis', @@ -30,22 +30,22 @@ include = [ ] [tool.poetry.dependencies] -python = ">=3.8" -cachedmethods = '>=0.1.4' +python = ">=3.10" lazy-object-proxy = '>=1.6' -lxml = '>=4.1' +numpy = ">=1.21.0" mini-racer = {version = '>=0.12.4', optional = true} py-mini-racer = {version = '>=0.6.0', optional = true} -chytorch-rxnmap = {version = '>=1.4', optional = true} +onnxruntime = {version = '>=1.16', optional = true} +scipy = {version = '>=1.10', optional = true} +chython-rxnmap = {version = '>=2.0', optional = true} rdkit = {version = '>=2023.9', optional = true} pyppeteer = {version = '>=2.0.0', optional = true} jpype1 = {version = '>=1.6.0', optional = true} openbabel-wheel = {version = '>=3.1.1.22', optional = true} cdpkit = {version = '>=1.2.3', optional = true} -numpy = ">=1.21.0" [tool.poetry.extras] -mapping = ['chytorch-rxnmap'] +mapping = ['onnxruntime', 'scipy', 'chython-rxnmap'] rdkit = ['rdkit'] png = ['pyppeteer'] racer-default = ['mini-racer']